* elf64-ppc.c: Define more insns used in plt call stubs.
	(ppc64_elf_brtaken_reloc): Assume isa version 2 or above.
	(ppc64_elf_relocate_section): Likewise.
	(enum ppc_stub_type): Add ppc_stub_plt_call_r2save.
	(struct ppc_link_hash_table): Increase size of stub_count array.
	Add plt_stub_align and plt_thread_safe.
	(ALWAYS_USE_FAKE_DEP, ALWAYS_EMIT_R2SAVE): Define.
	(plt_stub_size, plt_stub_pad): New functions.
	(build_plt_stub): Emit barriers for power7 thread safety.  Don't
	emit needless save of r2.
	(build_tls_get_addr_stub): Adjust params.
	(ppc_build_one_stub): Handle ppc_stub_plt_call_r2save and aligning
	plt stubs.  Adjust build_*plt_stub calls.
	(ppc_size_one_stub): Similarly.
	(ppc64_elf_size_stubs): Accept plt_thread_safe and plt_stub_align
	params.  Choose default for plt_thread_safe based on existence of
	calls to thread creation functions.  Modify plt_call to
	plt_call_r2save when no tocsave reloc found.  Align tail of stub
	sections.
	(ppc64_elf_build_stubs): Align tail of stub sections.  Adjust
	output of stub statistics.
	(ppc64_elf_relocate_section): Handle ppc_stub_plt_call_r2save.
	* elf64-ppc.h (ppc64_elf_size_stubs): Update prototype.
ld/
	* emultempl/ppc64elf.em (PARSE_AND_LIST_PROLOGUE,
	PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
	PARSE_AND_LIST_ARGS_CASES): Handle --{no-,}plt-thread-safe and
	--{no-,}plt-align.
	(plt_thread_safe, plt_stub_align): New vars.
	(gld${EMULATION_NAME}_after_allocation): Pass them to
	ppc64_elf_size_stubs.  Align stub sections according to plt_stub_align.
	* ld.texinfo: Document new command line options, and an old
	undocumented option.
commit 794e51c094
Alan Modra <amodra@gmail.com>  2012-01-11 09:28:45 +00:00
parent 313a658140
6 changed files with 414 additions and 63 deletions

bfd/ChangeLog

@ -1,3 +1,29 @@
2012-01-11 Alan Modra <amodra@gmail.com>
* elf64-ppc.c: Define more insns used in plt call stubs.
(ppc64_elf_brtaken_reloc): Assume isa version 2 or above.
(ppc64_elf_relocate_section): Likewise.
(enum ppc_stub_type): Add ppc_stub_plt_call_r2save.
(struct ppc_link_hash_table): Increase size of stub_count array.
Add plt_stub_align and plt_thread_safe.
(ALWAYS_USE_FAKE_DEP, ALWAYS_EMIT_R2SAVE): Define.
(plt_stub_size, plt_stub_pad): New functions.
(build_plt_stub): Emit barriers for power7 thread safety. Don't
emit needless save of r2.
(build_tls_get_addr_stub): Adjust params.
(ppc_build_one_stub): Handle ppc_stub_plt_call_r2save and aligning
plt stubs. Adjust build_*plt_stub calls.
(ppc_size_one_stub): Similarly.
(ppc64_elf_size_stubs): Accept plt_thread_safe and plt_stub_align
params. Choose default for plt_thread_safe based on existence of
calls to thread creation functions. Modify plt_call to
plt_call_r2save when no tocsave reloc found. Align tail of stub
sections.
(ppc64_elf_build_stubs): Align tail of stub sections. Adjust
output of stub statistics.
(ppc64_elf_relocate_section): Handle ppc_stub_plt_call_r2save.
* elf64-ppc.h (ppc64_elf_size_stubs): Update prototype.
2012-01-10 H.J. Lu <hongjiu.lu@intel.com>
PR ld/13581

bfd/elf64-ppc.c

@ -1,6 +1,6 @@
/* PowerPC64-specific support for 64-bit ELF.
Copyright 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008,
2009, 2010, 2011 Free Software Foundation, Inc.
2009, 2010, 2011, 2012 Free Software Foundation, Inc.
Written by Linus Nordberg, Swox AB <info@swox.com>,
based on elf32-ppc.c by Ian Lance Taylor.
Largely rewritten by Alan Modra.
@ -152,6 +152,13 @@ static bfd_vma opd_entry_value
#define ADDIS_R2_R2 0x3c420000 /* addis %r2,%r2,off@ha */
#define ADDI_R2_R2 0x38420000 /* addi %r2,%r2,off@l */
#define XOR_R11_R11_R11 0x7d6b5a78 /* xor %r11,%r11,%r11 */
#define ADD_R12_R12_R11 0x7d8c5a14 /* add %r12,%r12,%r11 */
#define ADD_R2_R2_R11 0x7c425a14 /* add %r2,%r2,%r11 */
#define CMPLDI_R2_0 0x28220000 /* cmpldi %r2,0 */
#define BNECTR 0x4ca20420 /* bnectr+ */
#define BNECTR_P4 0x4ce20420 /* bnectr+ */
#define LD_R11_0R2 0xe9620000 /* ld %r11,xxx+0(%r2) */
#define LD_R2_0R2 0xe8420000 /* ld %r2,xxx+0(%r2) */
@ -2356,8 +2363,8 @@ ppc64_elf_brtaken_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
long insn;
enum elf_ppc64_reloc_type r_type;
bfd_size_type octets;
/* Disabled until we sort out how ld should choose 'y' vs 'at'. */
bfd_boolean is_power4 = FALSE;
/* Assume 'at' branch hints. */
bfd_boolean is_isa_v2 = TRUE;
/* If this is a relocatable link (output_bfd test tells us), just
call the generic function. Any adjustment will be done at final
@ -2374,7 +2381,7 @@ ppc64_elf_brtaken_reloc (bfd *abfd, arelent *reloc_entry, asymbol *symbol,
|| r_type == R_PPC64_REL14_BRTAKEN)
insn |= 0x01 << 21; /* 'y' or 't' bit, lowest bit of BO field. */
if (is_power4)
if (is_isa_v2)
{
/* Set 'a' bit. This is 0b00010 in BO field for branch
on CR(BI) insns (BO == 001at or 011at), and 0b01000
@ -3584,7 +3591,8 @@ enum ppc_stub_type {
ppc_stub_long_branch_r2off,
ppc_stub_plt_branch,
ppc_stub_plt_branch_r2off,
ppc_stub_plt_call
ppc_stub_plt_call,
ppc_stub_plt_call_r2save
};
struct ppc_stub_hash_entry {
@ -3752,14 +3760,20 @@ struct ppc_link_hash_table
bfd_size_type got_reli_size;
/* Statistics. */
unsigned long stub_count[ppc_stub_plt_call];
unsigned long stub_count[ppc_stub_plt_call_r2save];
/* Number of stubs against global syms. */
unsigned long stub_globals;
/* Alignment of PLT call stubs. */
unsigned int plt_stub_align:4;
/* Set if PLT call stubs should load r11. */
unsigned int plt_static_chain:1;
/* Set if PLT call stubs need a read-read barrier. */
unsigned int plt_thread_safe:1;
/* Set if we should emit symbols for stubs. */
unsigned int emit_stub_syms:1;
@ -9471,21 +9485,126 @@ ppc_type_of_stub (asection *input_sec,
return ppc_stub_none;
}
/* Build a .plt call stub. */
/* With power7 weakly ordered memory model, it is possible for ld.so
to update a plt entry in one thread and have another thread see a
stale zero toc entry. To avoid this we need some sort of acquire
barrier in the call stub. One solution is to make the load of the
toc word seem to appear to depend on the load of the function entry
word. Another solution is to test for r2 being zero, and branch to
the appropriate glink entry if so.
. fake dep barrier compare
. ld 11,xxx(2) ld 11,xxx(2)
. mtctr 11 mtctr 11
. xor 11,11,11 ld 2,xxx+8(2)
. add 2,2,11 cmpldi 2,0
. ld 2,xxx+8(2) bnectr+
. bctr b <glink_entry>
The solution involving the compare turns out to be faster, so
that's what we use unless the branch won't reach. */
#define ALWAYS_USE_FAKE_DEP 0
#define ALWAYS_EMIT_R2SAVE 0
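The compare sequence relies on an unconditional branch back to the glink entry, so it is only usable when that branch reaches (the b instruction has a signed 26-bit byte displacement, roughly +/-32MB). A minimal standalone sketch of that reach test, separate from the linker sources:

#include <stdio.h>

/* Sketch only, not part of elf64-ppc.c: a displacement is usable by the
   compare sequence when it fits the signed 26-bit range of "b".  */
static int
branch_reaches (long long disp)
{
  return disp >= -(1LL << 25) && disp < (1LL << 25);
}

int
main (void)
{
  printf ("%d\n", branch_reaches (0x1fffffcLL));  /* 1: within +/-32MB */
  printf ("%d\n", branch_reaches (0x2000000LL));  /* 0: fall back to fake dep */
  return 0;
}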
static inline bfd_byte *
build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
bfd_boolean plt_static_chain)
{
#define PPC_LO(v) ((v) & 0xffff)
#define PPC_HI(v) (((v) >> 16) & 0xffff)
#define PPC_HA(v) PPC_HI ((v) + 0x8000)
static inline unsigned int
plt_stub_size (struct ppc_link_hash_table *htab,
struct ppc_stub_hash_entry *stub_entry,
bfd_vma off)
{
unsigned size = PLT_CALL_STUB_SIZE;
if (!(ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save))
size -= 4;
if (!htab->plt_static_chain)
size -= 4;
if (htab->plt_thread_safe)
size += 8;
if (PPC_HA (off) == 0)
size -= 4;
if (PPC_HA (off + 8 + 8 * htab->plt_static_chain) != PPC_HA (off))
size += 4;
if (stub_entry->h != NULL
&& (stub_entry->h == htab->tls_get_addr_fd
|| stub_entry->h == htab->tls_get_addr)
&& !htab->no_tls_get_addr_opt)
size += 13 * 4;
return size;
}
/* If this stub would cross fewer 2**plt_stub_align boundaries if we align,
then return the padding needed to do so. */
static inline unsigned int
plt_stub_pad (struct ppc_link_hash_table *htab,
struct ppc_stub_hash_entry *stub_entry,
bfd_vma plt_off)
{
int stub_align = 1 << htab->plt_stub_align;
unsigned stub_size = plt_stub_size (htab, stub_entry, plt_off);
bfd_vma stub_off = stub_entry->stub_sec->size;
if (((stub_off + stub_size - 1) & -stub_align) - (stub_off & -stub_align)
> (stub_size & -stub_align))
return stub_align - (stub_off & (stub_align - 1));
return 0;
}
/* Build a .plt call stub. */
static inline bfd_byte *
build_plt_stub (struct ppc_link_hash_table *htab,
struct ppc_stub_hash_entry *stub_entry,
bfd_byte *p, bfd_vma offset, Elf_Internal_Rela *r)
{
bfd *obfd = htab->stub_bfd;
bfd_boolean plt_static_chain = htab->plt_static_chain;
bfd_boolean plt_thread_safe = htab->plt_thread_safe;
bfd_boolean use_fake_dep = plt_thread_safe;
bfd_vma cmp_branch_off = 0;
if (!ALWAYS_USE_FAKE_DEP
&& plt_thread_safe
&& !(stub_entry->h != NULL
&& (stub_entry->h == htab->tls_get_addr_fd
|| stub_entry->h == htab->tls_get_addr)
&& !htab->no_tls_get_addr_opt))
{
bfd_vma pltoff = stub_entry->plt_ent->plt.offset & ~1;
bfd_vma pltindex = (pltoff - PLT_INITIAL_ENTRY_SIZE) / PLT_ENTRY_SIZE;
bfd_vma glinkoff = GLINK_CALL_STUB_SIZE + pltindex * 8;
bfd_vma to, from;
if (pltindex > 32767)
glinkoff += (pltindex - 32767) * 4;
to = (glinkoff
+ htab->glink->output_offset
+ htab->glink->output_section->vma);
from = (p - stub_entry->stub_sec->contents
+ 4 * (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
+ 4 * (PPC_HA (offset) != 0)
+ 4 * (PPC_HA (offset + 8 + 8 * plt_static_chain)
!= PPC_HA (offset))
+ 4 * (plt_static_chain != 0)
+ 20
+ stub_entry->stub_sec->output_offset
+ stub_entry->stub_sec->output_section->vma);
cmp_branch_off = to - from;
use_fake_dep = cmp_branch_off + (1 << 25) >= (1 << 26);
}
if (PPC_HA (offset) != 0)
{
if (r != NULL)
{
r[0].r_offset += 4;
if (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
r[0].r_offset += 4;
r[0].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_HA);
r[1].r_offset = r[0].r_offset + 4;
r[1].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_LO_DS);
@ -9498,7 +9617,7 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
}
else
{
r[2].r_offset = r[1].r_offset + 8;
r[2].r_offset = r[1].r_offset + 8 + 8 * use_fake_dep;
r[2].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_LO_DS);
r[2].r_addend = r[0].r_addend + 8;
if (plt_static_chain)
@ -9509,7 +9628,9 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
}
}
}
bfd_put_32 (obfd, STD_R2_40R1, p), p += 4;
if (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
bfd_put_32 (obfd, STD_R2_40R1, p), p += 4;
bfd_put_32 (obfd, ADDIS_R12_R2 | PPC_HA (offset), p), p += 4;
bfd_put_32 (obfd, LD_R11_0R12 | PPC_LO (offset), p), p += 4;
if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
@ -9518,16 +9639,22 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
offset = 0;
}
bfd_put_32 (obfd, MTCTR_R11, p), p += 4;
if (use_fake_dep)
{
bfd_put_32 (obfd, XOR_R11_R11_R11, p), p += 4;
bfd_put_32 (obfd, ADD_R12_R12_R11, p), p += 4;
}
bfd_put_32 (obfd, LD_R2_0R12 | PPC_LO (offset + 8), p), p += 4;
if (plt_static_chain)
bfd_put_32 (obfd, LD_R11_0R12 | PPC_LO (offset + 16), p), p += 4;
bfd_put_32 (obfd, BCTR, p), p += 4;
}
else
{
if (r != NULL)
{
r[0].r_offset += 4;
if (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
r[0].r_offset += 4;
r[0].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_DS);
if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
{
@ -9537,7 +9664,7 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
}
else
{
r[1].r_offset = r[0].r_offset + 8;
r[1].r_offset = r[0].r_offset + 8 + 8 * use_fake_dep;
r[1].r_info = ELF64_R_INFO (0, R_PPC64_TOC16_DS);
r[1].r_addend = r[0].r_addend + 8 + 8 * plt_static_chain;
if (plt_static_chain)
@ -9548,7 +9675,9 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
}
}
}
bfd_put_32 (obfd, STD_R2_40R1, p), p += 4;
if (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
bfd_put_32 (obfd, STD_R2_40R1, p), p += 4;
bfd_put_32 (obfd, LD_R11_0R2 | PPC_LO (offset), p), p += 4;
if (PPC_HA (offset + 8 + 8 * plt_static_chain) != PPC_HA (offset))
{
@ -9556,11 +9685,23 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
offset = 0;
}
bfd_put_32 (obfd, MTCTR_R11, p), p += 4;
if (use_fake_dep)
{
bfd_put_32 (obfd, XOR_R11_R11_R11, p), p += 4;
bfd_put_32 (obfd, ADD_R2_R2_R11, p), p += 4;
}
if (plt_static_chain)
bfd_put_32 (obfd, LD_R11_0R2 | PPC_LO (offset + 16), p), p += 4;
bfd_put_32 (obfd, LD_R2_0R2 | PPC_LO (offset + 8), p), p += 4;
bfd_put_32 (obfd, BCTR, p), p += 4;
}
if (plt_thread_safe && !use_fake_dep)
{
bfd_put_32 (obfd, CMPLDI_R2_0, p), p += 4;
bfd_put_32 (obfd, BNECTR_P4, p), p += 4;
bfd_put_32 (obfd, B_DOT + cmp_branch_off, p), p += 4;
}
else
bfd_put_32 (obfd, BCTR, p), p += 4;
return p;
}
@ -9581,9 +9722,12 @@ build_plt_stub (bfd *obfd, bfd_byte *p, int offset, Elf_Internal_Rela *r,
#define MTLR_R11 0x7d6803a6
static inline bfd_byte *
build_tls_get_addr_stub (bfd *obfd, bfd_byte *p, int offset,
Elf_Internal_Rela *r, bfd_boolean plt_static_chain)
build_tls_get_addr_stub (struct ppc_link_hash_table *htab,
struct ppc_stub_hash_entry *stub_entry,
bfd_byte *p, bfd_vma offset, Elf_Internal_Rela *r)
{
bfd *obfd = htab->stub_bfd;
bfd_put_32 (obfd, LD_R11_0R3 + 0, p), p += 4;
bfd_put_32 (obfd, LD_R12_0R3 + 8, p), p += 4;
bfd_put_32 (obfd, MR_R0_R3, p), p += 4;
@ -9596,7 +9740,7 @@ build_tls_get_addr_stub (bfd *obfd, bfd_byte *p, int offset,
if (r != NULL)
r[0].r_offset += 9 * 4;
p = build_plt_stub (obfd, p, offset, r, plt_static_chain);
p = build_plt_stub (htab, stub_entry, p, offset, r);
bfd_put_32 (obfd, BCTRL, p - 4);
bfd_put_32 (obfd, LD_R11_0R1 + 32, p), p += 4;
@ -9943,6 +10087,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
break;
case ppc_stub_plt_call:
case ppc_stub_plt_call_r2save:
if (stub_entry->h != NULL
&& stub_entry->h->is_func_descriptor
&& stub_entry->h->oh != NULL)
@ -10009,6 +10154,15 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
return FALSE;
}
if (htab->plt_stub_align != 0)
{
unsigned pad = plt_stub_pad (htab, stub_entry, off);
stub_entry->stub_sec->size += pad;
stub_entry->stub_offset = stub_entry->stub_sec->size;
loc += pad;
}
r = NULL;
if (info->emitrelocations)
{
@ -10028,11 +10182,9 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
&& (stub_entry->h == htab->tls_get_addr_fd
|| stub_entry->h == htab->tls_get_addr)
&& !htab->no_tls_get_addr_opt)
p = build_tls_get_addr_stub (htab->stub_bfd, loc, off, r,
htab->plt_static_chain);
p = build_tls_get_addr_stub (htab, stub_entry, loc, off, r);
else
p = build_plt_stub (htab->stub_bfd, loc, off, r,
htab->plt_static_chain);
p = build_plt_stub (htab, stub_entry, loc, off, r);
size = p - loc;
break;
@ -10052,6 +10204,7 @@ ppc_build_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
"long_branch_r2off",
"plt_branch",
"plt_branch_r2off",
"plt_call",
"plt_call" };
len1 = strlen (stub_str[stub_entry->stub_type - 1]);
@ -10102,7 +10255,8 @@ ppc_size_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
if (htab == NULL)
return FALSE;
if (stub_entry->stub_type == ppc_stub_plt_call)
if (stub_entry->stub_type == ppc_stub_plt_call
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
{
asection *plt;
off = stub_entry->plt_ent->plt.offset & ~(bfd_vma) 1;
@ -10118,18 +10272,9 @@ ppc_size_one_stub (struct bfd_hash_entry *gen_entry, void *in_arg)
- elf_gp (plt->output_section->owner)
- htab->stub_group[stub_entry->id_sec->id].toc_off);
size = PLT_CALL_STUB_SIZE;
if (!htab->plt_static_chain)
size -= 4;
if (PPC_HA (off) == 0)
size -= 4;
if (PPC_HA (off + 8 + 8 * htab->plt_static_chain) != PPC_HA (off))
size += 4;
if (stub_entry->h != NULL
&& (stub_entry->h == htab->tls_get_addr_fd
|| stub_entry->h == htab->tls_get_addr)
&& !htab->no_tls_get_addr_opt)
size += 13 * 4;
size = plt_stub_size (htab, stub_entry, off);
if (htab->plt_stub_align)
size += plt_stub_pad (htab, stub_entry, off);
if (info->emitrelocations)
{
stub_entry->stub_sec->reloc_count
@ -11098,7 +11243,8 @@ maybe_strip_output (struct bfd_link_info *info, asection *isec)
bfd_boolean
ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
bfd_boolean plt_static_chain)
bfd_boolean plt_static_chain, int plt_thread_safe,
int plt_stub_align)
{
bfd_size_type stub_group_size;
bfd_boolean stubs_always_before_branch;
@ -11108,6 +11254,40 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
return FALSE;
htab->plt_static_chain = plt_static_chain;
htab->plt_stub_align = plt_stub_align;
if (plt_thread_safe == -1)
{
const char *const thread_starter[] =
{
"pthread_create",
/* libstdc++ */
"_ZNSt6thread15_M_start_threadESt10shared_ptrINS_10_Impl_baseEE",
/* librt */
"aio_init", "aio_read", "aio_write", "aio_fsync", "lio_listio",
"mq_notify", "create_timer",
/* libanl */
"getaddrinfo_a",
/* libgomp */
"GOMP_parallel_start",
"GOMP_parallel_loop_static_start",
"GOMP_parallel_loop_dynamic_start",
"GOMP_parallel_loop_guided_start",
"GOMP_parallel_loop_runtime_start",
"GOMP_parallel_sections_start",
};
unsigned i;
for (i = 0; i < sizeof (thread_starter)/ sizeof (thread_starter[0]); i++)
{
struct elf_link_hash_entry *h;
h = elf_link_hash_lookup (&htab->elf, thread_starter[i],
FALSE, FALSE, TRUE);
plt_thread_safe = h != NULL && h->ref_regular;
if (plt_thread_safe)
break;
}
}
htab->plt_thread_safe = plt_thread_safe;
stubs_always_before_branch = group_size < 0;
if (group_size < 0)
stub_group_size = -group_size;
@ -11342,10 +11522,14 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
if (stub_type == ppc_stub_plt_call
&& irela + 1 < irelaend
&& irela[1].r_offset == irela->r_offset + 4
&& ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE
&& !tocsave_find (htab, INSERT,
&local_syms, irela + 1, input_bfd))
goto error_ret_free_internal;
&& ELF64_R_TYPE (irela[1].r_info) == R_PPC64_TOCSAVE)
{
if (!tocsave_find (htab, INSERT,
&local_syms, irela + 1, input_bfd))
goto error_ret_free_internal;
}
else if (stub_type == ppc_stub_plt_call)
stub_type = ppc_stub_plt_call_r2save;
/* Support for grouping stub sections. */
id_sec = htab->stub_group[section->id].link_sec;
@ -11361,6 +11545,8 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
{
/* The proper stub has already been created. */
free (stub_name);
if (stub_type == ppc_stub_plt_call_r2save)
stub_entry->stub_type = stub_type;
continue;
}
@ -11380,7 +11566,8 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
}
stub_entry->stub_type = stub_type;
if (stub_type != ppc_stub_plt_call)
if (stub_type != ppc_stub_plt_call
&& stub_type != ppc_stub_plt_call_r2save)
{
stub_entry->target_value = code_value;
stub_entry->target_section = code_sec;
@ -11460,6 +11647,14 @@ ppc64_elf_size_stubs (struct bfd_link_info *info, bfd_signed_vma group_size,
htab->glink_eh_frame->size = size;
}
if (htab->plt_stub_align != 0)
for (stub_sec = htab->stub_bfd->sections;
stub_sec != NULL;
stub_sec = stub_sec->next)
if ((stub_sec->flags & SEC_LINKER_CREATED) == 0)
stub_sec->size = ((stub_sec->size + (1 << htab->plt_stub_align) - 1)
& (-1 << htab->plt_stub_align));
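The rounding above is the usual power-of-two align-up idiom; a standalone sketch with hypothetical numbers:

#include <stdio.h>

int
main (void)
{
  /* Sketch only: pad a stub section's tail to a 2**plt_stub_align multiple,
     here 0x1234 bytes rounded up to a 32-byte (plt_stub_align == 5) multiple.  */
  unsigned long size = 0x1234;
  unsigned int plt_stub_align = 5;
  unsigned long rounded = ((size + (1UL << plt_stub_align) - 1)
			   & -(1UL << plt_stub_align));
  printf ("0x%lx -> 0x%lx\n", size, rounded);  /* 0x1234 -> 0x1240 */
  return 0;
}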
for (stub_sec = htab->stub_bfd->sections;
stub_sec != NULL;
stub_sec = stub_sec->next)
@ -11785,6 +11980,14 @@ ppc64_elf_build_stubs (bfd_boolean emit_stub_syms,
if (htab->relbrlt != NULL)
htab->relbrlt->reloc_count = 0;
if (htab->plt_stub_align != 0)
for (stub_sec = htab->stub_bfd->sections;
stub_sec != NULL;
stub_sec = stub_sec->next)
if ((stub_sec->flags & SEC_LINKER_CREATED) == 0)
stub_sec->size = ((stub_sec->size + (1 << htab->plt_stub_align) - 1)
& (-1 << htab->plt_stub_align));
for (stub_sec = htab->stub_bfd->sections;
stub_sec != NULL;
stub_sec = stub_sec->next)
@ -11818,14 +12021,16 @@ ppc64_elf_build_stubs (bfd_boolean emit_stub_syms,
" toc adjust %lu\n"
" long branch %lu\n"
" long toc adj %lu\n"
" plt call %lu"),
" plt call %lu\n"
" plt call toc %lu"),
stub_sec_count,
stub_sec_count == 1 ? "" : "s",
htab->stub_count[ppc_stub_long_branch - 1],
htab->stub_count[ppc_stub_long_branch_r2off - 1],
htab->stub_count[ppc_stub_plt_branch - 1],
htab->stub_count[ppc_stub_plt_branch_r2off - 1],
htab->stub_count[ppc_stub_plt_call - 1]);
htab->stub_count[ppc_stub_plt_call - 1],
htab->stub_count[ppc_stub_plt_call_r2save - 1]);
}
return TRUE;
}
@ -11925,8 +12130,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
bfd_vma TOCstart;
bfd_boolean ret = TRUE;
bfd_boolean is_opd;
/* Disabled until we sort out how ld should choose 'y' vs 'at'. */
bfd_boolean is_power4 = FALSE;
/* Assume 'at' branch hints. */
bfd_boolean is_isa_v2 = TRUE;
bfd_vma d_offset = (bfd_big_endian (output_bfd) ? 2 : 0);
/* Initialize howto table if needed. */
@ -12558,6 +12763,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
stub_entry = ppc_get_stub_entry (input_section, sec, fdh, rel, htab);
if (stub_entry != NULL
&& (stub_entry->stub_type == ppc_stub_plt_call
|| stub_entry->stub_type == ppc_stub_plt_call_r2save
|| stub_entry->stub_type == ppc_stub_plt_branch_r2off
|| stub_entry->stub_type == ppc_stub_long_branch_r2off))
{
@ -12586,7 +12792,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
if (!can_plt_call)
{
if (stub_entry->stub_type == ppc_stub_plt_call)
if (stub_entry->stub_type == ppc_stub_plt_call
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
{
/* If this is a plain branch rather than a branch
and link, don't require a nop. However, don't
@ -12633,7 +12840,8 @@ ppc64_elf_relocate_section (bfd *output_bfd,
}
if (can_plt_call
&& stub_entry->stub_type == ppc_stub_plt_call)
&& (stub_entry->stub_type == ppc_stub_plt_call
|| stub_entry->stub_type == ppc_stub_plt_call_r2save))
unresolved_reloc = FALSE;
}
@ -12679,7 +12887,10 @@ ppc64_elf_relocate_section (bfd *output_bfd,
+ stub_entry->stub_sec->output_section->vma);
addend = 0;
if (stub_entry->stub_type == ppc_stub_plt_call
if ((stub_entry->stub_type == ppc_stub_plt_call
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
&& (ALWAYS_EMIT_R2SAVE
|| stub_entry->stub_type == ppc_stub_plt_call_r2save)
&& rel + 1 < relend
&& rel[1].r_offset == rel->r_offset + 4
&& ELF64_R_TYPE (rel[1].r_info) == R_PPC64_TOCSAVE)
@ -12688,7 +12899,7 @@ ppc64_elf_relocate_section (bfd *output_bfd,
if (insn != 0)
{
if (is_power4)
if (is_isa_v2)
{
/* Set 'a' bit. This is 0b00010 in BO field for branch
on CR(BI) insns (BO == 001at or 011at), and 0b01000

bfd/elf64-ppc.h

@ -1,5 +1,5 @@
/* PowerPC64-specific support for 64-bit ELF.
Copyright 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011
Copyright 2002, 2003, 2004, 2005, 2007, 2008, 2010, 2011, 2012
Free Software Foundation, Inc.
This file is part of BFD, the Binary File Descriptor library.
@ -49,7 +49,7 @@ bfd_boolean ppc64_elf_check_init_fini
bfd_boolean ppc64_elf_next_input_section
(struct bfd_link_info *, asection *);
bfd_boolean ppc64_elf_size_stubs
(struct bfd_link_info *, bfd_signed_vma, bfd_boolean);
(struct bfd_link_info *, bfd_signed_vma, bfd_boolean, int, int);
bfd_boolean ppc64_elf_build_stubs
(bfd_boolean, struct bfd_link_info *, char **);
void ppc64_elf_restore_symbols

ld/ChangeLog

@ -1,3 +1,15 @@
2012-01-11 Alan Modra <amodra@gmail.com>
* emultempl/ppc64elf.em (PARSE_AND_LIST_PROLOGUE,
PARSE_AND_LIST_LONGOPTS, PARSE_AND_LIST_OPTIONS,
PARSE_AND_LIST_ARGS_CASES): Handle --{no-,}plt-thread-safe and
--{no-,}plt-align.
(plt_thread_safe, plt_stub_align): New vars.
(gld${EMULATION_NAME}_after_allocation): Pass them to
ppc64_elf_size_stubs. Align stub sections according to plt_stub_align.
* ld.texinfo: Document new command line options, and an old
undocumented option.
2012-01-09 Roland McGrath <mcgrathr@google.com>
* configure.in: Use AM_ZLIB.

ld/emultempl/ppc64elf.em

@ -1,5 +1,5 @@
# This shell script emits a C file. -*- C -*-
# Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
# Copyright 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
# Free Software Foundation, Inc.
#
# This file is part of the GNU Binutils.
@ -64,6 +64,12 @@ static int no_toc_sort = 0;
/* Set if PLT call stubs should load r11. */
static int plt_static_chain = ${DEFAULT_PLT_STATIC_CHAIN-0};
/* Set if PLT call stubs need to be thread safe on power7+. */
static int plt_thread_safe = -1;
/* Set if individual PLT call stubs should be aligned. */
static int plt_stub_align = 0;
/* Whether to emit symbols for stubs. */
static int emit_stub_syms = -1;
@ -379,7 +385,8 @@ ppc_add_stub_section (const char *stub_sec_name, asection *input_section)
stub_sec = bfd_make_section_anyway_with_flags (stub_file->the_bfd,
stub_sec_name, flags);
if (stub_sec == NULL
|| !bfd_set_section_alignment (stub_file->the_bfd, stub_sec, 5))
|| !bfd_set_section_alignment (stub_file->the_bfd, stub_sec,
plt_stub_align > 5 ? plt_stub_align : 5))
goto err_ret;
output_section = input_section->output_section;
@ -504,7 +511,9 @@ gld${EMULATION_NAME}_after_allocation (void)
einfo ("%P: .init/.fini fragments use differing TOC pointers\n");
/* Call into the BFD backend to do the real work. */
if (!ppc64_elf_size_stubs (&link_info, group_size, plt_static_chain))
if (!ppc64_elf_size_stubs (&link_info, group_size,
plt_static_chain, plt_thread_safe,
plt_stub_align))
einfo ("%X%P: can not size stub section: %E\n");
}
}
@ -649,7 +658,11 @@ PARSE_AND_LIST_PROLOGUE=${PARSE_AND_LIST_PROLOGUE}'
#define OPTION_STUBGROUP_SIZE 321
#define OPTION_PLT_STATIC_CHAIN (OPTION_STUBGROUP_SIZE + 1)
#define OPTION_NO_PLT_STATIC_CHAIN (OPTION_PLT_STATIC_CHAIN + 1)
#define OPTION_STUBSYMS (OPTION_NO_PLT_STATIC_CHAIN + 1)
#define OPTION_PLT_THREAD_SAFE (OPTION_NO_PLT_STATIC_CHAIN + 1)
#define OPTION_NO_PLT_THREAD_SAFE (OPTION_PLT_THREAD_SAFE + 1)
#define OPTION_PLT_ALIGN (OPTION_NO_PLT_THREAD_SAFE + 1)
#define OPTION_NO_PLT_ALIGN (OPTION_PLT_ALIGN + 1)
#define OPTION_STUBSYMS (OPTION_NO_PLT_ALIGN + 1)
#define OPTION_NO_STUBSYMS (OPTION_STUBSYMS + 1)
#define OPTION_DOTSYMS (OPTION_NO_STUBSYMS + 1)
#define OPTION_NO_DOTSYMS (OPTION_DOTSYMS + 1)
@ -666,6 +679,10 @@ PARSE_AND_LIST_LONGOPTS=${PARSE_AND_LIST_LONGOPTS}'
{ "stub-group-size", required_argument, NULL, OPTION_STUBGROUP_SIZE },
{ "plt-static-chain", no_argument, NULL, OPTION_PLT_STATIC_CHAIN },
{ "no-plt-static-chain", no_argument, NULL, OPTION_NO_PLT_STATIC_CHAIN },
{ "plt-thread-safe", no_argument, NULL, OPTION_PLT_THREAD_SAFE },
{ "no-plt-thread-safe", no_argument, NULL, OPTION_NO_PLT_THREAD_SAFE },
{ "plt-align", optional_argument, NULL, OPTION_PLT_ALIGN },
{ "no-plt-align", no_argument, NULL, OPTION_NO_PLT_ALIGN },
{ "emit-stub-syms", no_argument, NULL, OPTION_STUBSYMS },
{ "no-emit-stub-syms", no_argument, NULL, OPTION_NO_STUBSYMS },
{ "dotsyms", no_argument, NULL, OPTION_DOTSYMS },
@ -691,10 +708,22 @@ PARSE_AND_LIST_OPTIONS=${PARSE_AND_LIST_OPTIONS}'
choose suitable defaults.\n"
));
fprintf (file, _("\
--plt-static-chain PLT call stubs should load r11.\n"
--plt-static-chain PLT call stubs should load r11.${DEFAULT_PLT_STATIC_CHAIN- (default)}\n"
));
fprintf (file, _("\
--no-plt-static-chain PLT call stubs should not load r11. (default)\n"
--no-plt-static-chain PLT call stubs should not load r11.${DEFAULT_PLT_STATIC_CHAIN+ (default)}\n"
));
fprintf (file, _("\
--plt-thread-safe PLT call stubs with load-load barrier.\n"
));
fprintf (file, _("\
--no-plt-thread-safe PLT call stubs without barrier.\n"
));
fprintf (file, _("\
--plt-align [=<align>] Align PLT call stubs to fit cache lines.\n"
));
fprintf (file, _("\
--no-plt-align Don'\''t align individual PLT call stubs.\n"
));
fprintf (file, _("\
--emit-stub-syms Label linker stubs with a symbol.\n"
@ -753,6 +782,31 @@ PARSE_AND_LIST_ARGS_CASES=${PARSE_AND_LIST_ARGS_CASES}'
plt_static_chain = 0;
break;
case OPTION_PLT_THREAD_SAFE:
plt_thread_safe = 1;
break;
case OPTION_NO_PLT_THREAD_SAFE:
plt_thread_safe = 0;
break;
case OPTION_PLT_ALIGN:
if (optarg != NULL)
{
char *end;
unsigned long val = strtoul (optarg, &end, 0);
if (*end || val > 8)
einfo (_("%P%F: invalid --plt-align `%s'\''\n"), optarg);
plt_stub_align = val;
}
else
plt_stub_align = 5;
break;
case OPTION_NO_PLT_ALIGN:
plt_stub_align = 0;
break;
case OPTION_STUBSYMS:
emit_stub_syms = 1;
break;

ld/ld.texinfo

@ -1,7 +1,7 @@
\input texinfo
@setfilename ld.info
@c Copyright 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
@c 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
@c 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
@c Free Software Foundation, Inc.
@syncodeindex ky cp
@c man begin INCLUDE
@ -6710,7 +6710,9 @@ optimization.
@cindex PowerPC64 multi-TOC
@kindex --no-multi-toc
@item --no-multi-toc
By default, PowerPC64 GCC generates code for a TOC model where TOC
If given any toc option besides @code{-mcmodel=medium} or
@code{-mcmodel=large}, PowerPC64 GCC generates code for a TOC model
where TOC
entries are accessed with a 16-bit offset from r2. This limits the
total TOC size to 64K. PowerPC64 @command{ld} extends this limit by
grouping code sections such that each group uses less than 64K for its
@ -6719,6 +6721,52 @@ calls. @command{ld} does not split apart input sections, so cannot
help if a single input file has a @code{.toc} section that exceeds
64K, most likely from linking multiple files with @command{ld -r}.
Use this option to turn off this feature.
@cindex PowerPC64 TOC sorting
@kindex --no-toc-sort
@item --no-toc-sort
By default, @command{ld} sorts TOC sections so that those whose file
happens to have a section called @code{.init} or @code{.fini} are
placed first, followed by TOC sections referenced by code generated
with PowerPC64 gcc's @code{-mcmodel=small}, and lastly TOC sections
referenced only by code generated with PowerPC64 gcc's
@code{-mcmodel=medium} or @code{-mcmodel=large} options. Doing this
results in better TOC grouping for multi-TOC. Use this option to turn
off this feature.
@cindex PowerPC64 PLT stub alignment
@kindex --plt-align
@kindex --no-plt-align
@item --plt-align
@itemx --no-plt-align
Use these options to control whether individual PLT call stubs are
aligned to a 32-byte boundary, or to the specified power of two
boundary when using @code{--plt-align=}. By default PLT call stubs
are packed tightly.
@cindex PowerPC64 PLT call stub static chain
@kindex --plt-static-chain
@kindex --no-plt-static-chain
@item --plt-static-chain
@itemx --no-plt-static-chain
Use these options to control whether PLT call stubs load the static
chain pointer (r11). @code{ld} defaults to not loading the static
chain since there is never any need to do so on a PLT call.
@cindex PowerPC64 PLT call stub thread safety
@kindex --plt-thread-safe
@kindex --no-plt-thread-safe
@item --plt-thread-safe
@itemx --no-plt-thread-safe
With power7's weakly ordered memory model, it is possible when using
lazy binding for ld.so to update a plt entry in one thread and have
another thread see the individual plt entry words update in the wrong
order, despite ld.so carefully writing in the correct order and using
memory write barriers. To avoid this we need some sort of read
barrier in the call stub, or use LD_BIND_NOW=1. By default, @code{ld}
looks for calls to commonly used functions that create threads, and if
seen, adds the necessary barriers. Use these options to change the
default behaviour.
@end table
@ifclear GENERIC