mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-04 03:44:59 +00:00
01c7f4b606
This change affects the non-linker-script case (precisely, when the `SECTIONS` command is not used). It deletes 3 alignments at PT_LOAD boundaries for the default case: the size of a powerpc64 binary can be decreased by at most 192 KiB. The technique can be ported to other targets. Let me demonstrate the idea with a maxPageSize=65536 example: When assigning the address to the first output section of a new PT_LOAD, if the end p_vaddr of the previous PT_LOAD is 0x10020, we advance to the next multiple of maxPageSize: 0x20000. The new PT_LOAD will thus have p_vaddr=0x20000. Because p_offset and p_vaddr are congruent modulo maxPageSize, p_offset will be 0x20000, leaving a p_offset gap [0x10020, 0x20000) in the output. Alternatively, if we advance to 0x20020, the new PT_LOAD will have p_vaddr=0x20020. We can pick either 0x10020 or 0x20020 for p_offset! Obviously 0x10020 is the choice because it leaves no gap. At runtime, p_vaddr will be rounded down to a multiple of pagesize (65536 if pagesize=maxPageSize). This PT_LOAD will load additional initial contents from the p_offset range [0x10000,0x10020), which will also be loaded by the previous PT_LOAD. This is fine if -z noseparate-code is in effect or if we are not transitioning between executable and non-executable segments. ld.bfd -z noseparate-code leverages this technique to keep output small. This patch implements the technique in lld, which is mostly effective on targets with large defaultMaxPageSize (AArch64/MIPS/PPC: 65536). The 3 removed alignments can save almost 3*65536 bytes. Two places that rely on p_vaddr%pagesize = 0 have to be updated. 1) We used to round p_memsz(PT_GNU_RELRO) up to commonPageSize (defaults to 4096 on all targets). Now p_vaddr%commonPageSize may be non-zero. The updated formula takes that into account. 2) Our TP offset formulae are only correct if p_vaddr%p_align = 0. Fix them. See the updated comments in InputSection.cpp for details.
On targets where we enable the technique (only PPC64 now), we can potentially make `p_vaddr(PT_TLS)%p_align(PT_TLS) != 0` if `sh_addralign(.tdata) < sh_addralign(.tbss)`. This exposes many problems in ld.so implementations, especially the offsets of dynamic TLS blocks. Known issues: FreeBSD 13.0-CURRENT rtld-elf (i386/amd64/powerpc/arm64); glibc (HEAD) i386 and x86_64 (https://sourceware.org/bugzilla/show_bug.cgi?id=24606); musl<=1.1.22 on TLS Variant I architectures (aarch64/powerpc64/...). So, force p_vaddr%p_align = 0 by rounding dot up to p_align(PT_TLS). The technique will be enabled (with updated tests) for other targets in subsequent patches. Reviewed By: ruiu Differential Revision: https://reviews.llvm.org/D64906 llvm-svn: 369343
74 lines
1.8 KiB
ArmAsm
74 lines
1.8 KiB
ArmAsm
# Lit test for lld on PowerPC64. The same input is assembled, linked and
# inspected twice: first for little-endian (powerpc64le), then for big-endian
# (powerpc64). Both passes share the section-table and disassembly
# expectations; only the expected .toc hex dump differs per byte order.
# REQUIRES: ppc
|
|
# RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
|
|
# RUN: ld.lld %t.o -o %t
|
|
# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s
|
|
# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-LE %s
|
|
# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s
|
|
|
|
# Second pass: identical steps with the big-endian triple.
# RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
|
|
# RUN: ld.lld %t.o -o %t
|
|
# RUN: llvm-readelf -S %t | FileCheck --check-prefixes=SECTIONS %s
|
|
# RUN: llvm-readelf -x .toc %t | FileCheck --check-prefixes=HEX-BE %s
|
|
# RUN: llvm-objdump -d %t | FileCheck --check-prefixes=CHECK %s
|
|
|
|
# .LJT is a local symbol (non-preemptable).
|
|
# Test we can perform the toc-indirect to toc-relative relaxation.
|
|
|
|
# Expected address of .rodata, the section that holds the jump table .LJT.
# The same value is expected as the content of the .toc entry below.
# SECTIONS: .rodata PROGBITS 00000000100001c8
|
|
|
|
# Little-endian dump of .toc: the 8-byte value 0x00000000100001c8 is shown
# least significant byte first.
# HEX-LE: section '.toc':
|
|
# HEX-LE-NEXT: 10020228 c8010010 00000000
|
|
|
|
# Big-endian dump of .toc: same value, most significant byte first.
# HEX-BE: section '.toc':
|
|
# HEX-BE-NEXT: 10020228 00000000 100001c8
|
|
|
|
# Expected disassembly of _start after linking. The input's addis + ld pair
# that goes through the .toc entry is expected to appear as addis + addi,
# i.e. the address is computed from r2 instead of being loaded from .toc.
# The input's "rldicl 3, 3, 0, 62" is expected to be printed as clrldi.
# CHECK-LABEL: _start
|
|
# CHECK: clrldi 3, 3, 62
|
|
# CHECK-NEXT: addis 4, 2, -3
|
|
# CHECK-NEXT: addi 4, 4, 32680
|
|
# CHECK-NEXT: sldi 3, 3, 2
|
|
|
|
# _start: test entry point. Uses the low two bits of r3 as an index into the
# jump table .LJT and branches to the selected block (.LBB1-.LBB4). The table
# address is fetched through the TOC entry .LJTI_TE; that addis + ld pair is
# the sequence this test expects the linker to relax.
.text
|
|
.global _start
|
|
.type _start, @function
|
|
_start:
|
|
# Global entry: r2 = r12 + (.TOC. - .Lstart_gep), built from the high-adjusted
# (@ha) and low (@l) halves of that distance.
# NOTE(review): presumably r12 holds the address of .Lstart_gep on entry
# (ELFv2 global-entry convention) - not established by this file.
.Lstart_gep:
|
|
addis 2, 12, .TOC.-.Lstart_gep@ha
|
|
addi 2, 2, .TOC.-.Lstart_gep@l
|
|
# Local entry: .localentry records the distance between the two entry labels
# for _start.
.Lstart_lep:
|
|
.localentry _start, .Lstart_lep-.Lstart_gep
|
|
# Rotate by 0 and clear the upper 62 bits: r3 keeps only its low two bits,
# giving an index in 0..3 for the four-entry table.
rldicl 3, 3, 0, 62
|
|
# TOC-indirect access: r4 = content of the .toc slot .LJTI_TE, addressed from
# r2 with the offset split into @toc@ha and @toc@l parts.
addis 4, 2, .LJTI_TE@toc@ha
|
|
ld 4, .LJTI_TE@toc@l(4)
|
|
# Scale the index by 4, the size of each .long table entry.
sldi 3, 3, 2
|
|
# r3 = sign-extended 32-bit table entry at address r3 + r4.
lwax 3, 3, 4
|
|
# Entries are stored as (.LBBn - .LJT), so adding the table base in r4 yields
# the address of the selected block.
add 3, 3, 4
|
|
# Indirect branch to the selected block through CTR.
mtctr 3
|
|
bctr
|
|
|
|
# Jump-table targets. Each block loads a distinct constant into r3
# (0, 10, 55, 255) and then executes blr. The jump table .LJT stores the
# offset of each of these labels from the start of the table.
.LBB1:
|
|
li 3, 0
|
|
blr
|
|
.LBB2:
|
|
li 3, 10
|
|
blr
|
|
.LBB3:
|
|
li 3, 55
|
|
blr
|
|
.LBB4:
|
|
li 3, 255
|
|
blr
|
|
|
|
# Jump table, placed in read-only data and aligned to 4 bytes (.p2align 2).
# Each 32-bit entry is the offset of one target block from the start of the
# table itself (.LJT), listed in index order 0..3.
.section .rodata,"a",@progbits
|
|
.p2align 2
|
|
.LJT:
|
|
.long .LBB1-.LJT
|
|
.long .LBB2-.LJT
|
|
.long .LBB3-.LJT
|
|
.long .LBB4-.LJT
|
|
|
|
# Writable .toc section holding the single TOC slot used by _start.
.section .toc,"aw",@progbits
|
|
# TOC entry for the jumptable address.
|
|
# .LJTI_TE labels the slot; _start refers to it with @toc@ha / @toc@l.
.LJTI_TE:
|
|
# The slot's value is the address of the jump table .LJT.
.tc .LJT[TC],.LJT
|