diff options
author | Tom Stellard <tstellar@redhat.com> | 2019-05-31 22:44:22 +0000 |
---|---|---|
committer | Tom Stellard <tstellar@redhat.com> | 2019-05-31 22:44:22 +0000 |
commit | 0489682ef3b5659eaf05dd7d51ab6c049575ca6b (patch) | |
tree | bd2b645afc25b5816c722df9a27b427f5fe6d220 | |
parent | Merging part of r358975: (diff) | |
download | llvm-project-0489682ef3b5659eaf05dd7d51ab6c049575ca6b.tar.gz llvm-project-0489682ef3b5659eaf05dd7d51ab6c049575ca6b.tar.bz2 llvm-project-0489682ef3b5659eaf05dd7d51ab6c049575ca6b.zip |
Merging r360405:
------------------------------------------------------------------------
r360405 | maskray | 2019-05-09 22:51:00 -0700 (Thu, 09 May 2019) | 25 lines
[PPC64] Define getThunkSectionSpacing() based on the range of R_PPC64_REL24
Suggested by Sean Fertile and Peter Smith.
Thunk section spacing decreases the total number of thunks. I measured a
decrease of 1% or less in some large programs, with no perceivable
slowdown in link time. Override getThunkSectionSpacing() to enable it.
0x2000000 is the farthest point R_PPC64_REL24 can reach. I tried several
numbers and found 0x2000000 works the best. Numbers near 0x2000000 work
as well but let's just use the simpler number.
As demonstrated by the updated tests, this essentially changes placement
of most thunks to the end of the output section. We leverage this
property to fix PR40740 reported by Alfredo Dal'Ava Júnior:
The output section .init consists of input sections from several object
files (crti.o crtbegin.o crtend.o crtn.o). Sections other than the last
one do not have a terminator. With this patch, we create the thunk after
the last .init input section and thus fix the issue. This is not
foolproof but works quite well for such sections (with no terminator) in
practice.
Reviewed By: ruiu, sfertile
Differential Revision: https://reviews.llvm.org/D61720
------------------------------------------------------------------------
llvm-svn: 362274
-rw-r--r-- | lld/ELF/Arch/PPC64.cpp | 9 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-bsymbolic-toc-restore.s | 4 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-call-reach.s | 15 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-ifunc.s | 28 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-local-dynamic.s | 2 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-long-branch-init.s | 43 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-plt-stub.s | 11 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-toc-restore-recursive-call.s | 11 | ||||
-rw-r--r-- | lld/test/ELF/ppc64-toc-restore.s | 37 |
9 files changed, 101 insertions, 59 deletions
diff --git a/lld/ELF/Arch/PPC64.cpp b/lld/ELF/Arch/PPC64.cpp index 8a320c9a4e9e..cbfa8073d33f 100644 --- a/lld/ELF/Arch/PPC64.cpp +++ b/lld/ELF/Arch/PPC64.cpp @@ -113,6 +113,7 @@ public: void writeGotHeader(uint8_t *Buf) const override; bool needsThunk(RelExpr Expr, RelType Type, const InputFile *File, uint64_t BranchAddr, const Symbol &S) const override; + uint32_t getThunkSectionSpacing() const override; bool inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const override; RelExpr adjustRelaxExpr(RelType Type, const uint8_t *Data, RelExpr Expr) const override; @@ -759,6 +760,14 @@ bool PPC64::needsThunk(RelExpr Expr, RelType Type, const InputFile *File, return !inBranchRange(Type, BranchAddr, S.getVA()); } +uint32_t PPC64::getThunkSectionSpacing() const { + // See comment in Arch/ARM.cpp for a more detailed explanation of + // getThunkSectionSpacing(). For PPC64 we pick the constant here based on + // R_PPC64_REL24, which is used by unconditional branch instructions. + // 0x2000000 = (1 << 24-1) * 4 + return 0x2000000; +} + bool PPC64::inBranchRange(RelType Type, uint64_t Src, uint64_t Dst) const { int64_t Offset = Dst - Src; if (Type == R_PPC64_REL14) diff --git a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s index b7d9edd45d43..d467d22ff7b1 100644 --- a/lld/test/ELF/ppc64-bsymbolic-toc-restore.s +++ b/lld/test/ELF/ppc64-bsymbolic-toc-restore.s @@ -53,7 +53,7 @@ caller: # CHECK-LABEL: caller # CHECK: bl .+44 # CHECK-NEXT: mr 31, 3 -# CHECK-NEXT: bl .-48 +# CHECK-NEXT: bl .+44 # CHECK-NEXT: ld 2, 24(1) # CHECK-NEXT: add 3, 3, 31 # CHECK-NEXT: addi 1, 1, 32 @@ -63,6 +63,6 @@ caller: # CHECK-EMPTY: # CHECK-NEXT: def: # CHECK-NEXT: addis 2, 12, 2 -# CHECK-NEXT: addi 2, 2, -32636 +# CHECK-NEXT: addi 2, 2, -32616 # CHECK-NEXT: li 3, 55 # CHECK-NEXT: blr diff --git a/lld/test/ELF/ppc64-call-reach.s b/lld/test/ELF/ppc64-call-reach.s index 085e68f9aebd..b843e7e531c9 100644 --- a/lld/test/ELF/ppc64-call-reach.s +++ 
b/lld/test/ELF/ppc64-call-reach.s @@ -65,27 +65,24 @@ test: # NEGOFFSET: 10010014: bl .-33554432 # NEGOFFSET: 10010024: b .+33554432 +# THUNK-LABEL: test: +# THUNK: 10010014: bl .+20 +# THUNK: 10010024: b .+20 + # .branch_lt[0] # THUNK-LABEL: __long_branch_callee: -# THUNK-NEXT: 10010000: addis 12, 2, -1 +# THUNK-NEXT: 10010028: addis 12, 2, -1 # THUNK-NEXT: ld 12, -32768(12) # THUNK-NEXT: mtctr 12 # THUNK-NEXT: bctr # .branch_lt[1] # THUNK-LABEL: __long_branch_tail_callee: -# THUNK-NEXT: 10010010: addis 12, 2, -1 +# THUNK-NEXT: 10010038: addis 12, 2, -1 # THUNK-NEXT: ld 12, -32760(12) # THUNK-NEXT: mtctr 12 # THUNK-NEXT: bctr -# Each call now branches to a thunk, and although it is printed as positive -# the offset is interpreted as a signed 26 bit value so 67108812 is actually -# -52. -# THUNK-LABEL: test: -# THUNK: 10010034: bl .-52 -# THUNK: 10010044: b .+67108812 - # The offset from the TOC to the .branch_lt section is (-1 << 16) - 32768. # Name Type Address Off Size # BRANCHLT: .branch_lt PROGBITS 0000000010020000 020000 000010 diff --git a/lld/test/ELF/ppc64-ifunc.s b/lld/test/ELF/ppc64-ifunc.s index 4bf50b98db11..32e317f3c059 100644 --- a/lld/test/ELF/ppc64-ifunc.s +++ b/lld/test/ELF/ppc64-ifunc.s @@ -15,11 +15,21 @@ # RUN: llvm-readelf -r %t | FileCheck --check-prefix=DYNREL %s # NM-DAG: 0000000010028000 d .TOC. -# NM-DAG: 0000000010010028 T ifunc -# NM-DAG: 000000001001002c T ifunc2 +# NM-DAG: 0000000010010000 T ifunc +# NM-DAG: 0000000010010004 T ifunc2 # SECTIONS: .plt NOBITS 0000000010030000 +# __plt_ifunc - . = 0x10010020 - 0x10010010 = 16 +# __plt_ifunc2 - . = 0x10010044 - 0x10010018 = 28 +# CHECK: _start: +# CHECK-NEXT: addis 2, 12, 1 +# CHECK-NEXT: addi 2, 2, 32760 +# CHECK-NEXT: 10010010: bl .+16 +# CHECK-NEXT: ld 2, 24(1) +# CHECK-NEXT: 10010018: bl .+28 +# CHECK-NEXT: ld 2, 24(1) + # .plt[0] - .TOC. 
= 0x10030000 - 0x10028000 = (1<<16) - 32768 # CHECK: __plt_ifunc: # CHECK-NEXT: std 2, 24(1) @@ -36,19 +46,9 @@ # CHECK-NEXT: mtctr 12 # CHECK-NEXT: bctr -# __plt_ifunc - . = 0x10010000 - 0x10010038 = -56 -# __plt_ifunc2 - . = 0x10010014 - 0x10010040 = -44 -# CHECK: _start: -# CHECK-NEXT: addis 2, 12, 1 -# CHECK-NEXT: addi 2, 2, 32720 -# CHECK-NEXT: 10010038: bl .-56 -# CHECK-NEXT: ld 2, 24(1) -# CHECK-NEXT: 10010040: bl .-44 -# CHECK-NEXT: ld 2, 24(1) - # Check that we emit 2 R_PPC64_IRELATIVE. -# DYNREL: R_PPC64_IRELATIVE 10010028 -# DYNREL: R_PPC64_IRELATIVE 1001002c +# DYNREL: R_PPC64_IRELATIVE 10010000 +# DYNREL: R_PPC64_IRELATIVE 10010004 .type ifunc STT_GNU_IFUNC .globl ifunc diff --git a/lld/test/ELF/ppc64-local-dynamic.s b/lld/test/ELF/ppc64-local-dynamic.s index 8a23863f67de..87e33b784b8b 100644 --- a/lld/test/ELF/ppc64-local-dynamic.s +++ b/lld/test/ELF/ppc64-local-dynamic.s @@ -113,7 +113,7 @@ k: // Dis: test: // Dis: addis 3, 2, 0 // Dis-NEXT: addi 3, 3, -32760 -// Dis-NEXT: bl .-60 +// Dis-NEXT: bl .+60 // Dis-NEXT: ld 2, 24(1) // Dis-NEXT: addis 3, 3, 0 // Dis-NEXT: lwa 3, -32768(3) diff --git a/lld/test/ELF/ppc64-long-branch-init.s b/lld/test/ELF/ppc64-long-branch-init.s new file mode 100644 index 000000000000..80b3919cc455 --- /dev/null +++ b/lld/test/ELF/ppc64-long-branch-init.s @@ -0,0 +1,43 @@ +# REQUIRES: ppc + +# RUN: llvm-mc -filetype=obj -triple=powerpc64-pc-freebsd13.0 %s -o %t.o +# RUN: ld.lld %t.o -o %t +# RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s + +## .init consists of sections from several object files. Sections other than the +## last one do not have a terminator. Check we do not create a long branch stub +## in the middle. +## We currently use thunk section spacing to ensure the stub is in the end. This +## is not foolproof but good enough to not break in practice. 
+ +# CHECK: Disassembly of section .init: +# CHECK-LABEL: _init: +# CHECK: blr +# CHECK-EMPTY: +# CHECK-LABEL: __long_branch_foo: + +.globl foo +foo: + .space 0x2000000 + blr + +.section .init,"ax",@progbits,unique,0 +.globl _init +_init: + stdu 1, -48(1) + mflr 0 + std 0, 64(1) + +.section .init,"ax",@progbits,unique,1 + bl foo + nop + +.section .init,"ax",@progbits,unique,2 + bl foo + nop + +.section .init,"ax",@progbits,unique,3 + ld 1, 0(1) + ld 0, 16(1) + mtlr 0 + blr diff --git a/lld/test/ELF/ppc64-plt-stub.s b/lld/test/ELF/ppc64-plt-stub.s index 95e28a5850a9..bf3ac09fd516 100644 --- a/lld/test/ELF/ppc64-plt-stub.s +++ b/lld/test/ELF/ppc64-plt-stub.s @@ -4,16 +4,19 @@ // RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %p/Inputs/shared-ppc64.s -o %t2.o // RUN: ld.lld -shared %t2.o -o %t2.so // RUN: ld.lld %t.o %t2.so -o %t -// RUN: llvm-objdump -d %t | FileCheck %s +// RUN: llvm-objdump -d --no-show-raw-insn %t | FileCheck %s // CHECK: Disassembly of section .text: -// CHECK-NEXT: __plt_foo: +// CHECK-NEXT: _start: +// CHECK: 10010008: bl .+16 + +// CHECK-LABEL: 0000000010010018 __plt_foo: // CHECK-NEXT: std 2, 24(1) // CHECK-NEXT: addis 12, 2, 0 // CHECK-NEXT: ld 12, 32560(12) @@ -21,8 +24,6 @@ // CHECK-NEXT: bctr -// CHECK: _start: -// CHECK: bl .-40 .text .abiversion 2 .globl _start diff --git a/lld/test/ELF/ppc64-toc-restore-recursive-call.s b/lld/test/ELF/ppc64-toc-restore-recursive-call.s index 538b12c7c90e..756a058cc565 100644 --- a/lld/test/ELF/ppc64-toc-restore-recursive-call.s +++ b/lld/test/ELF/ppc64-toc-restore-recursive-call.s @@ -14,12 +14,11 @@ # for recursive calls as well as keeps the logic 
for recursive calls consistent # with non-recursive calls. -# CHECK-LABEL: __plt_recursive_func: -# CHECK-NEXT: 10000: -# CHECK-LABEL: recursive_func -# CHECK-NEXT: 10014: -# CHECK: 1003c: bl .-60 -# CHECK-NEXT: 10040: ld 2, 24(1) +# CHECK-LABEL: 0000000000010000 recursive_func: +# CHECK: 10028: bl .+32 +# CHECK-NEXT: ld 2, 24(1) + +# CHECK-LABEL: 0000000000010048 __plt_recursive_func: .abiversion 2 .section ".text" diff --git a/lld/test/ELF/ppc64-toc-restore.s b/lld/test/ELF/ppc64-toc-restore.s index 8ffe2136591d..d65bef847a7b 100644 --- a/lld/test/ELF/ppc64-toc-restore.s +++ b/lld/test/ELF/ppc64-toc-restore.s @@ -28,16 +28,11 @@ _start: bl foo nop bl bar_local - - -// CHECK: Disassembly of section .text: -// CHECK: _start: -// CHECK: 1001001c: bl .-28 -// CHECK-NOT: 10010020: nop -// CHECK: 10010020: ld 2, 24(1) -// CHECK: 10010024: bl .-16 -// CHECK-NOT: 10010028: nop -// CHECK-NOT: 10010028: ld 2, 24(1) +// CHECK-LABEL: _start: +// CHECK-NEXT: 10010008: bl .+64 +// CHECK-NEXT: 1001000c: ld 2, 24(1) +// CHECK-NEXT: 10010010: bl .-16 +// CHECK-EMPTY: # Calling a function in another object file which will have same # TOC base does not need a nop. If nop present, do not rewrite to @@ -47,26 +42,24 @@ _diff_object: bl foo_not_shared bl foo_not_shared nop - -// CHECK: _diff_object: -// CHECK-NEXT: 10010028: bl .+24 -// CHECK-NEXT: 1001002c: bl .+20 -// CHECK-NEXT: 10010030: nop +// CHECK-LABEL: _diff_object: +// CHECK-NEXT: 10010014: bl .+28 +// CHECK-NEXT: 10010018: bl .+24 +// CHECK-NEXT: 1001001c: nop # Branching to a local function does not need a nop .global noretbranch noretbranch: b bar_local -// CHECK: noretbranch: -// CHECK: 10010034: b .+67108832 -// CHECK-NOT: 10010038: nop -// CHECK-NOT: 1001003c: ld 2, 24(1) +// CHECK-LABEL: noretbranch: +// CHECK: 10010020: b .+67108832 +// CHECK-EMPTY: // This should come last to check the end-of-buffer condition. 
.global last last: bl foo nop -// CHECK: last: -// CHECK: 10010038: bl .-56 -// CHECK-NEXT: 1001003c: ld 2, 24(1) +// CHECK-LABEL: last: +// CHECK-NEXT: 10010024: bl .+36 +// CHECK-NEXT: 10010028: ld 2, 24(1) |