-
Notifications
You must be signed in to change notification settings - Fork 14.6k
release/21.x: [Sparc] Remove bogus stack adjustment for LD/GD TLS (#149890) #150048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-backend-sparc Author: None (llvmbot) Changes: Requested by: @jrtc27 Full diff: https://github.com/llvm/llvm-project/pull/150048.diff — 2 Files Affected:
diff --git a/llvm/lib/Target/Sparc/SparcISelLowering.cpp b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
index 9b434d87c2676..1aa8efe3e9979 100644
--- a/llvm/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/llvm/lib/Target/Sparc/SparcISelLowering.cpp
@@ -2201,7 +2201,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SDValue Chain = DAG.getEntryNode();
SDValue InGlue;
- Chain = DAG.getCALLSEQ_START(Chain, 1, 0, DL);
+ Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
Chain = DAG.getCopyToReg(Chain, DL, SP::O0, Argument, InGlue);
InGlue = Chain.getValue(1);
SDValue Callee = DAG.getTargetExternalSymbol("__tls_get_addr", PtrVT);
@@ -2219,7 +2219,7 @@ SDValue SparcTargetLowering::LowerGlobalTLSAddress(SDValue Op,
InGlue};
Chain = DAG.getNode(SPISD::TLS_CALL, DL, NodeTys, Ops);
InGlue = Chain.getValue(1);
- Chain = DAG.getCALLSEQ_END(Chain, 1, 0, InGlue, DL);
+ Chain = DAG.getCALLSEQ_END(Chain, 0, 0, InGlue, DL);
InGlue = Chain.getValue(1);
SDValue Ret = DAG.getCopyFromReg(Chain, DL, SP::O0, PtrVT, InGlue);
diff --git a/llvm/test/CodeGen/SPARC/tls-sp.ll b/llvm/test/CodeGen/SPARC/tls-sp.ll
new file mode 100644
index 0000000000000..de9af01398d23
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/tls-sp.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=sparc -relocation-model=pic < %s | FileCheck --check-prefix=SPARC %s
+; RUN: llc -mtriple=sparc64 -relocation-model=pic < %s | FileCheck --check-prefix=SPARC64 %s
+
+@x = external thread_local global i8
+
+;; Test that we don't over-allocate stack space when calling __tls_get_addr
+;; with the call frame pseudos able to be eliminated.
+define ptr @no_alloca() nounwind {
+; SPARC-LABEL: no_alloca:
+; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: save %sp, -96, %sp
+; SPARC-NEXT: .Ltmp0:
+; SPARC-NEXT: call .Ltmp1
+; SPARC-NEXT: .Ltmp2:
+; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC-NEXT: .Ltmp1:
+; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC-NEXT: add %i0, %o7, %i0
+; SPARC-NEXT: sethi %tgd_hi22(x), %i1
+; SPARC-NEXT: add %i1, %tgd_lo10(x), %i1
+; SPARC-NEXT: add %i0, %i1, %o0, %tgd_add(x)
+; SPARC-NEXT: call __tls_get_addr, %tgd_call(x)
+; SPARC-NEXT: nop
+; SPARC-NEXT: ret
+; SPARC-NEXT: restore %g0, %o0, %o0
+;
+; SPARC64-LABEL: no_alloca:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -128, %sp
+; SPARC64-NEXT: .Ltmp0:
+; SPARC64-NEXT: rd %pc, %o7
+; SPARC64-NEXT: .Ltmp2:
+; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp2-.Ltmp0)), %i0
+; SPARC64-NEXT: .Ltmp1:
+; SPARC64-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp1-.Ltmp0)), %i0
+; SPARC64-NEXT: add %i0, %o7, %i0
+; SPARC64-NEXT: sethi %tgd_hi22(x), %i1
+; SPARC64-NEXT: add %i1, %tgd_lo10(x), %i1
+; SPARC64-NEXT: add %i0, %i1, %o0, %tgd_add(x)
+; SPARC64-NEXT: call __tls_get_addr, %tgd_call(x)
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore %g0, %o0, %o0
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @x)
+ ret ptr %0
+}
+
+;; Test that %sp is valid for the call to __tls_get_addr. We store to a dynamic
+;; alloca in order to prevent eliminating any call frame pseudos from the call.
+define ptr @dynamic_alloca(i64 %n) nounwind {
+; SPARC-LABEL: dynamic_alloca:
+; SPARC: ! %bb.0: ! %entry
+; SPARC-NEXT: save %sp, -96, %sp
+; SPARC-NEXT: .Ltmp3:
+; SPARC-NEXT: call .Ltmp4
+; SPARC-NEXT: .Ltmp5:
+; SPARC-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i0
+; SPARC-NEXT: .Ltmp4:
+; SPARC-NEXT: or %i0, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i0
+; SPARC-NEXT: add %i0, %o7, %i0
+; SPARC-NEXT: sethi %tgd_hi22(x), %i2
+; SPARC-NEXT: add %i2, %tgd_lo10(x), %i2
+; SPARC-NEXT: add %i0, %i2, %o0, %tgd_add(x)
+; SPARC-NEXT: call __tls_get_addr, %tgd_call(x)
+; SPARC-NEXT: nop
+; SPARC-NEXT: add %i1, 7, %i0
+; SPARC-NEXT: and %i0, -8, %i0
+; SPARC-NEXT: sub %sp, %i0, %i0
+; SPARC-NEXT: add %i0, -8, %sp
+; SPARC-NEXT: mov 1, %i1
+; SPARC-NEXT: stb %i1, [%i0+88]
+; SPARC-NEXT: ret
+; SPARC-NEXT: restore %g0, %o0, %o0
+;
+; SPARC64-LABEL: dynamic_alloca:
+; SPARC64: ! %bb.0: ! %entry
+; SPARC64-NEXT: save %sp, -128, %sp
+; SPARC64-NEXT: .Ltmp3:
+; SPARC64-NEXT: rd %pc, %o7
+; SPARC64-NEXT: .Ltmp5:
+; SPARC64-NEXT: sethi %hi(_GLOBAL_OFFSET_TABLE_+(.Ltmp5-.Ltmp3)), %i1
+; SPARC64-NEXT: .Ltmp4:
+; SPARC64-NEXT: or %i1, %lo(_GLOBAL_OFFSET_TABLE_+(.Ltmp4-.Ltmp3)), %i1
+; SPARC64-NEXT: add %i1, %o7, %i1
+; SPARC64-NEXT: sethi %tgd_hi22(x), %i2
+; SPARC64-NEXT: add %i2, %tgd_lo10(x), %i2
+; SPARC64-NEXT: add %i1, %i2, %o0, %tgd_add(x)
+; SPARC64-NEXT: call __tls_get_addr, %tgd_call(x)
+; SPARC64-NEXT: nop
+; SPARC64-NEXT: add %i0, 15, %i0
+; SPARC64-NEXT: and %i0, -16, %i0
+; SPARC64-NEXT: sub %sp, %i0, %i0
+; SPARC64-NEXT: mov %i0, %sp
+; SPARC64-NEXT: mov 1, %i1
+; SPARC64-NEXT: stb %i1, [%i0+2175]
+; SPARC64-NEXT: ret
+; SPARC64-NEXT: restore %g0, %o0, %o0
+entry:
+ %0 = call ptr @llvm.threadlocal.address.p0(ptr @x)
+ %1 = alloca i8, i64 %n
+ store i8 1, ptr %1
+ ret ptr %0
+}
|
Who can review this? |
(cherry picked from commit dd36a69)
The second argument to getCALLSEQ_START/getCALLSEQ_END (changed from 1 to 0 by this patch) is the number of bytes to adjust the stack by for the duration of the call. In most cases, PEI is able to eliminate the corresponding call frame pseudos, folding them into the initial stack frame allocation (rounded up to stack alignment), where it just ends up allocating more space than needed. However, in the rare case where this cannot be done, e.g. due to the use of a dynamic alloca, the 1-byte stack adjustment persists and results in a misaligned stack for the duration of the call. This has been the case ever since TLS support was added in cb1dca6 ("[Sparc] Add support for TLS in sparc."), and I can only assume that 1 was used erroneously, thinking that it is the number of arguments (as there is 1 register argument for the call), not the number of bytes for on-stack arguments. Fixes: llvm#149808 (cherry picked from commit 4b99eb2)
@jrtc27 (or anyone else): if you would like to add a note about this fix in the release notes (completely optional), please reply to this comment with a one- or two-sentence description of the fix. When you are done, please add the release:note label to this PR. |
Backport dd36a69 4b99eb2
Requested by: @jrtc27