Skip to content

Commit 0a71d0f

Browse files
committed
SimplifyCFG: allow jump-threading for switch_enum_data_addr instructions.
If the branch-block injects a certain enum case and the destination switches on that enum, it's worth jump threading. E.g. inject_enum_addr %enum : $*Optional<T>, #Optional.some ... // no memory writes here br DestBB DestBB: ... // no memory writes here switch_enum_addr %enum : $*Optional<T>, case #Optional.some ... This enables removing all code with optionals in a loop, which iterates over an array of address-only elements, e.g. func test<T>(_ items: [T]) { for i in items { print(i) } }
1 parent 28220ff commit 0a71d0f

File tree

4 files changed

+167
-11
lines changed

4 files changed

+167
-11
lines changed

lib/SILOptimizer/Transforms/SimplifyCFG.cpp

Lines changed: 66 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -916,16 +916,57 @@ static bool couldRemoveRelease(SILBasicBlock *SrcBB, SILValue SrcV,
916916
return IsReleaseOfDest;
917917
}
918918

919+
/// Returns true if any instruction in \p block may write memory.
920+
static bool blockMayWriteMemory(SILBasicBlock *block) {
921+
for (auto instAndIdx : llvm::enumerate(*block)) {
922+
if (instAndIdx.value().mayWriteToMemory())
923+
return true;
924+
// Only look at the first 20 instructions to avoid compile time problems for
925+
// corner cases of very large blocks without memory writes.
926+
// 20 instructions is more than enough.
927+
if (instAndIdx.index() > 50)
928+
return true;
929+
}
930+
return false;
931+
}
932+
933+
// Returns true if \p block contains an injected an enum case into \p enumAddr
934+
// which is valid at the end of the block.
935+
static bool hasInjectedEnumAtEndOfBlock(SILBasicBlock *block, SILValue enumAddr) {
936+
for (auto instAndIdx : llvm::enumerate(llvm::reverse(*block))) {
937+
SILInstruction &inst = instAndIdx.value();
938+
if (auto *injectInst = dyn_cast<InjectEnumAddrInst>(&inst)) {
939+
return injectInst->getOperand() == enumAddr;
940+
}
941+
if (inst.mayWriteToMemory())
942+
return false;
943+
// Only look at the first 20 instructions to avoid compile time problems for
944+
// corner cases of very large blocks without memory writes.
945+
// 20 instructions is more than enough.
946+
if (instAndIdx.index() > 50)
947+
return false;
948+
}
949+
return false;
950+
}
951+
919952
/// tryJumpThreading - Check to see if it looks profitable to duplicate the
920953
/// destination of an unconditional jump into the bottom of this block.
921954
bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
922955
auto *DestBB = BI->getDestBB();
923956
auto *SrcBB = BI->getParent();
957+
TermInst *destTerminator = DestBB->getTerminator();
924958
// If the destination block ends with a return, we don't want to duplicate it.
925959
// We want to maintain the canonical form of a single return where possible.
926-
if (DestBB->getTerminator()->isFunctionExiting())
960+
if (destTerminator->isFunctionExiting())
927961
return false;
928962

963+
// Jump threading only makes sense if there is an argument on the branch
964+
// (which is reacted on in the DestBB), or if this goes through a memory
965+
// location (switch_enum_addr is the only adress-instruction which we
966+
// currently handle).
967+
if (BI->getArgs().empty() && !isa<SwitchEnumAddrInst>(destTerminator))
968+
return false;
969+
929970
// We don't have a great cost model at the SIL level, so we don't want to
930971
// blissly duplicate tons of code with a goal of improved performance (we'll
931972
// leave that to LLVM). However, doing limited code duplication can lead to
@@ -956,11 +997,29 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
956997
}
957998
}
958999

959-
if (ThreadingBudget == 0 && isa<CondBranchInst>(DestBB->getTerminator())) {
960-
for (auto V : BI->getArgs()) {
961-
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
1000+
if (ThreadingBudget == 0) {
1001+
if (isa<CondBranchInst>(destTerminator)) {
1002+
for (auto V : BI->getArgs()) {
1003+
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
1004+
ThreadingBudget = 4;
1005+
break;
1006+
}
1007+
}
1008+
} else if (auto *SEA = dyn_cast<SwitchEnumAddrInst>(destTerminator)) {
1009+
// If the branch-block injects a certain enum case and the destination
1010+
// switches on that enum, it's worth jump threading. E.g.
1011+
//
1012+
// inject_enum_addr %enum : $*Optional<T>, #Optional.some
1013+
// ... // no memory writes here
1014+
// br DestBB
1015+
// DestBB:
1016+
// ... // no memory writes here
1017+
// switch_enum_addr %enum : $*Optional<T>, case #Optional.some ...
1018+
//
1019+
SILValue enumAddr = SEA->getOperand();
1020+
if (!blockMayWriteMemory(DestBB) &&
1021+
hasInjectedEnumAtEndOfBlock(SrcBB, enumAddr)) {
9621022
ThreadingBudget = 4;
963-
break;
9641023
}
9651024
}
9661025
}
@@ -976,7 +1035,7 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
9761035
// control flow. Still, we make an exception for switch_enum.
9771036
bool DestIsLoopHeader = (LoopHeaders.count(DestBB) != 0);
9781037
if (DestIsLoopHeader) {
979-
if (!isa<SwitchEnumInst>(DestBB->getTerminator()))
1038+
if (!isa<SwitchEnumInst>(destTerminator))
9801039
return false;
9811040
}
9821041

@@ -1286,8 +1345,7 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
12861345
// If this unconditional branch has BBArgs, check to see if duplicating the
12871346
// destination would allow it to be simplified. This is a simple form of jump
12881347
// threading.
1289-
if (!isVeryLargeFunction && !BI->getArgs().empty() &&
1290-
tryJumpThreading(BI))
1348+
if (!isVeryLargeFunction && tryJumpThreading(BI))
12911349
return true;
12921350

12931351
return Simplified;

test/SILOptimizer/enum_jump_thread.sil

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,3 +49,83 @@ bb5(%7 : $E2):
4949
return %7 : $E2
5050
}
5151

52+
// CHECK-LABEL: sil @test_enum_addr
53+
sil @test_enum_addr : $@convention(thin) () -> Builtin.Int32 {
54+
bb0:
55+
%2 = alloc_stack $E
56+
cond_br undef, bb1, bb2
57+
58+
// CHECK: bb1:
59+
// CHECK-NEXT: inject_enum_addr
60+
// CHECK-NEXT: switch_enum_addr
61+
bb1:
62+
inject_enum_addr %2 : $*E, #E.A!enumelt
63+
br bb3
64+
65+
// CHECK: bb2:
66+
// CHECK-NEXT: inject_enum_addr
67+
// CHECK-NEXT: switch_enum_addr
68+
bb2:
69+
inject_enum_addr %2 : $*E, #E.B!enumelt
70+
br bb3
71+
72+
bb3:
73+
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5
74+
75+
bb4:
76+
%10 = integer_literal $Builtin.Int32, 1
77+
br bb6(%10 : $Builtin.Int32)
78+
79+
bb5:
80+
%11 = integer_literal $Builtin.Int32, 2
81+
br bb6(%11 : $Builtin.Int32)
82+
83+
bb6(%12 : $Builtin.Int32):
84+
dealloc_stack %2 : $*E
85+
return %12 : $Builtin.Int32
86+
// CHECK: } // end sil function 'test_enum_addr'
87+
}
88+
89+
// CHECK-LABEL: sil @dont_jumpthread_enum_addr
90+
sil @dont_jumpthread_enum_addr : $@convention(thin) (E) -> Builtin.Int32 {
91+
bb0(%0 : $E):
92+
%2 = alloc_stack $E
93+
%3 = alloc_stack $E
94+
cond_br undef, bb1, bb2
95+
96+
// CHECK: bb1:
97+
// CHECK-NEXT: inject_enum_addr
98+
// CHECK-NEXT: store
99+
// CHECK-NEXT: br bb3
100+
bb1:
101+
inject_enum_addr %2 : $*E, #E.A!enumelt
102+
store %0 to %2 : $*E
103+
br bb3
104+
105+
// CHECK: bb2:
106+
// CHECK-NEXT: inject_enum_addr
107+
// CHECK-NEXT: br bb3
108+
bb2:
109+
inject_enum_addr %3 : $*E, #E.A!enumelt
110+
br bb3
111+
112+
// CHECK: bb3:
113+
// CHECK-NEXT: switch_enum_addr
114+
bb3:
115+
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5
116+
117+
bb4:
118+
%10 = integer_literal $Builtin.Int32, 1
119+
br bb6(%10 : $Builtin.Int32)
120+
121+
bb5:
122+
%11 = integer_literal $Builtin.Int32, 2
123+
br bb6(%11 : $Builtin.Int32)
124+
125+
bb6(%12 : $Builtin.Int32):
126+
dealloc_stack %3 : $*E
127+
dealloc_stack %2 : $*E
128+
return %12 : $Builtin.Int32
129+
// CHECK: } // end sil function 'dont_jumpthread_enum_addr'
130+
}
131+

test/SILOptimizer/generic_loop.swift

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
// RUN: %target-swift-frontend -primary-file %s -O -sil-verify-all -module-name=test -emit-sil | %FileCheck %s
2+
3+
// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib
4+
5+
6+
// Check that we can eliminate all optionals from a loop which is iterating
7+
// over an array of address-only elements.
8+
9+
// CHECK-LABEL: sil @$s4test0A18_no_optionals_usedyySayxGlF : $@convention(thin) <T> (@guaranteed Array<T>) -> () {
10+
// CHECK-NOT: Optional
11+
// CHECK: } // end sil function '$s4test0A18_no_optionals_usedyySayxGlF'
12+
public func test_no_optionals_used<T>(_ items: [T]) {
13+
for i in items {
14+
print(i)
15+
}
16+
}
17+

test/SILOptimizer/sil_combine_concrete_existential.swift

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -95,10 +95,11 @@ struct SS: PPP {
9595

9696
// The first apply has been devirtualized and inlined. The second remains unspecialized.
9797
// CHECK-LABEL: sil @$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF : $@convention(thin) () -> () {
98+
// CHECK: [[O1:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
9899
// CHECK: switch_enum_addr %{{.*}} : $*Optional<@opened("{{.*}}") PPP>, case #Optional.some!enumelt: bb{{.*}}, case #Optional.none!enumelt: bb{{.*}}
99-
// CHECK: [[O:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
100-
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
101-
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
100+
// CHECK: [[O2:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
101+
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O1]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
102+
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O2]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
102103
// CHECK-LABEL: } // end sil function '$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF'
103104
public func testWitnessReturnOptionalIndirectSelf() {
104105
let p: PPP = S()

0 commit comments

Comments
 (0)