Skip to content

SILOptimizer: two optimization improvements for address-only enums #34115

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Sep 30, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 44 additions & 7 deletions lib/SILOptimizer/SILCombiner/SILCombinerMiscVisitors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -453,6 +453,10 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
return false;

EnumElementDecl *element = nullptr;
unsigned numInits =0;
unsigned numTakes = 0;
SILBasicBlock *initBlock = nullptr;
SILBasicBlock *takeBlock = nullptr;
SILType payloadType;

// First step: check if the stack location is only used to hold one specific
Expand All @@ -463,6 +467,8 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
case SILInstructionKind::DebugValueAddrInst:
case SILInstructionKind::DestroyAddrInst:
case SILInstructionKind::DeallocStackInst:
case SILInstructionKind::InjectEnumAddrInst:
// We'll check init_enum_addr below.
break;
case SILInstructionKind::InitEnumDataAddrInst: {
auto *ieda = cast<InitEnumDataAddrInst>(user);
Expand All @@ -472,20 +478,17 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
element = el;
assert(!payloadType || payloadType == ieda->getType());
payloadType = ieda->getType();
break;
}
case SILInstructionKind::InjectEnumAddrInst: {
auto *el = cast<InjectEnumAddrInst>(user)->getElement();
if (element && el != element)
return false;
element = el;
numInits++;
initBlock = user->getParent();
break;
}
case SILInstructionKind::UncheckedTakeEnumDataAddrInst: {
auto *el = cast<UncheckedTakeEnumDataAddrInst>(user)->getElement();
if (element && el != element)
return false;
element = el;
numTakes++;
takeBlock = user->getParent();
break;
}
default:
Expand All @@ -495,6 +498,24 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
if (!element || !payloadType)
return false;

// If the enum has a single init-take pair in a single block, we know that
// the enum cannot contain any valid payload outside that init-take pair.
//
// This also means that we can ignore any inject_enum_addr of another enum
// case, because this can only inject a case without a payload.
bool singleInitTakePair =
(numInits == 1 && numTakes == 1 && initBlock == takeBlock);
if (!singleInitTakePair) {
// No single init-take pair: We cannot ignore inject_enum_addrs with a
// mismatching case.
for (auto *use : AS->getUses()) {
if (auto *inject = dyn_cast<InjectEnumAddrInst>(use->getUser())) {
if (inject->getElement() != element)
return false;
}
}
}

// Second step: replace the enum alloc_stack with a payload alloc_stack.
auto *newAlloc = Builder.createAllocStack(
AS->getLoc(), payloadType, AS->getVarInfo(), AS->hasDynamicLifetime());
Expand All @@ -508,6 +529,22 @@ bool SILCombiner::optimizeStackAllocatedEnum(AllocStackInst *AS) {
eraseInstFromFunction(*user);
break;
case SILInstructionKind::DestroyAddrInst:
if (singleInitTakePair) {
// It's not possible that the enum has a payload at the destroy_addr,
// because it must have already been taken by the take of the
// single init-take pair.
// We _have_ to remove the destroy_addr, because we also remove all
// inject_enum_addrs which might inject a payload-less case before
// the destroy_addr.
eraseInstFromFunction(*user);
} else {
// The enum payload can still be valid at the destroy_addr, so we have
// to keep the destroy_addr. Just replace the enum with the payload
// (and because it's not a singleInitTakePair, we can be sure that the
// enum cannot have any other case than the payload case).
use->set(newAlloc);
}
break;
case SILInstructionKind::DeallocStackInst:
use->set(newAlloc);
break;
Expand Down
74 changes: 66 additions & 8 deletions lib/SILOptimizer/Transforms/SimplifyCFG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -916,16 +916,57 @@ static bool couldRemoveRelease(SILBasicBlock *SrcBB, SILValue SrcV,
return IsReleaseOfDest;
}

/// Returns true if any instruction in \p block may write memory.
static bool blockMayWriteMemory(SILBasicBlock *block) {
for (auto instAndIdx : llvm::enumerate(*block)) {
if (instAndIdx.value().mayWriteToMemory())
return true;
// Only look at the first 20 instructions to avoid compile time problems for
// corner cases of very large blocks without memory writes.
// 20 instructions is more than enough.
if (instAndIdx.index() > 50)
return true;
}
return false;
}

// Returns true if \p block contains an injected an enum case into \p enumAddr
// which is valid at the end of the block.
static bool hasInjectedEnumAtEndOfBlock(SILBasicBlock *block, SILValue enumAddr) {
for (auto instAndIdx : llvm::enumerate(llvm::reverse(*block))) {
SILInstruction &inst = instAndIdx.value();
if (auto *injectInst = dyn_cast<InjectEnumAddrInst>(&inst)) {
return injectInst->getOperand() == enumAddr;
}
if (inst.mayWriteToMemory())
return false;
// Only look at the first 20 instructions to avoid compile time problems for
// corner cases of very large blocks without memory writes.
// 20 instructions is more than enough.
if (instAndIdx.index() > 50)
return false;
}
return false;
}

/// tryJumpThreading - Check to see if it looks profitable to duplicate the
/// destination of an unconditional jump into the bottom of this block.
bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
auto *DestBB = BI->getDestBB();
auto *SrcBB = BI->getParent();
TermInst *destTerminator = DestBB->getTerminator();
// If the destination block ends with a return, we don't want to duplicate it.
// We want to maintain the canonical form of a single return where possible.
if (DestBB->getTerminator()->isFunctionExiting())
if (destTerminator->isFunctionExiting())
return false;

// Jump threading only makes sense if there is an argument on the branch
// (which is reacted on in the DestBB), or if this goes through a memory
// location (switch_enum_addr is the only adress-instruction which we
// currently handle).
if (BI->getArgs().empty() && !isa<SwitchEnumAddrInst>(destTerminator))
return false;

// We don't have a great cost model at the SIL level, so we don't want to
// blissly duplicate tons of code with a goal of improved performance (we'll
// leave that to LLVM). However, doing limited code duplication can lead to
Expand Down Expand Up @@ -956,11 +997,29 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
}
}

if (ThreadingBudget == 0 && isa<CondBranchInst>(DestBB->getTerminator())) {
for (auto V : BI->getArgs()) {
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
if (ThreadingBudget == 0) {
if (isa<CondBranchInst>(destTerminator)) {
for (auto V : BI->getArgs()) {
if (isa<IntegerLiteralInst>(V) || isa<FloatLiteralInst>(V)) {
ThreadingBudget = 4;
break;
}
}
} else if (auto *SEA = dyn_cast<SwitchEnumAddrInst>(destTerminator)) {
// If the branch-block injects a certain enum case and the destination
// switches on that enum, it's worth jump threading. E.g.
//
// inject_enum_addr %enum : $*Optional<T>, #Optional.some
// ... // no memory writes here
// br DestBB
// DestBB:
// ... // no memory writes here
// switch_enum_addr %enum : $*Optional<T>, case #Optional.some ...
//
SILValue enumAddr = SEA->getOperand();
if (!blockMayWriteMemory(DestBB) &&
hasInjectedEnumAtEndOfBlock(SrcBB, enumAddr)) {
ThreadingBudget = 4;
break;
}
}
}
Expand All @@ -976,7 +1035,7 @@ bool SimplifyCFG::tryJumpThreading(BranchInst *BI) {
// control flow. Still, we make an exception for switch_enum.
bool DestIsLoopHeader = (LoopHeaders.count(DestBB) != 0);
if (DestIsLoopHeader) {
if (!isa<SwitchEnumInst>(DestBB->getTerminator()))
if (!isa<SwitchEnumInst>(destTerminator))
return false;
}

Expand Down Expand Up @@ -1286,8 +1345,7 @@ bool SimplifyCFG::simplifyBranchBlock(BranchInst *BI) {
// If this unconditional branch has BBArgs, check to see if duplicating the
// destination would allow it to be simplified. This is a simple form of jump
// threading.
if (!isVeryLargeFunction && !BI->getArgs().empty() &&
tryJumpThreading(BI))
if (!isVeryLargeFunction && tryJumpThreading(BI))
return true;

return Simplified;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,6 @@ bb6:
// CHECK: [[IEDA:%.*]] = init_enum_data_addr [[OPT]] : $*Optional<HasFoo>, #Optional.some!enumelt
// CHECK: checked_cast_addr_br take_always S1<Int8> in [[VAL]] : $*S1<Int8> to HasFoo in [[IEDA]] : $*HasFoo, bb1, bb2
// bbs...
// CHECK: switch_enum_addr [[OPT]] : $*Optional<HasFoo>, case #Optional.some!enumelt: bb4, case #Optional.none!enumelt: bb5
// bbs...
// CHECK: [[UNWRAP:%.*]] = unchecked_take_enum_data_addr [[OPT]] : $*Optional<HasFoo>, #Optional.some!enumelt
// CHECK: copy_addr [take] [[UNWRAP]] to [initialization] [[EXIS]] : $*HasFoo
// CHECK: [[OPEN:%.*]] = open_existential_addr immutable_access [[EXIS]] : $*HasFoo to $*@opened("4E16CBC0-FD9F-11E8-A311-D0817AD9F6DD") HasFoo
Expand Down
80 changes: 80 additions & 0 deletions test/SILOptimizer/enum_jump_thread.sil
Original file line number Diff line number Diff line change
Expand Up @@ -49,3 +49,83 @@ bb5(%7 : $E2):
return %7 : $E2
}

// CHECK-LABEL: sil @test_enum_addr
sil @test_enum_addr : $@convention(thin) () -> Builtin.Int32 {
bb0:
%2 = alloc_stack $E
cond_br undef, bb1, bb2

// CHECK: bb1:
// CHECK-NEXT: inject_enum_addr
// CHECK-NEXT: switch_enum_addr
bb1:
inject_enum_addr %2 : $*E, #E.A!enumelt
br bb3

// CHECK: bb2:
// CHECK-NEXT: inject_enum_addr
// CHECK-NEXT: switch_enum_addr
bb2:
inject_enum_addr %2 : $*E, #E.B!enumelt
br bb3

bb3:
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5

bb4:
%10 = integer_literal $Builtin.Int32, 1
br bb6(%10 : $Builtin.Int32)

bb5:
%11 = integer_literal $Builtin.Int32, 2
br bb6(%11 : $Builtin.Int32)

bb6(%12 : $Builtin.Int32):
dealloc_stack %2 : $*E
return %12 : $Builtin.Int32
// CHECK: } // end sil function 'test_enum_addr'
}

// CHECK-LABEL: sil @dont_jumpthread_enum_addr
sil @dont_jumpthread_enum_addr : $@convention(thin) (E) -> Builtin.Int32 {
bb0(%0 : $E):
%2 = alloc_stack $E
%3 = alloc_stack $E
cond_br undef, bb1, bb2

// CHECK: bb1:
// CHECK-NEXT: inject_enum_addr
// CHECK-NEXT: store
// CHECK-NEXT: br bb3
bb1:
inject_enum_addr %2 : $*E, #E.A!enumelt
store %0 to %2 : $*E
br bb3

// CHECK: bb2:
// CHECK-NEXT: inject_enum_addr
// CHECK-NEXT: br bb3
bb2:
inject_enum_addr %3 : $*E, #E.A!enumelt
br bb3

// CHECK: bb3:
// CHECK-NEXT: switch_enum_addr
bb3:
switch_enum_addr %2 : $*E, case #E.A!enumelt: bb4, case #E.B!enumelt: bb5

bb4:
%10 = integer_literal $Builtin.Int32, 1
br bb6(%10 : $Builtin.Int32)

bb5:
%11 = integer_literal $Builtin.Int32, 2
br bb6(%11 : $Builtin.Int32)

bb6(%12 : $Builtin.Int32):
dealloc_stack %3 : $*E
dealloc_stack %2 : $*E
return %12 : $Builtin.Int32
// CHECK: } // end sil function 'dont_jumpthread_enum_addr'
}

17 changes: 17 additions & 0 deletions test/SILOptimizer/generic_loop.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// RUN: %target-swift-frontend -primary-file %s -O -sil-verify-all -module-name=test -emit-sil | %FileCheck %s

// REQUIRES: swift_stdlib_no_asserts,optimized_stdlib


// Check that we can eliminate all optionals from a loop which is iterating
// over an array of address-only elements.

// CHECK-LABEL: sil @$s4test0A18_no_optionals_usedyySayxGlF : $@convention(thin) <T> (@guaranteed Array<T>) -> () {
// CHECK-NOT: Optional
// CHECK: } // end sil function '$s4test0A18_no_optionals_usedyySayxGlF'
public func test_no_optionals_used<T>(_ items: [T]) {
for i in items {
print(i)
}
}

7 changes: 4 additions & 3 deletions test/SILOptimizer/sil_combine_concrete_existential.swift
Original file line number Diff line number Diff line change
Expand Up @@ -95,10 +95,11 @@ struct SS: PPP {

// The first apply has been devirtualized and inlined. The second remains unspecialized.
// CHECK-LABEL: sil @$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF : $@convention(thin) () -> () {
// CHECK: [[O1:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
// CHECK: switch_enum_addr %{{.*}} : $*Optional<@opened("{{.*}}") PPP>, case #Optional.some!enumelt: bb{{.*}}, case #Optional.none!enumelt: bb{{.*}}
// CHECK: [[O:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
// CHECK: [[O2:%.*]] = open_existential_addr immutable_access %{{.*}} : $*PPP to $*@opened("{{.*}}") PPP
// CHECK: [[W:%.*]] = witness_method $@opened("{{.*}}") PPP, #PPP.returnsOptionalIndirect : <Self where Self : PPP> (Self) -> () -> Self?, [[O1]] : $*@opened("{{.*}}") PPP : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
// CHECK: apply [[W]]<@opened("{{.*}}") PPP>(%{{.*}}, [[O2]]) : $@convention(witness_method: PPP) <τ_0_0 where τ_0_0 : PPP> (@in_guaranteed τ_0_0) -> @out Optional<τ_0_0>
// CHECK-LABEL: } // end sil function '$s32sil_combine_concrete_existential37testWitnessReturnOptionalIndirectSelfyyF'
public func testWitnessReturnOptionalIndirectSelf() {
let p: PPP = S()
Expand Down
Loading