Skip to content

Commit d431acf

Browse files
authored
Merge branch 'main' into cir/bit-op-folder
2 parents b22e90b + c1545b6 commit d431acf

File tree

20 files changed

+839
-1138
lines changed

20 files changed

+839
-1138
lines changed

flang/lib/Optimizer/OpenMP/MapInfoFinalization.cpp

Lines changed: 31 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -137,32 +137,41 @@ class MapInfoFinalizationPass
137137
!fir::factory::isOptionalArgument(descriptor.getDefiningOp()))
138138
return descriptor;
139139

140-
mlir::Value &slot = localBoxAllocas[descriptor.getDefiningOp()];
141-
if (slot) {
142-
return slot;
140+
mlir::Value &alloca = localBoxAllocas[descriptor.getDefiningOp()];
141+
mlir::Location loc = boxMap->getLoc();
142+
143+
if (!alloca) {
144+
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
145+
// allowing it to access non-reference box operations can cause some
146+
// problematic SSA IR. However, in the case of assumed shape's the type
147+
// is not a !fir.ref, in these cases to retrieve the appropriate
148+
// !fir.ref<!fir.box<...>> to access the data we need to map we must
149+
// perform an alloca and then store to it and retrieve the data from the
150+
// new alloca.
151+
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
152+
mlir::Block *allocaBlock = builder.getAllocaBlock();
153+
assert(allocaBlock && "No alloca block found for this top level op");
154+
builder.setInsertionPointToStart(allocaBlock);
155+
156+
mlir::Type allocaType = descriptor.getType();
157+
if (fir::isBoxAddress(allocaType))
158+
allocaType = fir::unwrapRefType(allocaType);
159+
alloca = fir::AllocaOp::create(builder, loc, allocaType);
160+
builder.restoreInsertionPoint(insPt);
143161
}
144162

145-
// The fir::BoxOffsetOp only works with !fir.ref<!fir.box<...>> types, as
146-
// allowing it to access non-reference box operations can cause some
147-
// problematic SSA IR. However, in the case of assumed shape's the type
148-
// is not a !fir.ref, in these cases to retrieve the appropriate
149-
// !fir.ref<!fir.box<...>> to access the data we need to map we must
150-
// perform an alloca and then store to it and retrieve the data from the new
151-
// alloca.
152-
mlir::OpBuilder::InsertPoint insPt = builder.saveInsertionPoint();
153-
mlir::Block *allocaBlock = builder.getAllocaBlock();
154-
mlir::Location loc = boxMap->getLoc();
155-
assert(allocaBlock && "No alloca block found for this top level op");
156-
builder.setInsertionPointToStart(allocaBlock);
157-
158-
mlir::Type allocaType = descriptor.getType();
159-
if (fir::isBoxAddress(allocaType))
160-
allocaType = fir::unwrapRefType(allocaType);
161-
auto alloca = fir::AllocaOp::create(builder, loc, allocaType);
162-
builder.restoreInsertionPoint(insPt);
163163
// We should only emit a store if the passed in data is present, it is
164164
// possible a user passes in no argument to an optional parameter, in which
165165
// case we cannot store or we'll segfault on the emitted memcpy.
166+
// TODO: We currently emit a present -> load/store every time we use a
167+
// mapped value that requires a local allocation, this isn't the most
168+
// efficient, although, it is more correct in a lot of situations. One
169+
// such situation is emitting a this series of instructions in separate
170+
// segments of a branch (e.g. two target regions in separate else/if branch
171+
// mapping the same function argument), however, it would be nice to be able
172+
// to optimize these situations e.g. raising the load/store out of the
173+
// branch if possible. But perhaps this is best left to lower level
174+
// optimisation passes.
166175
auto isPresent =
167176
fir::IsPresentOp::create(builder, loc, builder.getI1Type(), descriptor);
168177
builder.genIfOp(loc, {}, isPresent, false)
@@ -171,7 +180,7 @@ class MapInfoFinalizationPass
171180
fir::StoreOp::create(builder, loc, descriptor, alloca);
172181
})
173182
.end();
174-
return slot = alloca;
183+
return alloca;
175184
}
176185

177186
/// Function that generates a FIR operation accessing the descriptor's

flang/lib/Parser/openmp-parsers.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,11 +1222,14 @@ struct StrictlyStructuredBlockParser {
12221222
using resultType = Block;
12231223

12241224
std::optional<resultType> Parse(ParseState &state) const {
1225-
if (auto epc{Parser<ExecutionPartConstruct>{}.Parse(state)}) {
1226-
if (IsFortranBlockConstruct(*epc)) {
1227-
Block block;
1228-
block.emplace_back(std::move(*epc));
1229-
return std::move(block);
1225+
// Detect BLOCK construct without parsing the entire thing.
1226+
if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) {
1227+
if (auto epc{Parser<ExecutionPartConstruct>{}.Parse(state)}) {
1228+
if (IsFortranBlockConstruct(*epc)) {
1229+
Block block;
1230+
block.emplace_back(std::move(*epc));
1231+
return std::move(block);
1232+
}
12301233
}
12311234
}
12321235
return std::nullopt;
@@ -1237,6 +1240,10 @@ struct LooselyStructuredBlockParser {
12371240
using resultType = Block;
12381241

12391242
std::optional<resultType> Parse(ParseState &state) const {
1243+
// Detect BLOCK construct without parsing the entire thing.
1244+
if (lookAhead(skipStuffBeforeStatement >> "BLOCK"_tok).Parse(state)) {
1245+
return std::nullopt;
1246+
}
12401247
Block body;
12411248
if (auto epc{attempt(Parser<ExecutionPartConstruct>{}).Parse(state)}) {
12421249
if (!IsFortranBlockConstruct(*epc)) {
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
!RUN: %flang_fc1 -emit-hlfir -fopenmp %s -o - | FileCheck %s
2+
3+
module mod
4+
contains
5+
subroutine foo(dt, switch)
6+
implicit none
7+
real(4), dimension(:), intent(inout) :: dt
8+
logical, intent(in) :: switch
9+
integer :: dim, idx
10+
11+
if (switch) then
12+
!$omp target teams distribute parallel do
13+
do idx = 1, 100
14+
dt(idx) = 20
15+
end do
16+
else
17+
!$omp target teams distribute parallel do
18+
do idx = 1, 100
19+
dt(idx) = 30
20+
end do
21+
end if
22+
end subroutine foo
23+
end module
24+
25+
! CHECK-LABEL: func.func @{{.*}}(
26+
! CHECK-SAME: %[[ARG0:.*]]: !fir.box<!fir.array<?xf32>> {fir.bindc_name = "dt"},
27+
! CHECK: %[[VAL_0:.*]] = fir.alloca !fir.box<!fir.array<?xf32>>
28+
! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[ARG0]] dummy_scope {{.*}}
29+
! CHECK: fir.if %{{.*}} {
30+
! CHECK: %[[VAL_2:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
31+
! CHECK: fir.if %[[VAL_2]] {
32+
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
33+
! CHECK: }
34+
! CHECK: %[[VAL_3:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
35+
! CHECK: %[[VAL_4:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_3]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
36+
! CHECK: %[[VAL_5:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_4]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
37+
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}%[[VAL_5]] -> {{.*}}, %[[VAL_4]] -> {{.*}} : {{.*}}) {
38+
! CHECK: } else {
39+
! CHECK: %[[VAL_6:.*]] = fir.is_present %[[VAL_1]]#1 : (!fir.box<!fir.array<?xf32>>) -> i1
40+
! CHECK: fir.if %[[VAL_6]] {
41+
! CHECK: fir.store %[[VAL_1]]#1 to %[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>
42+
! CHECK: }
43+
! CHECK: %[[VAL_7:.*]] = fir.box_offset %[[VAL_0]] base_addr : (!fir.ref<!fir.box<!fir.array<?xf32>>>) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>
44+
! CHECK: %[[VAL_8:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, f32) map_clauses(implicit, tofrom) capture(ByRef) var_ptr_ptr(%[[VAL_7]] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) bounds(%{{.*}}) -> !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>> {name = ""}
45+
! CHECK: %[[VAL_9:.*]] = omp.map.info var_ptr(%[[VAL_0]] : !fir.ref<!fir.box<!fir.array<?xf32>>>, !fir.box<!fir.array<?xf32>>) map_clauses(implicit, to) capture(ByRef) members(%[[VAL_8]] : [0] : !fir.llvm_ptr<!fir.ref<!fir.array<?xf32>>>) -> !fir.ref<!fir.array<?xf32>> {name = "dt"}
46+
! CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[VAL_9]] ->{{.*}}, %[[VAL_8]] -> {{.*}} : {{.*}}) {

llvm/lib/CodeGen/BranchFolding.cpp

Lines changed: 44 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2083,22 +2083,59 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
20832083
if (TBB == FBB) {
20842084
MBB->splice(Loc, TBB, TBB->begin(), TIB);
20852085
} else {
2086+
// Merge the debug locations, and hoist and kill the debug instructions from
2087+
// both branches. FIXME: We could probably try harder to preserve some debug
2088+
// instructions (but at least this isn't producing wrong locations).
2089+
MachineInstrBuilder MIRBuilder(*MBB->getParent(), Loc);
2090+
auto HoistAndKillDbgInstr = [MBB, Loc](MachineBasicBlock::iterator DI) {
2091+
assert(DI->isDebugInstr() && "Expected a debug instruction");
2092+
if (DI->isDebugRef()) {
2093+
const TargetInstrInfo *TII =
2094+
MBB->getParent()->getSubtarget().getInstrInfo();
2095+
const MCInstrDesc &DBGV = TII->get(TargetOpcode::DBG_VALUE);
2096+
DI = BuildMI(*MBB->getParent(), DI->getDebugLoc(), DBGV, false, 0,
2097+
DI->getDebugVariable(), DI->getDebugExpression());
2098+
MBB->insert(Loc, &*DI);
2099+
return;
2100+
}
2101+
// Deleting a DBG_PHI results in an undef at the referenced DBG_INSTR_REF.
2102+
if (DI->isDebugPHI()) {
2103+
DI->eraseFromParent();
2104+
return;
2105+
}
2106+
2107+
DI->setDebugValueUndef();
2108+
DI->moveBefore(&*Loc);
2109+
};
2110+
20862111
// TIB and FIB point to the end of the regions to hoist/merge in TBB and
20872112
// FBB.
20882113
MachineBasicBlock::iterator FE = FIB;
20892114
MachineBasicBlock::iterator FI = FBB->begin();
20902115
for (MachineBasicBlock::iterator TI :
20912116
make_early_inc_range(make_range(TBB->begin(), TIB))) {
2092-
// Move debug instructions and pseudo probes without modifying them.
2093-
// FIXME: This is the wrong thing to do for debug locations, which
2094-
// should at least be killed (and hoisted from BOTH blocks).
2095-
if (TI->isDebugOrPseudoInstr()) {
2096-
TI->moveBefore(&*Loc);
2117+
// Hoist and kill debug instructions from FBB. After this loop FI points
2118+
// to the next non-debug instruction to hoist (checked in assert after the
2119+
// TBB debug instruction handling code).
2120+
while (FI->isDebugInstr()) {
2121+
assert(FI != FE && "Unexpected end of FBB range");
2122+
MachineBasicBlock::iterator FINext = std::next(FI);
2123+
HoistAndKillDbgInstr(FI);
2124+
FI = FINext;
2125+
}
2126+
2127+
// Kill debug instructions before moving.
2128+
if (TI->isDebugInstr()) {
2129+
HoistAndKillDbgInstr(TI);
20972130
continue;
20982131
}
20992132

2100-
// Get the next non-meta instruction in FBB.
2133+
// If FI is a debug instruction, skip forward to the next non-debug
2134+
// instruction.
21012135
FI = skipDebugInstructionsForward(FI, FE, false);
2136+
// Pseudo probes are excluded from the range when identifying foldable
2137+
// instructions, so we don't expect to see one now.
2138+
assert(!TI->isPseudoProbe() && "Unexpected pseudo probe in range");
21022139
// NOTE: The loop above checks CheckKillDead but we can't do that here as
21032140
// it modifies some kill markers after the check.
21042141
assert(TI->isIdenticalTo(*FI, MachineInstr::CheckDefs) &&
@@ -2111,6 +2148,7 @@ bool BranchFolder::HoistCommonCodeInSuccs(MachineBasicBlock *MBB) {
21112148
++FI;
21122149
}
21132150
}
2151+
21142152
FBB->erase(FBB->begin(), FIB);
21152153

21162154
if (UpdateLiveIns)

0 commit comments

Comments
 (0)