Skip to content

Commit 849f220

Browse files
committed
[OpenMP] Don't emit redundant zero-sized mapping nodes for overlapped structs
The handling of overlapped structure mapping in CGOpenMPRuntime.cpp can lead to redundant zero-sized mapping nodes at runtime. This patch fixes it using a combination of approaches: trivially adjacent struct members won't have a mapping node created between them, and for more complicated cases (inheritance) the physical layout of the struct/class is used to make sure that elements aren't missed. I've introduced a new class to track the state whilst iterating over the struct. This reduces a bit of redundancy in the code (accumulating CombinedInfo both during and after the loop), which I think is a bit neater. Before: omptarget --> Entry 0: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fff8d483830, Begin=0x00007fff8d48383c, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=0, Type=0x1000000000003, Name=unknown omptarget --> Entry 7: Base=0x00007fff8d483830, Begin=0x00007fff8d48385c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fff8d483830, Begin=0x00007fff8d483830, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fff8d483830, Begin=0x00007fff8d483834, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 10: Base=0x00007fff8d483830, Begin=0x00007fff8d483838, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 11: Base=0x00007fff8d483840, Begin=0x00005e7665275130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 12: Base=0x00007fff8d483830, Begin=0x00007fff8d483850, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 13: Base=0x00007fff8d483830, Begin=0x00007fff8d483854, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 14: Base=0x00007fff8d483830, Begin=0x00007fff8d483858, Size=4, Type=0x1000000000003, Name=unknown After: omptarget --> Entry 0: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=48, Type=0x20, Name=unknown omptarget --> Entry 1: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562ec, Size=20, Type=0x1000000000003, Name=unknown omptarget --> Entry 2: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f5630c, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 3: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e0, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 4: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e4, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 5: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f562e8, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 6: Base=0x00007fffd0f562f0, Begin=0x000058b6013fb130, Size=32, Type=0x1000000000013, Name=unknown omptarget --> Entry 7: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56300, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 8: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56304, Size=4, Type=0x1000000000003, Name=unknown omptarget --> Entry 9: Base=0x00007fffd0f562e0, Begin=0x00007fffd0f56308, Size=4, Type=0x1000000000003, Name=unknown For code: #include <cstdlib> #include <cstdio> struct S { int x; int y; int z; int *p1; int *p2; }; struct T : public S { int a; int b; int c; }; int main() { T v; v.p1 = (int*) calloc(8, sizeof(int)); v.p2 = (int*) calloc(8, sizeof(int)); #pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c) { v.x++; v.y += 2; v.z += 3; v.p1[0] += 4; v.a += 7; v.b += 5; v.c += 6; } return 0; }
1 parent ad1cbc0 commit 849f220

File tree

8 files changed

+449
-72
lines changed

8 files changed

+449
-72
lines changed

clang/lib/CodeGen/CGOpenMPRuntime.cpp

Lines changed: 110 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -7080,6 +7080,110 @@ class MappableExprsHandler {
70807080
return ConstLength.getSExtValue() != 1;
70817081
}
70827082

7083+
/// A helper class to copy structures with overlapped elements, i.e. those
7084+
/// which have mappings of both "s" and "s.mem". Consecutive elements that
7085+
/// are not explicitly copied have mapping nodes synthesized for them,
7086+
/// taking care to avoid generating zero-sized copies.
7087+
class CopyOverlappedEntryGaps {
7088+
CodeGenFunction &CGF;
7089+
MapCombinedInfoTy &CombinedInfo;
7090+
OpenMPOffloadMappingFlags Flags;
7091+
const ValueDecl *MapDecl;
7092+
const Expr *MapExpr;
7093+
Address BP;
7094+
bool IsNonContiguous;
7095+
uint64_t DimSize;
7096+
// These elements track the position as the struct is iterated over
7097+
// (in order of increasing element address).
7098+
const RecordDecl *LastParent = nullptr;
7099+
uint64_t Cursor = 0;
7100+
unsigned LastIndex = -1u;
7101+
Address LB;
7102+
7103+
public:
7104+
CopyOverlappedEntryGaps(CodeGenFunction &_CGF,
7105+
MapCombinedInfoTy &_CombinedInfo,
7106+
OpenMPOffloadMappingFlags _Flags,
7107+
const ValueDecl *_MapDecl, const Expr *_MapExpr,
7108+
Address _BP, Address _LB, bool _IsNonContiguous,
7109+
uint64_t _DimSize)
7110+
: CGF(_CGF), CombinedInfo(_CombinedInfo), Flags(_Flags), MapDecl(_MapDecl),
7111+
MapExpr(_MapExpr), BP(_BP), LB(_LB),
7112+
IsNonContiguous(_IsNonContiguous), DimSize(_DimSize) { }
7113+
7114+
void ProcessField(const OMPClauseMappableExprCommon::MappableComponent &MC,
7115+
const FieldDecl *FD,
7116+
llvm::function_ref<LValue(CodeGenFunction &, const MemberExpr *)> EmitMemberExprBase) {
7117+
const RecordDecl *RD = FD->getParent();
7118+
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(RD);
7119+
uint64_t FieldOffset = RL.getFieldOffset(FD->getFieldIndex());
7120+
uint64_t FieldSize = CGF.getContext().getTypeSize(FD->getType().getCanonicalType());
7121+
Address ComponentLB = Address::invalid();
7122+
7123+
if (FD->getType()->isLValueReferenceType()) {
7124+
const auto *ME =
7125+
cast<MemberExpr>(MC.getAssociatedExpression());
7126+
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7127+
ComponentLB =
7128+
CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7129+
.getAddress();
7130+
} else {
7131+
ComponentLB =
7132+
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7133+
.getAddress();
7134+
}
7135+
7136+
if (LastParent == nullptr) {
7137+
LastParent = RD;
7138+
}
7139+
if (FD->getParent() == LastParent) {
7140+
if (FD->getFieldIndex() != LastIndex + 1)
7141+
CopyUntilField(FD, ComponentLB);
7142+
} else {
7143+
LastParent = FD->getParent();
7144+
if (((int64_t)FieldOffset - (int64_t)Cursor) > 0)
7145+
CopyUntilField(FD, ComponentLB);
7146+
}
7147+
Cursor = FieldOffset + FieldSize;
7148+
LastIndex = FD->getFieldIndex();
7149+
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
7150+
}
7151+
7152+
void CopyUntilField(const FieldDecl *FD, Address ComponentLB) {
7153+
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7154+
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7155+
llvm::Value *Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7156+
LBPtr);
7157+
CopySizedChunk(LBPtr, Size);
7158+
}
7159+
7160+
void CopyUntilEnd(Address HB) {
7161+
if (LastParent) {
7162+
const ASTRecordLayout &RL = CGF.getContext().getASTRecordLayout(LastParent);
7163+
if ((uint64_t)CGF.getContext().toBits(RL.getSize()) <= Cursor)
7164+
return;
7165+
}
7166+
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7167+
llvm::Value *Size = CGF.Builder.CreatePtrDiff(
7168+
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7169+
LBPtr);
7170+
CopySizedChunk(LBPtr, Size);
7171+
}
7172+
7173+
void CopySizedChunk(llvm::Value *Base, llvm::Value *Size) {
7174+
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7175+
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7176+
CombinedInfo.DevicePtrDecls.push_back(nullptr);
7177+
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7178+
CombinedInfo.Pointers.push_back(Base);
7179+
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7180+
Size, CGF.Int64Ty, /*isSigned=*/true));
7181+
CombinedInfo.Types.push_back(Flags);
7182+
CombinedInfo.Mappers.push_back(nullptr);
7183+
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize : 1);
7184+
}
7185+
};
7186+
70837187
/// Generate the base pointers, section pointers, sizes, map type bits, and
70847188
/// user-defined mappers (all included in \a CombinedInfo) for the provided
70857189
/// map type, map or motion modifiers, and expression components.
@@ -7570,63 +7674,22 @@ class MappableExprsHandler {
75707674
getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
75717675
/*AddPtrFlag=*/false,
75727676
/*AddIsTargetParamFlag=*/false, IsNonContiguous);
7573-
llvm::Value *Size = nullptr;
7677+
CopyOverlappedEntryGaps CopyGaps(CGF, CombinedInfo, Flags, MapDecl,
7678+
MapExpr, BP, LB, IsNonContiguous,
7679+
DimSize);
75747680
// Do bitcopy of all non-overlapped structure elements.
75757681
for (OMPClauseMappableExprCommon::MappableExprComponentListRef
75767682
Component : OverlappedElements) {
7577-
Address ComponentLB = Address::invalid();
75787683
for (const OMPClauseMappableExprCommon::MappableComponent &MC :
75797684
Component) {
75807685
if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
7581-
const auto *FD = dyn_cast<FieldDecl>(VD);
7582-
if (FD && FD->getType()->isLValueReferenceType()) {
7583-
const auto *ME =
7584-
cast<MemberExpr>(MC.getAssociatedExpression());
7585-
LValue BaseLVal = EmitMemberExprBase(CGF, ME);
7586-
ComponentLB =
7587-
CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
7588-
.getAddress();
7589-
} else {
7590-
ComponentLB =
7591-
CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
7592-
.getAddress();
7686+
if (const auto *FD = dyn_cast<FieldDecl>(VD)) {
7687+
CopyGaps.ProcessField(MC, FD, EmitMemberExprBase);
75937688
}
7594-
llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
7595-
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7596-
Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
7597-
LBPtr);
7598-
break;
75997689
}
76007690
}
7601-
assert(Size && "Failed to determine structure size");
7602-
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7603-
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7604-
CombinedInfo.DevicePtrDecls.push_back(nullptr);
7605-
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7606-
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7607-
CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
7608-
Size, CGF.Int64Ty, /*isSigned=*/true));
7609-
CombinedInfo.Types.push_back(Flags);
7610-
CombinedInfo.Mappers.push_back(nullptr);
7611-
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7612-
: 1);
7613-
LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
76147691
}
7615-
CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
7616-
CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
7617-
CombinedInfo.DevicePtrDecls.push_back(nullptr);
7618-
CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
7619-
CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
7620-
llvm::Value *LBPtr = LB.emitRawPointer(CGF);
7621-
Size = CGF.Builder.CreatePtrDiff(
7622-
CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
7623-
LBPtr);
7624-
CombinedInfo.Sizes.push_back(
7625-
CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
7626-
CombinedInfo.Types.push_back(Flags);
7627-
CombinedInfo.Mappers.push_back(nullptr);
7628-
CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
7629-
: 1);
7692+
CopyGaps.CopyUntilEnd(HB);
76307693
break;
76317694
}
76327695
llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());

clang/test/OpenMP/copy-gaps-1.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
2+
// expected-no-diagnostics
3+
4+
struct S {
5+
int x;
6+
int y;
7+
int z;
8+
int *p1;
9+
int *p2;
10+
};
11+
12+
struct T : public S {
13+
int a;
14+
int b;
15+
int c;
16+
};
17+
18+
int main() {
19+
T v;
20+
21+
#pragma omp target map(tofrom: v, v.x, v.y, v.z, v.p1[:8], v.a, v.b, v.c)
22+
{
23+
v.x++;
24+
v.y += 2;
25+
v.z += 3;
26+
v.p1[0] += 4;
27+
v.a += 7;
28+
v.b += 5;
29+
v.c += 6;
30+
}
31+
32+
return 0;
33+
}
34+
35+
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [10 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4, i64 32, i64 4, i64 4, i64 4]
36+
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [10 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000013]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
37+
38+
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
39+
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
40+
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
41+
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [10 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
42+
43+
// Check for filling of four non-constant size elements here: the whole struct
44+
// size, the (padded) region covering p1 & p2, and the padding at the end of
45+
// struct T.
46+
47+
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 0
48+
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
49+
// CHECK-DAG: [[P1P2:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 1
50+
// CHECK-DAG: store i64 %{{.+}}, ptr [[P1P2]], align 8
51+
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [10 x i64], ptr [[SIZES]], i32 0, i32 2
52+
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8

clang/test/OpenMP/copy-gaps-2.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
2+
// expected-no-diagnostics
3+
4+
struct S {
5+
int x;
6+
int y;
7+
int z;
8+
};
9+
10+
struct M : public S {
11+
int mid;
12+
};
13+
14+
struct T : public M {
15+
int a;
16+
int b;
17+
int c;
18+
};
19+
20+
int main() {
21+
T v;
22+
23+
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
24+
{
25+
v.y++;
26+
v.z += 2;
27+
v.a += 3;
28+
v.mid += 5;
29+
}
30+
31+
return 0;
32+
}
33+
34+
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
35+
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
36+
37+
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
38+
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
39+
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
40+
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
41+
42+
// Fill four non-constant size elements here: the whole struct size, the region
43+
// covering v.x, the region covering v.mid and the region covering v.b and v.c.
44+
45+
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
46+
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
47+
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
48+
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
49+
// CHECK-DAG: [[MID:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
50+
// CHECK-DAG: store i64 %{{.+}}, ptr [[MID]], align 8
51+
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
52+
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

clang/test/OpenMP/copy-gaps-3.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
2+
// expected-no-diagnostics
3+
4+
struct S {
5+
int x;
6+
int y;
7+
int z;
8+
};
9+
10+
struct T : public S {
11+
int a;
12+
int b;
13+
int c;
14+
};
15+
16+
int main() {
17+
T v;
18+
19+
// This one should have no gap between v.z & v.a.
20+
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
21+
{
22+
v.y++;
23+
v.z += 2;
24+
v.a += 3;
25+
}
26+
27+
return 0;
28+
}
29+
30+
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [6 x i64] [i64 0, i64 0, i64 0, i64 4, i64 4, i64 4]
31+
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [6 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
32+
33+
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
34+
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
35+
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
36+
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [6 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
37+
38+
// Fill three non-constant size elements here: the whole struct size, the region
39+
// covering v.x, and the region covering v.b and v.c.
40+
41+
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 0
42+
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
43+
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 1
44+
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
45+
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [6 x i64], ptr [[SIZES]], i32 0, i32 2
46+
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

clang/test/OpenMP/copy-gaps-4.cpp

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
// RUN: %clang_cc1 -verify -triple x86_64-pc-linux-gnu -fopenmp-targets=amdgcn-amd-amdhsa -fopenmp -emit-llvm %s -o - | FileCheck %s
2+
// expected-no-diagnostics
3+
4+
struct S {
5+
int x;
6+
int y;
7+
char z; // Hidden padding after here...
8+
};
9+
10+
struct T : public S {
11+
int a;
12+
int b;
13+
int c;
14+
};
15+
16+
int main() {
17+
T v;
18+
19+
#pragma omp target map(tofrom: v, v.y, v.z, v.a)
20+
{
21+
v.y++;
22+
v.z += 2;
23+
v.a += 3;
24+
}
25+
26+
return 0;
27+
}
28+
29+
// CHECK: [[CSTSZ:@.+]] = private {{.*}}constant [7 x i64] [i64 0, i64 0, i64 0, i64 0, i64 4, i64 1, i64 4]
30+
// CHECK: [[CSTTY:@.+]] = private {{.*}}constant [7 x i64] [i64 [[#0x20]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]], i64 [[#0x1000000000003]]]
31+
32+
// CHECK-DAG: call i32 @__tgt_target_kernel(ptr @{{.+}}, i64 -1, i32 -1, i32 0, ptr @.{{.+}}.region_id, ptr [[ARGS:%.+]])
33+
// CHECK-DAG: [[KSIZE:%.+]] = getelementptr inbounds {{.+}}[[ARGS]], i32 0, i32 4
34+
// CHECK-DAG: store ptr [[SZBASE:%.+]], ptr [[KSIZE]], align 8
35+
// CHECK-DAG: [[SZBASE]] = getelementptr inbounds [7 x i64], ptr [[SIZES:%[^,]*]], i32 0, i32 0
36+
37+
// Fill four non-constant size elements here: the whole struct size, the region
38+
// covering v.x, the region covering padding after v.z and the region covering
39+
// v.b and v.c.
40+
41+
// CHECK-DAG: [[STR:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 0
42+
// CHECK-DAG: store i64 %{{.+}}, ptr [[STR]], align 8
43+
// CHECK-DAG: [[X:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 1
44+
// CHECK-DAG: store i64 %{{.+}}, ptr [[X]], align 8
45+
// CHECK-DAG: [[PAD:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 2
46+
// CHECK-DAG: store i64 %{{.+}}, ptr [[PAD]], align 8
47+
// CHECK-DAG: [[BC:%.+]] = getelementptr inbounds [7 x i64], ptr [[SIZES]], i32 0, i32 3
48+
// CHECK-DAG: store i64 %{{.+}}, ptr [[BC]], align 8

0 commit comments

Comments
 (0)