Skip to content

[Offload] Define additional device info properties #152533

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions offload/liboffload/API/APIDefs.td
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,10 @@ class Enum : APIObject {
// all Etor values must be TaggedEtor records
bit is_typed = 0;

// This refers to whether the enumerator is used to name bits of a bit field,
// where consecutive values are bit-shifted rather than incremented.
bit is_bit_field = 0;

list<Etor> etors = [];
}

Expand Down
34 changes: 33 additions & 1 deletion offload/liboffload/API/Device.td
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,49 @@ def DeviceInfo : Enum {
let name = "ol_device_info_t";
let desc = "Supported device info.";
let is_typed = 1;
let etors =[
list<TaggedEtor> basic_etors =[
TaggedEtor<"TYPE", "ol_device_type_t", "type of the device">,
TaggedEtor<"PLATFORM", "ol_platform_handle_t", "the platform associated with the device">,
TaggedEtor<"NAME", "char[]", "Device name">,
TaggedEtor<"VENDOR", "char[]", "Device vendor">,
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
TaggedEtor<"MAX_WORK_GROUP_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work group size in each dimension">,
TaggedEtor<"VENDOR_ID", "uint32_t", "A unique vendor device identifier assigned by PCI-SIG">,
TaggedEtor<"NUM_COMPUTE_UNITS", "uint32_t", "The number of parallel compute units available to the device">,
TaggedEtor<"MAX_CLOCK_FREQUENCY", "uint32_t", "The maximum configured clock frequency of this device in MHz">,
TaggedEtor<"MEMORY_CLOCK_RATE", "uint32_t", "Memory clock frequency in MHz">,
TaggedEtor<"ADDRESS_BITS", "uint32_t", "Number of bits used to represent an address in device memory">,
TaggedEtor<"MAX_MEM_ALLOC_SIZE", "uint64_t", "The maximum size of memory object allocation in bytes">,
TaggedEtor<"GLOBAL_MEM_SIZE", "uint64_t", "The size of global device memory in bytes">,
];
list<TaggedEtor> fp_configs = !foreach(type, ["Single", "Double", "Half"], TaggedEtor<type # "_FP_CONFIG", "ol_device_fp_capability_flags_t", type # " precision floating point capability">);
list<TaggedEtor> native_vec_widths = !foreach(type, ["char","short","int","long","float","double","half"], TaggedEtor<"NATIVE_VECTOR_WIDTH_" # type, "uint32_t", "Native vector width for " # type>);
let etors = !listconcat(basic_etors, fp_configs, native_vec_widths);
}

// Individual floating-point capability bits of a device.
// `is_bit_field` marks the enumerators as names for bits of a bit field, so
// consecutive values are bit-shifted rather than incremented and can be
// OR'ed together into an `ol_device_fp_capability_flags_t` mask.
def : Enum {
let name = "ol_device_fp_capability_flag_t";
let desc = "Device floating-point capability flags";
let is_bit_field = 1;
let etors =[
Etor<"CORRECTLY_ROUNDED_DIVIDE_SQRT", "Support correctly rounded divide and sqrt">,
Etor<"ROUND_TO_NEAREST", "Support round to nearest">,
Etor<"ROUND_TO_ZERO", "Support round to zero">,
Etor<"ROUND_TO_INF", "Support round to infinity">,
// INF and NaN are values that are supported, not a conversion between them.
Etor<"INF_NAN", "Support INF and NaN values">,
Etor<"DENORM", "Support denorm">,
Etor<"FMA", "Support fused multiply-add">,
Etor<"SOFT_FLOAT", "Basic floating point operations implemented in software">,
];
}

// Integer type that carries a mask of `ol_device_fp_capability_flag_t` bits.
// It is the value type of the *_FP_CONFIG device info properties.
def : Typedef {
let name = "ol_device_fp_capability_flags_t";
let desc = "Device floating-point capability flags";
let value = "uint32_t";
}

def : FptrTypedef {
let name = "ol_device_iterate_cb_t";
let desc = "User-provided function to be used with `olIterateDevices`";
Expand Down
93 changes: 89 additions & 4 deletions offload/liboffload/src/OffloadImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,50 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
};

// These are not implemented by the plugin interface
if (PropName == OL_DEVICE_INFO_PLATFORM)
switch (PropName) {
case OL_DEVICE_INFO_PLATFORM:
return Info.write<void *>(Device->Platform);
if (PropName == OL_DEVICE_INFO_TYPE)

case OL_DEVICE_INFO_TYPE:
return Info.write<ol_device_type_t>(OL_DEVICE_TYPE_GPU);

case OL_DEVICE_INFO_SINGLE_FP_CONFIG:
case OL_DEVICE_INFO_DOUBLE_FP_CONFIG: {
ol_device_fp_capability_flags_t flags{0};
flags |= OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
OL_DEVICE_FP_CAPABILITY_FLAG_DENORM |
OL_DEVICE_FP_CAPABILITY_FLAG_FMA;
return Info.write(flags);
}

case OL_DEVICE_INFO_HALF_FP_CONFIG:
return Info.write<ol_device_fp_capability_flags_t>(0);

case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE:
return Info.write<uint32_t>(1);

case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
return Info.write<uint32_t>(0);

// None of the existing plugins specify a limit on a single allocation,
// so return the global memory size instead
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
PropName = OL_DEVICE_INFO_GLOBAL_MEM_SIZE;
break;

default:
break;
}

if (PropName >= OL_DEVICE_INFO_LAST)
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
"getDeviceInfo enum '%i' is invalid", PropName);
Expand All @@ -316,6 +356,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
"plugin did not provide a response for this information");
auto Entry = *EntryOpt;

// Retrieve properties from the plugin interface
switch (PropName) {
case OL_DEVICE_INFO_NAME:
case OL_DEVICE_INFO_VENDOR:
Expand All @@ -327,7 +368,20 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
return Info.writeString(std::get<std::string>(Entry->Value).c_str());
}

case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE: {
// Uint64 values
if (!std::holds_alternative<uint64_t>(Entry->Value))
return makeError(ErrorCode::BACKEND_FAILURE,
"plugin returned incorrect type");
return Info.write(std::get<uint64_t>(Entry->Value));
}

case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
case OL_DEVICE_INFO_VENDOR_ID:
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
case OL_DEVICE_INFO_ADDRESS_BITS:
case OL_DEVICE_INFO_MAX_CLOCK_FREQUENCY:
case OL_DEVICE_INFO_MEMORY_CLOCK_RATE: {
// Uint32 values
if (!std::holds_alternative<uint64_t>(Entry->Value))
return makeError(ErrorCode::BACKEND_FAILURE,
Expand Down Expand Up @@ -389,9 +443,40 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
case OL_DEVICE_INFO_DRIVER_VERSION:
return Info.writeString(LLVM_VERSION_STRING);
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
return Info.write<uint64_t>(1);
return Info.write<uint32_t>(1);
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION:
return Info.write<ol_dimensions_t>(ol_dimensions_t{1, 1, 1});
case OL_DEVICE_INFO_VENDOR_ID:
return Info.write<uint32_t>(0);
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
return Info.write<uint32_t>(1);
case OL_DEVICE_INFO_SINGLE_FP_CONFIG:
case OL_DEVICE_INFO_DOUBLE_FP_CONFIG:
return Info.write<ol_device_fp_capability_flags_t>(
OL_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
OL_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
OL_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
OL_DEVICE_FP_CAPABILITY_FLAG_DENORM | OL_DEVICE_FP_CAPABILITY_FLAG_FMA);
case OL_DEVICE_INFO_HALF_FP_CONFIG:
return Info.write<ol_device_fp_capability_flags_t>(0);
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_SHORT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_INT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_LONG:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_FLOAT:
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_DOUBLE:
return Info.write<uint32_t>(1);
case OL_DEVICE_INFO_NATIVE_VECTOR_WIDTH_HALF:
return Info.write<uint32_t>(0);
// The host device reports placeholder values; its clock rates are unknown,
// so return 0 instead of falling through to the address-width result below.
case OL_DEVICE_INFO_MAX_CLOCK_FREQUENCY:
case OL_DEVICE_INFO_MEMORY_CLOCK_RATE:
return Info.write<uint32_t>(0);
// Address width of the host is the number of value bits in a pointer-sized
// integer.
case OL_DEVICE_INFO_ADDRESS_BITS:
return Info.write<uint32_t>(std::numeric_limits<uintptr_t>::digits);
case OL_DEVICE_INFO_MAX_MEM_ALLOC_SIZE:
case OL_DEVICE_INFO_GLOBAL_MEM_SIZE:
return Info.write<uint64_t>(0);
default:
return createOffloadError(ErrorCode::INVALID_ENUMERATION,
"getDeviceInfo enum '%i' is invalid", PropName);
Expand Down
6 changes: 6 additions & 0 deletions offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,16 @@ typedef enum {
HSA_ISA_INFO_NAME = 1
} hsa_isa_info_t;

// Machine model of an agent, queried through HSA_AGENT_INFO_MACHINE_MODEL.
// The AMDGPU plugin reports 32 address bits for SMALL and 64 bits otherwise.
typedef enum {
HSA_MACHINE_MODEL_SMALL = 0,
HSA_MACHINE_MODEL_LARGE = 1
} hsa_machine_model_t;

typedef enum {
HSA_AGENT_INFO_NAME = 0,
HSA_AGENT_INFO_VENDOR_NAME = 1,
HSA_AGENT_INFO_FEATURE = 2,
HSA_AGENT_INFO_MACHINE_MODEL = 3,
HSA_AGENT_INFO_PROFILE = 4,
HSA_AGENT_INFO_WAVEFRONT_SIZE = 6,
HSA_AGENT_INFO_WORKGROUP_MAX_DIM = 7,
Expand Down
1 change: 1 addition & 0 deletions offload/plugins-nextgen/amdgpu/dynamic_hsa/hsa_ext_amd.h
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ typedef enum hsa_amd_agent_info_s {
HSA_AMD_AGENT_INFO_CACHELINE_SIZE = 0xA001,
HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT = 0xA002,
HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY = 0xA003,
HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY = 0xA008,
HSA_AMD_AGENT_INFO_PRODUCT_NAME = 0xA009,
HSA_AMD_AGENT_INFO_MAX_WAVES_PER_CU = 0xA00A,
HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU = 0xA00B,
Expand Down
25 changes: 22 additions & 3 deletions offload/plugins-nextgen/amdgpu/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2642,6 +2642,15 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
if (Status == HSA_STATUS_SUCCESS)
Info.add("Vendor Name", TmpChar, "", DeviceInfo::VENDOR);

Info.add("Vendor ID", uint64_t{4130}, "", DeviceInfo::VENDOR_ID);

hsa_machine_model_t MachineModel;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_MACHINE_MODEL, MachineModel);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Memory Address Size",
uint64_t{MachineModel == HSA_MACHINE_MODEL_SMALL ? 32u : 64u},
"bits", DeviceInfo::ADDRESS_BITS);

hsa_device_type_t DevType;
Status = getDeviceAttrRaw(HSA_AGENT_INFO_DEVICE, DevType);
if (Status == HSA_STATUS_SUCCESS) {
Expand Down Expand Up @@ -2692,11 +2701,17 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MAX_CLOCK_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max Clock Freq", TmpUInt, "MHz");
Info.add("Max Clock Freq", TmpUInt, "MHz",
DeviceInfo::MAX_CLOCK_FREQUENCY);

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_MEMORY_MAX_FREQUENCY, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Max Memory Clock Freq", TmpUInt, "MHz",
DeviceInfo::MEMORY_CLOCK_RATE);

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_COMPUTE_UNIT_COUNT, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Info.add("Compute Units", TmpUInt);
Info.add("Compute Units", TmpUInt, "", DeviceInfo::NUM_COMPUTE_UNITS);

Status = getDeviceAttrRaw(HSA_AMD_AGENT_INFO_NUM_SIMDS_PER_CU, TmpUInt);
if (Status == HSA_STATUS_SUCCESS)
Expand Down Expand Up @@ -2778,7 +2793,11 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {

Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_SIZE, TmpSt);
if (Status == HSA_STATUS_SUCCESS)
PoolNode.add("Size", TmpSt, "bytes");
PoolNode.add(
"Size", TmpSt, "bytes",
(Pool->isGlobal() && Pool->isCoarseGrained())
? std::optional<DeviceInfo>{DeviceInfo::GLOBAL_MEM_SIZE}
: std::nullopt);

Status = Pool->getAttrRaw(HSA_AMD_MEMORY_POOL_INFO_RUNTIME_ALLOC_ALLOWED,
TmpBool);
Expand Down
17 changes: 13 additions & 4 deletions offload/plugins-nextgen/cuda/src/rtl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -949,13 +949,20 @@ struct CUDADeviceTy : public GenericDeviceTy {

Info.add("Vendor Name", "NVIDIA", "", DeviceInfo::VENDOR);

Info.add("Vendor ID", uint64_t{4318}, "", DeviceInfo::VENDOR_ID);

Info.add("Memory Address Size", std::numeric_limits<CUdeviceptr>::digits,
"bits", DeviceInfo::ADDRESS_BITS);

Res = cuDeviceTotalMem(&TmpSt, Device);
if (Res == CUDA_SUCCESS)
Info.add("Global Memory Size", TmpSt, "bytes");
Info.add("Global Memory Size", TmpSt, "bytes",
DeviceInfo::GLOBAL_MEM_SIZE);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add("Number of Multiprocessors", TmpInt);
Info.add("Number of Multiprocessors", TmpInt, "",
DeviceInfo::NUM_COMPUTE_UNITS);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GPU_OVERLAP, TmpInt);
if (Res == CUDA_SUCCESS)
Expand Down Expand Up @@ -1016,7 +1023,8 @@ struct CUDADeviceTy : public GenericDeviceTy {

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_CLOCK_RATE, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add("Clock Rate", TmpInt, "kHz");
Info.add("Clock Rate", TmpInt / 1000, "MHz",
DeviceInfo::MAX_CLOCK_FREQUENCY);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_KERNEL_EXEC_TIMEOUT, TmpInt);
if (Res == CUDA_SUCCESS)
Expand Down Expand Up @@ -1053,7 +1061,8 @@ struct CUDADeviceTy : public GenericDeviceTy {

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, TmpInt);
if (Res == CUDA_SUCCESS)
Info.add("Memory Clock Rate", TmpInt, "kHz");
Info.add("Memory Clock Rate", TmpInt / 1000, "MHz",
DeviceInfo::MEMORY_CLOCK_RATE);

Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, TmpInt);
if (Res == CUDA_SUCCESS)
Expand Down
10 changes: 8 additions & 2 deletions offload/tools/offload-tblgen/APIGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) {
OS << formatv("/// @brief {0}\n", Enum.getDesc());
OS << formatv("typedef enum {0} {{\n", Enum.getName());

uint32_t EtorVal = 0;
// Bitfields start from 1, other enums from 0
uint32_t EtorVal = Enum.isBitField();
for (const auto &EnumVal : Enum.getValues()) {
if (Enum.isTyped()) {
OS << MakeComment(
Expand All @@ -141,7 +142,12 @@ static void ProcessEnum(const EnumRec &Enum, raw_ostream &OS) {
OS << MakeComment(EnumVal.getDesc());
}
OS << formatv(TAB_1 "{0}_{1} = {2},\n", Enum.getEnumValNamePrefix(),
EnumVal.getName(), EtorVal++);
EnumVal.getName(), EtorVal);
if (Enum.isBitField()) {
EtorVal <<= 1u;
} else {
++EtorVal;
}
}

// Add last_element/force uint32 val
Expand Down
14 changes: 10 additions & 4 deletions offload/tools/offload-tblgen/MiscGen.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,10 +107,16 @@ void EmitOffloadInfo(const RecordKeeper &Records, raw_ostream &OS) {

)";

auto ErrorCodeEnum = EnumRec{Records.getDef("DeviceInfo")};
uint32_t EtorVal = 0;
for (const auto &EnumVal : ErrorCodeEnum.getValues()) {
auto Enum = EnumRec{Records.getDef("DeviceInfo")};
// Bitfields start from 1, other enums from 0
uint32_t EtorVal = Enum.isBitField();
for (const auto &EnumVal : Enum.getValues()) {
OS << formatv(TAB_1 "OFFLOAD_DEVINFO({0}, \"{1}\", {2})\n",
EnumVal.getName(), EnumVal.getDesc(), EtorVal++);
EnumVal.getName(), EnumVal.getDesc(), EtorVal);
if (Enum.isBitField()) {
EtorVal <<= 1u;
} else {
++EtorVal;
}
}
}
2 changes: 2 additions & 0 deletions offload/tools/offload-tblgen/RecordTypes.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,8 @@ class EnumRec {

/// Returns the value of the record's `is_typed` bit.
bool isTyped() const {
  const bool Typed = rec->getValueAsBit("is_typed");
  return Typed;
}

/// Returns the value of the record's `is_bit_field` bit.
bool isBitField() const {
  const bool BitField = rec->getValueAsBit("is_bit_field");
  return BitField;
}

private:
const Record *rec;
std::vector<EnumValueRec> vals;
Expand Down
Loading
Loading