diff --git a/sycl/include/sycl/aspects.hpp b/sycl/include/sycl/aspects.hpp index 418c77b943159..d2e389f4c9f8a 100644 --- a/sycl/include/sycl/aspects.hpp +++ b/sycl/include/sycl/aspects.hpp @@ -12,47 +12,18 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { +#define __SYCL_ASPECT(ASPECT, ID) ASPECT = ID, +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) \ + ASPECT __SYCL2020_DEPRECATED(MESSAGE) = ID, +#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE) \ + __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) enum class __SYCL_TYPE(aspect) aspect { - host = 0, - cpu = 1, - gpu = 2, - accelerator = 3, - custom = 4, - fp16 = 5, - fp64 = 6, - int64_base_atomics __SYCL2020_DEPRECATED("use atomic64 instead") = 7, - int64_extended_atomics __SYCL2020_DEPRECATED("use atomic64 instead") = 8, - image = 9, - online_compiler = 10, - online_linker = 11, - queue_profiling = 12, - usm_device_allocations = 13, - usm_host_allocations = 14, - usm_shared_allocations = 15, - usm_restricted_shared_allocations = 16, - usm_system_allocations = 17, - usm_system_allocator __SYCL2020_DEPRECATED( - "use usm_system_allocations instead") = usm_system_allocations, - ext_intel_pci_address = 18, - ext_intel_gpu_eu_count = 19, - ext_intel_gpu_eu_simd_width = 20, - ext_intel_gpu_slices = 21, - ext_intel_gpu_subslices_per_slice = 22, - ext_intel_gpu_eu_count_per_subslice = 23, - ext_intel_max_mem_bandwidth = 24, - ext_intel_mem_channel = 25, - usm_atomic_host_allocations = 26, - usm_atomic_shared_allocations = 27, - atomic64 = 28, - ext_intel_device_info_uuid = 29, - ext_oneapi_srgb = 30, - ext_oneapi_native_assert = 31, - host_debuggable = 32, - ext_intel_gpu_hw_threads_per_eu = 33, - ext_oneapi_cuda_async_barrier = 34, - ext_oneapi_bfloat16 = 35, - ext_intel_free_memory = 36, +#include +#include }; +#undef __SYCL_ASPECT_DEPRECATED_ALIAS +#undef __SYCL_ASPECT_DEPRECATED +#undef __SYCL_ASPECT } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/include/sycl/detail/pi.h b/sycl/include/sycl/detail/pi.h index 492e43534dcdd..6aeb8728db90a 100644 --- a/sycl/include/sycl/detail/pi.h +++ b/sycl/include/sycl/detail/pi.h @@ -752,6 +752,9 @@ static const uint8_t PI_DEVICE_BINARY_OFFLOAD_KIND_SYCL = 4; #define __SYCL_PI_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS "SYCL/exported symbols" /// PropertySetRegistry::SYCL_DEVICE_GLOBALS defined in PropertySetIO.h #define __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_GLOBALS "SYCL/device globals" +/// PropertySetRegistry::SYCL_DEVICE_REQUIREMENTS defined in PropertySetIO.h +#define __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS \ + "SYCL/device requirements" /// Program metadata tags recognized by the PI backends. For kernels the tag /// must appear after the kernel name. diff --git a/sycl/include/sycl/info/aspects.def b/sycl/include/sycl/info/aspects.def new file mode 100644 index 0000000000000..223b36496f6a8 --- /dev/null +++ b/sycl/include/sycl/info/aspects.def @@ -0,0 +1,35 @@ +__SYCL_ASPECT(host, 0) +__SYCL_ASPECT(cpu, 1) +__SYCL_ASPECT(gpu, 2) +__SYCL_ASPECT(accelerator, 3) +__SYCL_ASPECT(custom, 4) +__SYCL_ASPECT(fp16, 5) +__SYCL_ASPECT(fp64, 6) +__SYCL_ASPECT(image, 9) +__SYCL_ASPECT(online_compiler, 10) +__SYCL_ASPECT(online_linker, 11) +__SYCL_ASPECT(queue_profiling, 12) +__SYCL_ASPECT(usm_device_allocations, 13) +__SYCL_ASPECT(usm_host_allocations, 14) +__SYCL_ASPECT(usm_shared_allocations, 15) +__SYCL_ASPECT(usm_restricted_shared_allocations, 16) +__SYCL_ASPECT(usm_system_allocations, 17) +__SYCL_ASPECT(ext_intel_pci_address, 18) +__SYCL_ASPECT(ext_intel_gpu_eu_count, 19) +__SYCL_ASPECT(ext_intel_gpu_eu_simd_width, 20) +__SYCL_ASPECT(ext_intel_gpu_slices, 21) +__SYCL_ASPECT(ext_intel_gpu_subslices_per_slice, 22) +__SYCL_ASPECT(ext_intel_gpu_eu_count_per_subslice, 23) +__SYCL_ASPECT(ext_intel_max_mem_bandwidth, 24) +__SYCL_ASPECT(ext_intel_mem_channel, 25) +__SYCL_ASPECT(usm_atomic_host_allocations, 26) +__SYCL_ASPECT(usm_atomic_shared_allocations, 27) +__SYCL_ASPECT(atomic64, 28) +__SYCL_ASPECT(ext_intel_device_info_uuid, 29) +__SYCL_ASPECT(ext_oneapi_srgb, 30) +__SYCL_ASPECT(ext_oneapi_native_assert, 31) +__SYCL_ASPECT(host_debuggable, 32) +__SYCL_ASPECT(ext_intel_gpu_hw_threads_per_eu, 33) +__SYCL_ASPECT(ext_oneapi_cuda_async_barrier, 34) +__SYCL_ASPECT(ext_oneapi_bfloat16, 35) +__SYCL_ASPECT(ext_intel_free_memory, 36) diff --git a/sycl/include/sycl/info/aspects_deprecated.def b/sycl/include/sycl/info/aspects_deprecated.def new file mode 100644 index 0000000000000..83c5ed1a498da --- /dev/null +++ b/sycl/include/sycl/info/aspects_deprecated.def @@ -0,0 +1,4 @@ +__SYCL_ASPECT_DEPRECATED(int64_base_atomics, 7, "use atomic64 instead") +__SYCL_ASPECT_DEPRECATED(int64_extended_atomics, 8, "use atomic64 instead") +// Special macro for aspects that don't have own token +__SYCL_ASPECT_DEPRECATED_ALIAS(usm_system_allocator, usm_system_allocations, "use usm_system_allocations instead") diff --git a/sycl/include/sycl/stl.hpp b/sycl/include/sycl/stl.hpp index 13bcebf06a147..2ae34956977af 100644 --- a/sycl/include/sycl/stl.hpp +++ b/sycl/include/sycl/stl.hpp @@ -19,6 +19,7 @@ #include #include #include +#include #include namespace sycl { diff --git a/sycl/source/detail/device_binary_image.cpp b/sycl/source/detail/device_binary_image.cpp index 708e5ec44b8ec..38aab48e0229e 100644 --- a/sycl/source/detail/device_binary_image.cpp +++ b/sycl/source/detail/device_binary_image.cpp @@ -175,6 +175,7 @@ void RTDeviceBinaryImage::init(pi_device_binary Bin) { ProgramMetadata.init(Bin, __SYCL_PI_PROPERTY_SET_PROGRAM_METADATA); ExportedSymbols.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_EXPORTED_SYMBOLS); DeviceGlobals.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_GLOBALS); + DeviceRequirements.init(Bin, __SYCL_PI_PROPERTY_SET_SYCL_DEVICE_REQUIREMENTS); } DynRTDeviceBinaryImage::DynRTDeviceBinaryImage( diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index 8d0251b5bac36..662d5a4194790 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -190,6 +190,9 @@ class RTDeviceBinaryImage { const PropertyRange &getProgramMetadata() const { return ProgramMetadata; } const PropertyRange &getExportedSymbols() const { return ExportedSymbols; } const PropertyRange &getDeviceGlobals() const { return DeviceGlobals; } + const PropertyRange &getDeviceRequirements() const { + return DeviceRequirements; + } protected: void init(pi_device_binary Bin); @@ -207,6 +210,7 @@ class RTDeviceBinaryImage { RTDeviceBinaryImage::PropertyRange ProgramMetadata; RTDeviceBinaryImage::PropertyRange ExportedSymbols; RTDeviceBinaryImage::PropertyRange DeviceGlobals; + RTDeviceBinaryImage::PropertyRange DeviceRequirements; }; // Dynamically allocated device binary image, which de-allocates its binary diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 7ad1c1b649a63..51b2d9eb16c1d 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -543,14 +544,57 @@ RT::PiProgram ProgramManager::getBuiltPIProgram( DeviceImplPtr Dev = (MustBuildOnSubdevice == PI_TRUE) ? DeviceImpl : RootDevImpl; - auto BuildF = [this, &M, &KSId, &ContextImpl, &Dev, Prg, &CompileOpts, - &LinkOpts, &JITCompilationIsRequired, SpecConsts] { - auto Context = createSyclObjFromImpl(ContextImpl); - auto Device = createSyclObjFromImpl(Dev); + auto Context = createSyclObjFromImpl(ContextImpl); + auto Device = createSyclObjFromImpl(Dev); + const RTDeviceBinaryImage &Img = + getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); + + // Check that device supports all aspects used by the kernel + const RTDeviceBinaryImage::PropertyRange &ARange = + Img.getDeviceRequirements(); + +#define __SYCL_ASPECT(ASPECT, ID) \ + case aspect::ASPECT: \ + return #ASPECT; +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID) +// We don't need "case aspect::usm_allocator" here because it will duplicate +// "case aspect::usm_system_allocations", therefore leave this macro empty +#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE) + auto getAspectNameStr = [](aspect AspectNum) -> std::string { + switch (AspectNum) { +#include +#include + default: + throw sycl::exception( + errc::kernel_not_supported, + "Unknown aspect " + std::to_string(static_cast(AspectNum))); + } + }; +#undef __SYCL_ASPECT_DEPRECATED_ALIAS +#undef __SYCL_ASPECT_DEPRECATED +#undef __SYCL_ASPECT - const RTDeviceBinaryImage &Img = - getDeviceImage(M, KSId, Context, Device, JITCompilationIsRequired); + for (RTDeviceBinaryImage::PropertyRange::ConstIterator It : ARange) { + using namespace std::literals; + if ((*It)->Name != "aspects"sv) + continue; + ByteArray Aspects = DeviceBinaryProperty(*It).asByteArray(); + // 8 because we need to skip 64-bits of size of the byte array + auto *AIt = reinterpret_cast(&Aspects[8]); + auto *AEnd = + reinterpret_cast(&Aspects[0] + Aspects.size()); + while (AIt != AEnd) { + auto Aspect = static_cast(*AIt); + if (!Dev->has(Aspect)) + throw sycl::exception(errc::kernel_not_supported, + "Required aspect " + getAspectNameStr(Aspect) + + " is not supported on the device"); + ++AIt; + } + } + auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts, + &LinkOpts, SpecConsts, &KernelName] { applyOptionsFromImage(CompileOpts, LinkOpts, Img); const detail::plugin &Plugin = ContextImpl->getPlugin();