diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 4d3e841f731e1..624078b260687 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -592,6 +592,13 @@ constexpr pi_map_flags PI_MAP_WRITE_INVALIDATE_REGION = using pi_mem_properties = pi_bitfield; constexpr pi_mem_properties PI_MEM_PROPERTIES_CHANNEL = CL_MEM_CHANNEL_INTEL; +// NOTE: this is made 64-bit to match the size of cl_mem_properties_intel to +// make the translation to OpenCL transparent. +using pi_usm_mem_properties = pi_bitfield; +constexpr pi_usm_mem_properties PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL; +constexpr pi_usm_mem_properties PI_MEM_USM_ALLOC_BUFFER_LOCATION = + CL_MEM_ALLOC_BUFFER_LOCATION_INTEL; + // NOTE: queue properties are implemented this way to better support bit // manipulations using pi_queue_properties = pi_bitfield; @@ -1609,10 +1616,6 @@ typedef enum { PI_MEM_TYPE_SHARED = CL_MEM_TYPE_SHARED_INTEL } _pi_usm_type; -typedef enum : pi_bitfield { - PI_MEM_ALLOC_FLAGS = CL_MEM_ALLOC_FLAGS_INTEL -} _pi_usm_mem_properties; - // Flag is used for piProgramUSMEnqueuePrefetch. PI_USM_MIGRATION_TBD0 is a // placeholder for future developments and should not change the behaviour of // piProgramUSMEnqueuePrefetch @@ -1624,7 +1627,6 @@ using pi_usm_capability_query = _pi_usm_capability_query; using pi_usm_capabilities = _pi_usm_capabilities; using pi_mem_info = _pi_mem_info; using pi_usm_type = _pi_usm_type; -using pi_usm_mem_properties = _pi_usm_mem_properties; using pi_usm_migration_flags = _pi_usm_migration_flags; /// Allocates host memory accessible by the device. diff --git a/sycl/include/CL/sycl/detail/property_helper.hpp b/sycl/include/CL/sycl/detail/property_helper.hpp index 12bc497ee2a70..ecd942f310201 100644 --- a/sycl/include/CL/sycl/detail/property_helper.hpp +++ b/sycl/include/CL/sycl/detail/property_helper.hpp @@ -47,7 +47,8 @@ enum PropWithDataKind { ImageUseMutex = 2, ImageContextBound = 3, BufferMemChannel = 4, - PropWithDataKindSize = 5 + AccPropBufferLocation = 5, + PropWithDataKindSize = 6 }; // Base class for dataless properties, needed to check that the type of an diff --git a/sycl/include/CL/sycl/feature_test.hpp.in b/sycl/include/CL/sycl/feature_test.hpp.in index a7f0ca071ee46..fb9f04cf9e62d 100644 --- a/sycl/include/CL/sycl/feature_test.hpp.in +++ b/sycl/include/CL/sycl/feature_test.hpp.in @@ -67,6 +67,7 @@ namespace sycl { #define SYCL_EXT_INTEL_KERNEL_ARGS_RESTRICT 1 #define SYCL_EXT_INTEL_MEM_CHANNEL_PROPERTY 1 #define SYCL_EXT_INTEL_USM_ADDRESS_SPACES 1 +#define SYCL_EXT_INTEL_RUNTIME_BUFFER_LOCATION 1 #define SYCL_EXT_ONEAPI_BACKEND_LEVEL_ZERO 1 #cmakedefine01 SYCL_BUILD_PI_CUDA #if SYCL_BUILD_PI_CUDA diff --git a/sycl/include/CL/sycl/usm.hpp b/sycl/include/CL/sycl/usm.hpp index 381588ac85213..0c9f2705e5fe1 100644 --- a/sycl/include/CL/sycl/usm.hpp +++ b/sycl/include/CL/sycl/usm.hpp @@ -10,6 +10,7 @@ #include #include #include +#include #include diff --git a/sycl/include/sycl/ext/intel/experimental/usm_properties.hpp b/sycl/include/sycl/ext/intel/experimental/usm_properties.hpp new file mode 100644 index 0000000000000..53f6a1145cdab --- /dev/null +++ b/sycl/include/sycl/ext/intel/experimental/usm_properties.hpp @@ -0,0 +1,37 @@ +#pragma once + +#include +#include +#include + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace ext { +namespace intel { +namespace experimental { +namespace property { +namespace usm { + +class buffer_location + : public sycl::detail::PropertyWithData< + sycl::detail::PropWithDataKind::AccPropBufferLocation> { +public: + buffer_location(uint64_t Location) : MLocation(Location) {} + uint64_t get_buffer_location() const { return MLocation; } + +private: + uint64_t MLocation; +}; + +} // namespace usm +} // namespace property +} // namespace experimental +} // namespace intel +} // namespace ext + +template <> +struct is_property + : std::true_type {}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/source/detail/usm/usm_impl.cpp b/sycl/source/detail/usm/usm_impl.cpp index 0650877131c50..bcea4ec56863e 100644 --- a/sycl/source/detail/usm/usm_impl.cpp +++ b/sycl/source/detail/usm/usm_impl.cpp @@ -93,11 +93,13 @@ void *alignedAllocHost(size_t Alignment, size_t Size, const context &Ctxt, void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, const device &Dev, alloc Kind, - const detail::code_location &CL) { + const detail::code_location &CL, + const property_list &PropList = {}) { XPTI_CREATE_TRACEPOINT(CL); void *RetVal = nullptr; if (Size == 0) return nullptr; + if (Ctxt.is_host()) { if (Kind == alloc::unknown) { RetVal = nullptr; @@ -125,8 +127,25 @@ void *alignedAlloc(size_t Alignment, size_t Size, const context &Ctxt, switch (Kind) { case alloc::device: { Id = detail::getSyclObjImpl(Dev)->getHandleRef(); - Error = Plugin.call_nocheck( - &RetVal, C, Id, nullptr, Size, Alignment); + // Parse out buffer location property + // Buffer location is only supported on FPGA devices + bool IsBufferLocSupported = + Dev.has_extension("cl_intel_mem_alloc_buffer_location"); + if (IsBufferLocSupported && + PropList.has_property()) { + auto location = PropList + .get_property() + .get_buffer_location(); + pi_usm_mem_properties props[3] = {PI_MEM_USM_ALLOC_BUFFER_LOCATION, + location, 0}; + Error = Plugin.call_nocheck( + &RetVal, C, Id, props, Size, Alignment); + } else { + Error = Plugin.call_nocheck( + &RetVal, C, Id, nullptr, Size, Alignment); + } break; } case alloc::shared: { @@ -193,8 +212,10 @@ void *malloc_device(size_t Size, const device &Dev, const context &Ctxt, } void *malloc_device(size_t Size, const device &Dev, const context &Ctxt, - const property_list &, const detail::code_location CL) { - return malloc_device(Size, Dev, Ctxt, CL); + const property_list &PropList, + const detail::code_location CL) { + return detail::usm::alignedAlloc(0, Size, Ctxt, Dev, alloc::device, CL, + PropList); } void *malloc_device(size_t Size, const queue &Q, diff --git a/sycl/test/extensions/usm/usm_alloc_utility.cpp b/sycl/test/extensions/usm/usm_alloc_utility.cpp index ddae20778d80f..d45ffafa41713 100644 --- a/sycl/test/extensions/usm/usm_alloc_utility.cpp +++ b/sycl/test/extensions/usm/usm_alloc_utility.cpp @@ -83,7 +83,10 @@ int main() { array = (int *)malloc_device(N * sizeof(int), q); check_and_free(array, dev, ctxt); - array = (int *)malloc_device(N * sizeof(int), q, property_list{}); + array = malloc_device( + N, q, + property_list{ + ext::intel::experimental::property::usm::buffer_location(2)}); check_and_free(array, dev, ctxt); array = (int *)aligned_alloc_device(alignof(long long), N * sizeof(int),