Skip to content

Commit 7e5800e

Browse files
committed
Return the global memory with enough capacity as default mem_id
1 parent 0ae3326 commit 7e5800e

File tree

3 files changed

+75
-18
lines changed

3 files changed

+75
-18
lines changed

include/acl_mem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ cl_int acl_reserve_buffer_block(cl_mem mem, acl_mem_region_t *region,
6969

7070
int acl_get_default_memory(const acl_device_def_t &dev);
7171
int acl_get_default_device_global_memory(const acl_device_def_t &dev);
72+
int acl_get_fit_device_global_memory(const acl_device_def_t &dev,
73+
const size_t size);
7274

7375
void acl_mem_destructor_callback(
7476
cl_mem memobj); // The function that calls the user registered callbacks via

include/acl_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -947,6 +947,7 @@ typedef struct _cl_mem {
947947

948948
// If this is a heterogeneous buffer, what is the index of the memory it uses
949949
unsigned int mem_id;
950+
bool buffer_location_set;
950951

951952
// Is this buffer an SVM buffer
952953
int is_svm;

src/acl_mem.cpp

Lines changed: 72 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -264,25 +264,32 @@ static int acl_do_physical_buffer_allocation(unsigned physical_device_id,
264264

265265
acl_assert_locked();
266266

267-
// When mem_id == 0 it indicates the mem_id is not finalized yet so need to
268-
// set it to a real value here.
267+
// When mem_id == 0 and the buffer location is not set, the mem_id has not
268+
// been finalized yet, so it needs to be set to a real value here.
269269
bool glob_mem = mem->block_allocation->region == &(acl_platform.global_mem);
270-
if (glob_mem && mem->mem_id == 0) {
270+
if (glob_mem && mem->mem_id == 0 && !mem->buffer_location_set) {
271271
// Memory migration between SVM and device global memory is not supported.
272-
// If device supports both, do physical buffer allocation on device global
273-
// memory only.
274-
if (acl_svm_device_supports_physical_memory(physical_device_id) &&
275-
acl_svm_device_supports_any_svm(physical_device_id)) {
272+
// When the device supports device global memory, do the physical buffer
273+
// allocation on device global memory only. This holds whether or not SVM is
274+
// also supported: without SVM, device global memory is the only option.
275+
if (acl_svm_device_supports_physical_memory(physical_device_id)) {
276276
assert(
277277
acl_platform.device[physical_device_id]
278278
.def.autodiscovery_def.num_global_mem_systems > 0 &&
279279
"Device is not configured to support SVM and device global memory.");
280-
int tmp_mem_id = acl_get_default_device_global_memory(
281-
acl_platform.device[physical_device_id].def);
282-
assert(tmp_mem_id >= 0 &&
283-
"Device does not have any device global memory.");
280+
int tmp_mem_id = acl_get_fit_device_global_memory(
281+
acl_platform.device[physical_device_id].def, mem->size);
282+
assert(tmp_mem_id >= 0 && "Device does not have any device global memory "
283+
"for the allocation size.");
284284
mem->mem_id = (unsigned int)tmp_mem_id;
285285
}
286+
} else if (mem->buffer_location_set) {
287+
// Sanity check if the specified buffer location is large enough
288+
assert(
289+
ACL_RANGE_SIZE(acl_platform.device[physical_device_id]
290+
.def.autodiscovery_def.global_mem_defs[mem->mem_id]
291+
.get_usable_range()) >= mem->size &&
292+
"Specified buffer location does not fit the requested allocation size");
286293
}
287294

288295
int result = 0;
@@ -420,6 +427,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
420427
unsigned int idevice;
421428
cl_uint bank_id = 0;
422429
cl_uint tmp_mem_id = 0;
430+
bool buffer_location_set = false;
423431
std::scoped_lock lock{acl_mutex_wrapper};
424432

425433
#ifdef MEM_DEBUG_MSG
@@ -436,6 +444,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
436444
bank_id = (cl_uint) * (properties + 1);
437445
} break;
438446
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
447+
buffer_location_set = true;
439448
tmp_mem_id = (cl_uint) * (properties + 1);
440449

441450
// In FullSystem flow, buffer location is always the index of the global
@@ -612,6 +621,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
612621
"Could not allocate a cl_mem object");
613622
}
614623
mem->mem_id = tmp_mem_id;
624+
mem->buffer_location_set = buffer_location_set;
615625

616626
mem->block_allocation = new_block;
617627
mem->block_allocation->mem_obj = mem;
@@ -4161,8 +4171,15 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA(
41614171
physical_id = command_queue->device->def.physical_device_id;
41624172
// SVM memory is not associated with any device.
41634173
// Migration should only be moving device global memories.
4164-
int tmp_mem_id =
4165-
acl_get_default_device_global_memory(command_queue->device->def);
4174+
// First find the largest mem object.
4175+
size_t mem_objects_largest_size = 0;
4176+
for (i = 0; i < num_mem_objects; ++i) {
4177+
if (mem_objects[i]->size > mem_objects_largest_size) {
4178+
mem_objects_largest_size = mem_objects[i]->size;
4179+
}
4180+
}
4181+
int tmp_mem_id = acl_get_fit_device_global_memory(command_queue->device->def,
4182+
mem_objects_largest_size);
41664183
if (tmp_mem_id < 0) {
41674184
ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context,
41684185
"Can not find default global memory system");
@@ -4901,11 +4918,8 @@ int acl_mem_is_valid(cl_mem mem) {
49014918
}
49024919

49034920
// Iterate through device's global memories and return the ID of the first
4904-
// device private global memory.
4905-
// This is needed over acl_get_default_memory(dev) because device can have
4906-
// both device private and shared virtual memory.
4907-
// Returns the id of the memory that clCreateBuffer would allocate in by default
4908-
// (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4921+
// device private global memory
4922+
// TODO: Used for ARM board device query, need to verify correctness
49094923
int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
49104924
int lowest_gmem_idx = -1;
49114925
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF;
@@ -4931,6 +4945,46 @@ int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
49314945
return lowest_gmem_idx;
49324946
}
49334947

4948+
// Iterate through the device's global memories and return the ID of the
4949+
// lowest-addressed device private global memory that can fit the allocation.
4950+
// This is needed over acl_get_default_memory(dev) because device can have
4951+
// both device private and shared virtual memory.
4952+
// Returns the id of the memory that clCreateBuffer would allocate in by default
4953+
// (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4954+
int acl_get_fit_device_global_memory(const acl_device_def_t &dev,
4955+
const size_t size) {
4956+
int lowest_gmem_idx = -1;
4957+
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF;
4958+
acl_assert_locked();
4959+
4960+
// If the device has no physical memory then clCreateBuffer will fall back to
4961+
// allocating device global memory in the default memory.
4962+
if (!acl_svm_device_supports_physical_memory(dev.physical_device_id))
4963+
return acl_get_default_memory(dev);
4964+
for (unsigned gmem_idx = 0;
4965+
gmem_idx < dev.autodiscovery_def.num_global_mem_systems; gmem_idx++) {
4966+
acl_system_global_mem_allocation_type_t alloc_type =
4967+
dev.autodiscovery_def.global_mem_defs[gmem_idx].allocation_type;
4968+
bool is_device_alloc =
4969+
!alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION);
4970+
bool is_device_private =
4971+
dev.autodiscovery_def.global_mem_defs[gmem_idx].type ==
4972+
ACL_GLOBAL_MEM_DEVICE_PRIVATE;
4973+
if (is_device_private && is_device_alloc &&
4974+
(size_t)dev.autodiscovery_def.global_mem_defs[gmem_idx].range.begin <
4975+
lowest_gmem_begin &&
4976+
ACL_RANGE_SIZE(dev.autodiscovery_def.global_mem_defs[gmem_idx]
4977+
.get_usable_range()) >= size) {
4978+
lowest_gmem_begin =
4979+
(size_t)dev.autodiscovery_def.global_mem_defs[gmem_idx].range.begin;
4980+
lowest_gmem_idx = static_cast<int>(gmem_idx);
4981+
}
4982+
}
4983+
4984+
// This can return -1, which means no device private memory fits the size
4985+
return lowest_gmem_idx;
4986+
}
4987+
49344988
// Memory systems are listed with the default memory at the first index
49354989
int acl_get_default_memory(const acl_device_def_t &) { return 0; }
49364990

0 commit comments

Comments
 (0)