Skip to content

Commit a0d2f16

Browse files
committed
Return the global memory with enough capacity as default mem_id
1 parent a678e6c commit a0d2f16

File tree

3 files changed

+66
-16
lines changed

3 files changed

+66
-16
lines changed

include/acl_mem.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,8 @@ cl_int acl_reserve_buffer_block(cl_mem mem, acl_mem_region_t *region,
6969

7070
int acl_get_default_memory(const acl_device_def_t &dev);
7171
int acl_get_default_device_global_memory(const acl_device_def_t &dev);
72+
int acl_get_fit_device_global_memory(const acl_device_def_t &dev,
73+
const size_t size);
7274

7375
void acl_mem_destructor_callback(
7476
cl_mem memobj); // The function that calls the user registered callbacks via

include/acl_types.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -940,6 +940,7 @@ typedef struct _cl_mem {
940940

941941
// If this is a heterogeneous buffer, what is the index of the memory it uses
942942
unsigned int mem_id;
943+
bool buffer_location_set;
943944

944945
// Is this buffer an SVM buffer
945946
int is_svm;

src/acl_mem.cpp

Lines changed: 63 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -264,21 +264,21 @@ static int acl_do_physical_buffer_allocation(unsigned physical_device_id,
264264

265265
acl_assert_locked();
266266

267-
// When mem_id == 0 it indicates the mem_id is not finalized yet so need to
268-
// set it to a real value here.
267+
// When mem_id == 0 and the buffer location is not set, the mem_id is not
268+
// finalized yet, so it needs to be set to a real value here.
269269
bool glob_mem = mem->block_allocation->region == &(acl_platform.global_mem);
270-
if (glob_mem && mem->mem_id == 0) {
270+
if (glob_mem && mem->mem_id == 0 && !mem->buffer_location_set) {
271271
// Memory migration between SVM and device global memory is not supported.
272-
// If device supports both, do physical buffer allocation on device global
273-
// memory only.
274-
if (acl_svm_device_supports_physical_memory(physical_device_id) &&
275-
acl_svm_device_supports_any_svm(physical_device_id)) {
272+
// When the device supports device global memory, if SVM is also supported,
273+
// do physical buffer allocation on device global memory only; else if SVM
274+
// is not supported, we can only allocate on device global memory.
275+
if (acl_svm_device_supports_physical_memory(physical_device_id)) {
276276
assert(
277277
acl_platform.device[physical_device_id]
278278
.def.autodiscovery_def.num_global_mem_systems > 0 &&
279279
"Device is not configured to support SVM and device global memory.");
280-
int tmp_mem_id = acl_get_default_device_global_memory(
281-
acl_platform.device[physical_device_id].def);
280+
int tmp_mem_id = acl_get_fit_device_global_memory(
281+
acl_platform.device[physical_device_id].def, mem->size);
282282
assert(tmp_mem_id >= 0 &&
283283
"Device does not have any device global memory.");
284284
mem->mem_id = (unsigned int)tmp_mem_id;
@@ -420,6 +420,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
420420
unsigned int idevice;
421421
cl_uint bank_id = 0;
422422
cl_uint tmp_mem_id = 0;
423+
bool buffer_location_set = false;
423424
std::scoped_lock lock{acl_mutex_wrapper};
424425

425426
#ifdef MEM_DEBUG_MSG
@@ -436,6 +437,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
436437
bank_id = (cl_uint) * (properties + 1);
437438
} break;
438439
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
440+
buffer_location_set = true;
439441
tmp_mem_id = (cl_uint) * (properties + 1);
440442

441443
// In FullSystem flow, buffer location is always the index of the global
@@ -612,6 +614,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
612614
"Could not allocate a cl_mem object");
613615
}
614616
mem->mem_id = tmp_mem_id;
617+
mem->buffer_location_set = buffer_location_set;
615618

616619
mem->block_allocation = new_block;
617620
mem->block_allocation->mem_obj = mem;
@@ -4161,8 +4164,15 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA(
41614164
physical_id = command_queue->device->def.physical_device_id;
41624165
// SVM memory is not associated with any device.
41634166
// Migration should only be moving device global memories.
4164-
int tmp_mem_id =
4165-
acl_get_default_device_global_memory(command_queue->device->def);
4167+
// First find the largest mem object.
4168+
size_t mem_objects_largest_size = 0;
4169+
for (i = 0; i < num_mem_objects; ++i) {
4170+
if (mem_objects[i]->size > mem_objects_largest_size) {
4171+
mem_objects_largest_size = mem_objects[i]->size;
4172+
}
4173+
}
4174+
int tmp_mem_id = acl_get_fit_device_global_memory(command_queue->device->def,
4175+
mem_objects_largest_size);
41664176
if (tmp_mem_id < 0) {
41674177
ERR_RET(CL_OUT_OF_RESOURCES, command_queue->context,
41684178
"Can not find default global memory system");
@@ -4901,11 +4911,8 @@ int acl_mem_is_valid(cl_mem mem) {
49014911
}
49024912

49034913
// Iterate through device's global memories and return the ID of the first
4904-
// device private global memory.
4905-
// This is needed over acl_get_default_memory(dev) because device can have
4906-
// both device private and shared virtual memory.
4907-
// Returns the id of the memory that clCreateBuffer would allocate in by default
4908-
// (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4914+
// device private global memory
4915+
// TODO: Used for ARM board device query, need to verify correctness
49094916
int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
49104917
int lowest_gmem_idx = -1;
49114918
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF;
@@ -4931,6 +4938,46 @@ int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
49314938
return lowest_gmem_idx;
49324939
}
49334940

4941+
// Iterate through device's global memories and return the ID of the first
4942+
// device private global memory that has enough capacity to fit the allocation.
4943+
// This is needed over acl_get_default_memory(dev) because device can have
4944+
// both device private and shared virtual memory.
4945+
// Returns the id of the memory that clCreateBuffer would allocate in by default
4946+
// (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4947+
int acl_get_fit_device_global_memory(const acl_device_def_t &dev,
4948+
const size_t size) {
4949+
int lowest_gmem_idx = -1;
4950+
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF;
4951+
acl_assert_locked();
4952+
4953+
// If the device has no physical memory then clCreateBuffer will fall back to
4954+
// allocating device global memory in the default memory.
4955+
if (!acl_svm_device_supports_physical_memory(dev.physical_device_id))
4956+
return acl_get_default_memory(dev);
4957+
for (unsigned gmem_idx = 0;
4958+
gmem_idx < dev.autodiscovery_def.num_global_mem_systems; gmem_idx++) {
4959+
acl_system_global_mem_allocation_type_t alloc_type =
4960+
dev.autodiscovery_def.global_mem_defs[gmem_idx].allocation_type;
4961+
bool is_device_alloc =
4962+
!alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION);
4963+
bool is_device_private =
4964+
dev.autodiscovery_def.global_mem_defs[gmem_idx].type ==
4965+
ACL_GLOBAL_MEM_DEVICE_PRIVATE;
4966+
if (is_device_private && is_device_alloc &&
4967+
(size_t)dev.autodiscovery_def.global_mem_defs[gmem_idx].range.begin <
4968+
lowest_gmem_begin &&
4969+
ACL_RANGE_SIZE(dev.autodiscovery_def.global_mem_defs[gmem_idx]
4970+
.get_usable_range()) >= size) {
4971+
lowest_gmem_begin =
4972+
(size_t)dev.autodiscovery_def.global_mem_defs[gmem_idx].range.begin;
4973+
lowest_gmem_idx = static_cast<int>(gmem_idx);
4974+
}
4975+
}
4976+
4977+
// This can return -1, meaning no device private memory can fit the allocation
4978+
return lowest_gmem_idx;
4979+
}
4980+
49344981
// Memory systems are listed with the default memory at the first index
49354982
int acl_get_default_memory(const acl_device_def_t &) { return 0; }
49364983

0 commit comments

Comments
 (0)