@@ -264,25 +264,32 @@ static int acl_do_physical_buffer_allocation(unsigned physical_device_id,
264
264
265
265
acl_assert_locked ();
266
266
267
- // When mem_id == 0 it indicates the mem_id is not finalized yet so need to
268
- // set it to a real value here.
267
+ // When mem_id == 0 and buffer location is not set, it indicates the mem_id
268
+ // is not finalized yet so need to set it to a real value here.
269
269
bool glob_mem = mem->block_allocation ->region == &(acl_platform.global_mem );
270
- if (glob_mem && mem->mem_id == 0 ) {
270
+ if (glob_mem && mem->mem_id == 0 && !mem-> buffer_location_set ) {
271
271
// Memory migration between SVM and device global memory is not supported.
272
- // If device supports both, do physical buffer allocation on device global
273
- // memory only.
274
- if ( acl_svm_device_supports_physical_memory (physical_device_id) &&
275
- acl_svm_device_supports_any_svm (physical_device_id)) {
272
+ // When the device supports device global memory, if SVM is also supported,
273
+ // do physical buffer allocation on device global memory only; else if SVM
274
+ // is not supported, we can only allocate on device global memory.
275
+ if ( acl_svm_device_supports_physical_memory (physical_device_id)) {
276
276
assert (
277
277
acl_platform.device [physical_device_id]
278
278
.def .autodiscovery_def .num_global_mem_systems > 0 &&
279
279
" Device is not configured to support SVM and device global memory." );
280
- int tmp_mem_id = acl_get_default_device_global_memory (
281
- acl_platform.device [physical_device_id].def );
282
- assert (tmp_mem_id >= 0 &&
283
- " Device does not have any device global memory ." );
280
+ int tmp_mem_id = acl_get_fit_device_global_memory (
281
+ acl_platform.device [physical_device_id].def , mem-> size );
282
+ assert (tmp_mem_id >= 0 && " Device does not have any device global memory "
283
+ " for the allocation size ." );
284
284
mem->mem_id = (unsigned int )tmp_mem_id;
285
285
}
286
+ } else if (mem->buffer_location_set ) {
287
+ // Sanity check if the specified buffer location is large enough
288
+ assert (
289
+ ACL_RANGE_SIZE (acl_platform.device [physical_device_id]
290
+ .def .autodiscovery_def .global_mem_defs [mem->mem_id ]
291
+ .get_usable_range ()) >= mem->size &&
292
+ " Specified buffer location does not fit the requested allocation size" );
286
293
}
287
294
288
295
int result = 0 ;
@@ -420,6 +427,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
420
427
unsigned int idevice;
421
428
cl_uint bank_id = 0 ;
422
429
cl_uint tmp_mem_id = 0 ;
430
+ bool buffer_location_set = false ;
423
431
std::scoped_lock lock{acl_mutex_wrapper};
424
432
425
433
#ifdef MEM_DEBUG_MSG
@@ -436,6 +444,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
436
444
bank_id = (cl_uint) * (properties + 1 );
437
445
} break ;
438
446
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
447
+ buffer_location_set = true ;
439
448
tmp_mem_id = (cl_uint) * (properties + 1 );
440
449
441
450
// In FullSystem flow, buffer location is always the index of the global
@@ -612,6 +621,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
612
621
" Could not allocate a cl_mem object" );
613
622
}
614
623
mem->mem_id = tmp_mem_id;
624
+ mem->buffer_location_set = buffer_location_set;
615
625
616
626
mem->block_allocation = new_block;
617
627
mem->block_allocation ->mem_obj = mem;
@@ -4161,8 +4171,15 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA(
4161
4171
physical_id = command_queue->device ->def .physical_device_id ;
4162
4172
// SVM memory is not associated with any device.
4163
4173
// Migration should only be moving device global memories.
4164
- int tmp_mem_id =
4165
- acl_get_default_device_global_memory (command_queue->device ->def );
4174
+ // First find the largest mem object.
4175
+ size_t mem_objects_largest_size = 0 ;
4176
+ for (i = 0 ; i < num_mem_objects; ++i) {
4177
+ if (mem_objects[i]->size > mem_objects_largest_size) {
4178
+ mem_objects_largest_size = mem_objects[i]->size ;
4179
+ }
4180
+ }
4181
+ int tmp_mem_id = acl_get_fit_device_global_memory (command_queue->device ->def ,
4182
+ mem_objects_largest_size);
4166
4183
if (tmp_mem_id < 0 ) {
4167
4184
ERR_RET (CL_OUT_OF_RESOURCES, command_queue->context ,
4168
4185
" Can not find default global memory system" );
@@ -4901,11 +4918,8 @@ int acl_mem_is_valid(cl_mem mem) {
4901
4918
}
4902
4919
4903
4920
// Iterate through device's global memories and return the ID of the first
4904
- // device private global memory.
4905
- // This is needed over acl_get_default_memory(dev) because device can have
4906
- // both device private and shared virtual memory.
4907
- // Returns the id of the memory that clCreateBuffer would allocate in by default
4908
- // (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4921
+ // device private global memory
4922
+ // TODO: Used for ARM board device query, need to verify correctness
4909
4923
int acl_get_default_device_global_memory (const acl_device_def_t &dev) {
4910
4924
int lowest_gmem_idx = -1 ;
4911
4925
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF ;
@@ -4931,6 +4945,46 @@ int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
4931
4945
return lowest_gmem_idx;
4932
4946
}
4933
4947
4948
+ // Iterate through device's global memories and return the ID of the device
4949
+ // private global memory with the lowest base address whose usable range is at
4950
+ // least `size`. This is needed over acl_get_default_memory(dev) because a device
4951
+ // can have both device private and shared virtual memory.
4952
+ // Returns the id of the memory that clCreateBuffer would allocate in by default
4953
+ // (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no candidate memory fits.
4954
+ int acl_get_fit_device_global_memory (const acl_device_def_t &dev,
4955
+ const size_t size) {
4956
+ int lowest_gmem_idx = -1 ;
4957
+ cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF ;
4958
+ acl_assert_locked ();
4959
+
4960
+ // If the device has no physical memory then clCreateBuffer will fall back to
4961
+ // allocating in the default memory; `size` is not checked on this path.
4962
+ if (!acl_svm_device_supports_physical_memory (dev.physical_device_id ))
4963
+ return acl_get_default_memory (dev);
4964
+ for (unsigned gmem_idx = 0 ;
4965
+ gmem_idx < dev.autodiscovery_def .num_global_mem_systems ; gmem_idx++) {
4966
+ acl_system_global_mem_allocation_type_t alloc_type =
4967
+ dev.autodiscovery_def .global_mem_defs [gmem_idx].allocation_type ;
4968
+ bool is_device_alloc =
4969
+ !alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION);
4970
+ bool is_device_private =
4971
+ dev.autodiscovery_def .global_mem_defs [gmem_idx].type ==
4972
+ ACL_GLOBAL_MEM_DEVICE_PRIVATE;
4973
+ if (is_device_private && is_device_alloc &&
4974
+ (size_t )dev.autodiscovery_def .global_mem_defs [gmem_idx].range .begin <
4975
+ lowest_gmem_begin &&
4976
+ ACL_RANGE_SIZE (dev.autodiscovery_def .global_mem_defs [gmem_idx]
4977
+ .get_usable_range ()) >= size) {
4978
+ lowest_gmem_begin =
4979
+ (size_t )dev.autodiscovery_def .global_mem_defs [gmem_idx].range .begin ;
4980
+ lowest_gmem_idx = static_cast <int >(gmem_idx);
4981
+ }
4982
+ }
4983
+
4984
+ // Returns -1 when no device private, device-allocatable memory can fit `size`
4985
+ return lowest_gmem_idx;
4986
+ }
4987
+
4934
4988
// Memory systems are listed with the default memory at the first index, so the
// default memory ID is always 0; the device definition argument is unused.
4935
4989
int acl_get_default_memory (const acl_device_def_t &) { return 0 ; }
4936
4990
0 commit comments