@@ -264,21 +264,21 @@ static int acl_do_physical_buffer_allocation(unsigned physical_device_id,
264
264
265
265
acl_assert_locked ();
266
266
267
- // When mem_id == 0 it indicates the mem_id is not finalized yet so need to
268
- // set it to a real value here.
267
+ // When mem_id == 0 and buffer location is not set, it indicates the mem_id
268
+ // is not finalized yet so need to set it to a real value here.
269
269
bool glob_mem = mem->block_allocation ->region == &(acl_platform.global_mem );
270
- if (glob_mem && mem->mem_id == 0 ) {
270
+ if (glob_mem && mem->mem_id == 0 && !mem-> buffer_location_set ) {
271
271
// Memory migration between SVM and device global memory is not supported.
272
- // If device supports both, do physical buffer allocation on device global
273
- // memory only.
274
- if ( acl_svm_device_supports_physical_memory (physical_device_id) &&
275
- acl_svm_device_supports_any_svm (physical_device_id)) {
272
+ // When the device supports device global memory, if SVM is also supported,
273
+ // do physical buffer allocation on device global memory only; else if SVM
274
+ // is not supported, we can only allocate on device global memory.
275
+ if ( acl_svm_device_supports_physical_memory (physical_device_id)) {
276
276
assert (
277
277
acl_platform.device [physical_device_id]
278
278
.def .autodiscovery_def .num_global_mem_systems > 0 &&
279
279
" Device is not configured to support SVM and device global memory." );
280
- int tmp_mem_id = acl_get_default_device_global_memory (
281
- acl_platform.device [physical_device_id].def );
280
+ int tmp_mem_id = acl_get_fit_device_global_memory (
281
+ acl_platform.device [physical_device_id].def , mem-> size );
282
282
assert (tmp_mem_id >= 0 &&
283
283
" Device does not have any device global memory." );
284
284
mem->mem_id = (unsigned int )tmp_mem_id;
@@ -420,6 +420,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
420
420
unsigned int idevice;
421
421
cl_uint bank_id = 0 ;
422
422
cl_uint tmp_mem_id = 0 ;
423
+ bool buffer_location_set = false ;
423
424
std::scoped_lock lock{acl_mutex_wrapper};
424
425
425
426
#ifdef MEM_DEBUG_MSG
@@ -436,6 +437,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
436
437
bank_id = (cl_uint) * (properties + 1 );
437
438
} break ;
438
439
case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: {
440
+ buffer_location_set = true ;
439
441
tmp_mem_id = (cl_uint) * (properties + 1 );
440
442
441
443
// In FullSystem flow, buffer location is always the index of the global
@@ -612,6 +614,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL(
612
614
" Could not allocate a cl_mem object" );
613
615
}
614
616
mem->mem_id = tmp_mem_id;
617
+ mem->buffer_location_set = buffer_location_set;
615
618
616
619
mem->block_allocation = new_block;
617
620
mem->block_allocation ->mem_obj = mem;
@@ -4161,8 +4164,15 @@ ACL_EXPORT CL_API_ENTRY cl_int CL_API_CALL clEnqueueMigrateMemObjectsIntelFPGA(
4161
4164
physical_id = command_queue->device ->def .physical_device_id ;
4162
4165
// SVM memory is not associated with any device.
4163
4166
// Migration should only be moving device global memories.
4164
- int tmp_mem_id =
4165
- acl_get_default_device_global_memory (command_queue->device ->def );
4167
+ // First find the largest mem object.
4168
+ size_t mem_objects_largest_size = 0 ;
4169
+ for (i = 0 ; i < num_mem_objects; ++i) {
4170
+ if (mem_objects[i]->size > mem_objects_largest_size) {
4171
+ mem_objects_largest_size = mem_objects[i]->size ;
4172
+ }
4173
+ }
4174
+ int tmp_mem_id = acl_get_fit_device_global_memory (command_queue->device ->def ,
4175
+ mem_objects_largest_size);
4166
4176
if (tmp_mem_id < 0 ) {
4167
4177
ERR_RET (CL_OUT_OF_RESOURCES, command_queue->context ,
4168
4178
" Can not find default global memory system" );
@@ -4901,11 +4911,8 @@ int acl_mem_is_valid(cl_mem mem) {
4901
4911
}
4902
4912
4903
4913
// Iterate through device's global memories and return the ID of the first
4904
- // device private global memory.
4905
- // This is needed over acl_get_default_memory(dev) because device can have
4906
- // both device private and shared virtual memory.
4907
- // Returns the id of the memory that clCreateBuffer would allocate in by default
4908
- // (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4914
+ // device private global memory
4915
+ // TODO: Used for ARM board device query, need to verify correctness
4909
4916
int acl_get_default_device_global_memory (const acl_device_def_t &dev) {
4910
4917
int lowest_gmem_idx = -1 ;
4911
4918
cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF ;
@@ -4931,6 +4938,46 @@ int acl_get_default_device_global_memory(const acl_device_def_t &dev) {
4931
4938
return lowest_gmem_idx;
4932
4939
}
4933
4940
4941
+ // Iterate through device's global memories and return the ID of the first
4942
+ // device private global memory that has enough capacity to fit the allocation.
4943
+ // This is needed over acl_get_default_memory(dev) because device can have
4944
+ // both device private and shared virtual memory.
4945
+ // Returns the id of the memory that clCreateBuffer would allocate in by default
4946
+ // (i.e. when CL_MEM_USE_HOST_PTR is not used) or -1 if no such memory exists.
4947
+ int acl_get_fit_device_global_memory (const acl_device_def_t &dev,
4948
+ const size_t size) {
4949
+ int lowest_gmem_idx = -1 ;
4950
+ cl_ulong lowest_gmem_begin = 0xFFFFFFFFFFFFFFFF ;
4951
+ acl_assert_locked ();
4952
+
4953
+ // If the device has no physical memory then clCreateBuffer will fall back to
4954
+ // allocating device global memory in the default memory.
4955
+ if (!acl_svm_device_supports_physical_memory (dev.physical_device_id ))
4956
+ return acl_get_default_memory (dev);
4957
+ for (unsigned gmem_idx = 0 ;
4958
+ gmem_idx < dev.autodiscovery_def .num_global_mem_systems ; gmem_idx++) {
4959
+ acl_system_global_mem_allocation_type_t alloc_type =
4960
+ dev.autodiscovery_def .global_mem_defs [gmem_idx].allocation_type ;
4961
+ bool is_device_alloc =
4962
+ !alloc_type || (alloc_type & ACL_GLOBAL_MEM_DEVICE_ALLOCATION);
4963
+ bool is_device_private =
4964
+ dev.autodiscovery_def .global_mem_defs [gmem_idx].type ==
4965
+ ACL_GLOBAL_MEM_DEVICE_PRIVATE;
4966
+ if (is_device_private && is_device_alloc &&
4967
+ (size_t )dev.autodiscovery_def .global_mem_defs [gmem_idx].range .begin <
4968
+ lowest_gmem_begin &&
4969
+ ACL_RANGE_SIZE (dev.autodiscovery_def .global_mem_defs [gmem_idx]
4970
+ .get_usable_range ()) >= size) {
4971
+ lowest_gmem_begin =
4972
+ (size_t )dev.autodiscovery_def .global_mem_defs [gmem_idx].range .begin ;
4973
+ lowest_gmem_idx = static_cast <int >(gmem_idx);
4974
+ }
4975
+ }
4976
+
4977
+ // This can return -1, but that means there's no device private memory
4978
+ return lowest_gmem_idx;
4979
+ }
4980
+
4934
4981
// Memory systems are listed with the default memory at the first index
4935
4982
int acl_get_default_memory (const acl_device_def_t &) { return 0 ; }
4936
4983
0 commit comments