Skip to content

Commit 0bb2017

Browse files
committed
Update clGetDeviceInfo to follow OpenCL 1.2 spec
1 parent 34af2d2 commit 0bb2017

File tree

2 files changed

+79
-11
lines changed

2 files changed

+79
-11
lines changed

src/acl_device.cpp

Lines changed: 78 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
209209
RESULT_INT(0);
210210
break;
211211
case CL_DEVICE_GLOBAL_MEM_SIZE: {
212+
#ifdef __arm__
213+
// TODO: legacy code here, need to verify correctness with ARM board
212214
auto gmem_id = acl_get_default_device_global_memory(device->def);
213215
if (gmem_id < 0) {
214216
RESULT_INT(0);
@@ -217,10 +219,20 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
217219
cl_ulong size =
218220
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
219221
.get_usable_range());
220-
#ifdef __arm__
221222
// on SoC board, two DDR systems are not equivalent
222223
// so only half can be accessed with a single alloc.
223224
size /= 2;
225+
#else
226+
cl_ulong size = 0;
227+
for (unsigned gmem_idx = 0;
228+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
229+
gmem_idx++) {
230+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
231+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
232+
size += ACL_RANGE_SIZE(
233+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
234+
}
235+
}
224236
#endif
225237
RESULT_ULONG(size);
226238
break;
@@ -251,13 +263,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
251263
RESULT_UINT(acl_platform.max_constant_args);
252264
break;
253265

254-
// "desktop" profile says global memory must be at least 128MB
255-
// "embedded" profile says global memory must be at least 1MB
256266
case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: {
257-
// Constant memory is global memory.
258-
// However conformance_test_api min_max_constant_buffer_size
259-
// expects to allocate two buffers of the size we say here.
260-
// So be a shade conservative and cut it down by 4.
267+
#ifdef __arm__
268+
// TODO: legacy code here, need to verify correctness with ARM board
261269
auto gmem_id = acl_get_default_device_global_memory(device->def);
262270
if (gmem_id < 0) {
263271
RESULT_INT(0);
@@ -267,13 +275,46 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
267275
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
268276
.get_usable_range()) /
269277
4;
270-
#ifdef __arm__
271-
// see above
278+
// Cut by 2 again, see comment for CL_DEVICE_GLOBAL_MEM_SIZE
272279
size /= 2;
280+
#else
281+
// Return the maximum size of a single allocation to the constant memory
282+
// (i.e., global memory)
283+
cl_ulong size = 0;
284+
for (unsigned gmem_idx = 0;
285+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
286+
gmem_idx++) {
287+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
288+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
289+
cl_ulong curr_size = 0;
290+
// TODO: investigate if ACL_MEM_ALIGN of 0x400 is still required to
291+
// perform device allocations to memory with 0 starting address
292+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx]
293+
.allocation_type &
294+
ACL_GLOBAL_MEM_DEVICE_ALLOCATION) {
295+
curr_size = ACL_RANGE_SIZE(
296+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
297+
.get_usable_range());
298+
} else {
299+
curr_size = ACL_RANGE_SIZE(
300+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
301+
}
302+
if (curr_size > size) {
303+
size = curr_size;
304+
}
305+
}
306+
}
307+
// Note: devices not of type CL_DEVICE_TYPE_CUSTOM and conformant
308+
// to OpenCL 1.2 spec will return size at least of 64KB here
309+
// TODO: the OpenCL conformance test api:test_min_max_constant_buffer_size
310+
// expects to allocate two buffers of the size returned, need to
311+
// confirm conformance here
273312
#endif
274313
RESULT_ULONG(size);
275314
} break;
276315
case CL_DEVICE_MAX_MEM_ALLOC_SIZE: {
316+
#ifdef __arm__
317+
// TODO: legacy code here, need to verify correctness with ARM board
277318
auto gmem_id = acl_get_default_device_global_memory(device->def);
278319
if (gmem_id < 0) {
279320
RESULT_INT(0);
@@ -282,7 +323,6 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
282323
cl_ulong size =
283324
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
284325
.get_usable_range());
285-
#ifdef __arm__
286326
// on SoC board, two DDR systems are not equivalent
287327
// so only half can be accessed with a single alloc.
288328

@@ -294,6 +334,34 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
294334
} else {
295335
size = size / 8;
296336
}
337+
#else
338+
cl_ulong size = 0;
339+
for (unsigned gmem_idx = 0;
340+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
341+
gmem_idx++) {
342+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
343+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
344+
cl_ulong curr_size = 0;
345+
// TODO: investigate if ACL_MEM_ALIGN of 0x400 is still required to
346+
// perform device allocations to memory with 0 starting address
347+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx]
348+
.allocation_type &
349+
ACL_GLOBAL_MEM_DEVICE_ALLOCATION) {
350+
curr_size = ACL_RANGE_SIZE(
351+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
352+
.get_usable_range());
353+
} else {
354+
curr_size = ACL_RANGE_SIZE(
355+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
356+
}
357+
if (curr_size > size) {
358+
size = curr_size;
359+
}
360+
}
361+
}
362+
// Note: devices not of type CL_DEVICE_TYPE_CUSTOM and
363+
// conformant to OpenCL 1.2 spec will return size at least of
364+
// max(CL_DEVICE_GLOBAL_MEM_SIZE/4, 1*1024*1024) here
297365
#endif
298366
RESULT_ULONG(size);
299367
} break;

src/acl_mem.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5663,7 +5663,7 @@ void acl_mem_migrate_buffer(void *user_data, acl_device_op_t *op) {
56635663
memory_migration.src_mem_list[index].destination_mem_id;
56645664

56655665
#ifdef MEM_DEBUG_MSG
5666-
printf("object %d, mem %zx, count %d:", index, (size_t)src_mem,
5666+
printf("object %d, mem %zx, count %d:\n", index, (size_t)src_mem,
56675667
src_mem->reserved_allocations_count[dest_device][dest_mem_id]);
56685668
#endif
56695669

0 commit comments

Comments
 (0)