Skip to content

Commit ab79b20

Browse files
committed
Update clGetDeviceInfo to follow OpenCL 1.2 spec
1 parent 34af2d2 commit ab79b20

File tree

2 files changed

+60
-11
lines changed

2 files changed

+60
-11
lines changed

src/acl_device.cpp

Lines changed: 59 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
// SPDX-License-Identifier: BSD-3-Clause
33

44
// System headers.
5+
#include <algorithm>
56
#include <assert.h>
67
#include <string.h>
78

@@ -209,6 +210,8 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
209210
RESULT_INT(0);
210211
break;
211212
case CL_DEVICE_GLOBAL_MEM_SIZE: {
213+
#ifdef __arm__
214+
// TODO: legacy code path; its correctness on ARM boards still needs to be verified
212215
auto gmem_id = acl_get_default_device_global_memory(device->def);
213216
if (gmem_id < 0) {
214217
RESULT_INT(0);
@@ -217,10 +220,20 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
217220
cl_ulong size =
218221
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
219222
.get_usable_range());
220-
#ifdef __arm__
221223
// on SoC board, two DDR systems are not equivalent
222224
// so only half can be accessed with a single alloc.
223225
size /= 2;
226+
#else
227+
cl_ulong size = 0;
228+
for (unsigned gmem_idx = 0;
229+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
230+
gmem_idx++) {
231+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
232+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
233+
size += ACL_RANGE_SIZE(
234+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
235+
}
236+
}
224237
#endif
225238
RESULT_ULONG(size);
226239
break;
@@ -251,13 +264,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
251264
RESULT_UINT(acl_platform.max_constant_args);
252265
break;
253266

254-
// "desktop" profile says global memory must be at least 128MB
255-
// "embedded" profile says global memory must be at least 1MB
256267
case CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE: {
257-
// Constant memory is global memory.
258-
// However conformance_test_api min_max_constant_buffer_size
259-
// expects to allocate two buffers of the size we say here.
260-
// So be a shade conservative and cut it down by 4.
268+
#ifdef __arm__
269+
// TODO: legacy code path; its correctness on ARM boards still needs to be verified
261270
auto gmem_id = acl_get_default_device_global_memory(device->def);
262271
if (gmem_id < 0) {
263272
RESULT_INT(0);
@@ -267,13 +276,32 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
267276
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
268277
.get_usable_range()) /
269278
4;
270-
#ifdef __arm__
271-
// see above
279+
// Cut by 2 again; see the comment for CL_DEVICE_GLOBAL_MEM_SIZE
272280
size /= 2;
281+
#else
282+
// Constant memory is global memory.
283+
cl_ulong size = 0;
284+
for (unsigned gmem_idx = 0;
285+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
286+
gmem_idx++) {
287+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
288+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
289+
size += ACL_RANGE_SIZE(
290+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
291+
}
292+
}
293+
// Note: OpenCL 1.2 specifies the minimum value for devices not of type
294+
// CL_DEVICE_TYPE_CUSTOM to be 64KB. However, the OpenCL conformance test
295+
// api:test_min_max_constant_buffer_size expects to allocate two
296+
// buffers of the size returned here, so following the spec may result
297+
// in conformance test failures.
298+
size = std::max(size, (cl_ulong)64 * 1024);
273299
#endif
274300
RESULT_ULONG(size);
275301
} break;
276302
case CL_DEVICE_MAX_MEM_ALLOC_SIZE: {
303+
#ifdef __arm__
304+
// TODO: legacy code path; its correctness on ARM boards still needs to be verified
277305
auto gmem_id = acl_get_default_device_global_memory(device->def);
278306
if (gmem_id < 0) {
279307
RESULT_INT(0);
@@ -282,7 +310,6 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
282310
cl_ulong size =
283311
ACL_RANGE_SIZE(device->def.autodiscovery_def.global_mem_defs[gmem_id]
284312
.get_usable_range());
285-
#ifdef __arm__
286313
// on SoC board, two DDR systems are not equivalent
287314
// so only half can be accessed with a single alloc.
288315

@@ -294,6 +321,28 @@ CL_API_ENTRY cl_int CL_API_CALL clGetDeviceInfoIntelFPGA(
294321
} else {
295322
size = size / 8;
296323
}
324+
#else
325+
cl_ulong global_mem_size = 0;
326+
cl_ulong size = 0;
327+
for (unsigned gmem_idx = 0;
328+
gmem_idx < device->def.autodiscovery_def.num_global_mem_systems;
329+
gmem_idx++) {
330+
if (device->def.autodiscovery_def.global_mem_defs[gmem_idx].type ==
331+
ACL_GLOBAL_MEM_DEVICE_PRIVATE) {
332+
global_mem_size += ACL_RANGE_SIZE(
333+
device->def.autodiscovery_def.global_mem_defs[gmem_idx].range);
334+
cl_ulong curr_size = ACL_RANGE_SIZE(
335+
device->def.autodiscovery_def.global_mem_defs[gmem_idx]
336+
.get_usable_range());
337+
if (curr_size > size) {
338+
size = curr_size;
339+
}
340+
}
341+
}
342+
// OpenCL 1.2: min value = max(CL_DEVICE_GLOBAL_MEM_SIZE/4, 1*1024*1024)
343+
// for devices that are not of type CL_DEVICE_TYPE_CUSTOM
344+
size = std::max(size,
345+
std::max(global_mem_size / 4, (cl_ulong)1 * 1024 * 1024));
297346
#endif
298347
RESULT_ULONG(size);
299348
} break;

src/acl_mem.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5663,7 +5663,7 @@ void acl_mem_migrate_buffer(void *user_data, acl_device_op_t *op) {
56635663
memory_migration.src_mem_list[index].destination_mem_id;
56645664

56655665
#ifdef MEM_DEBUG_MSG
5666-
printf("object %d, mem %zx, count %d:", index, (size_t)src_mem,
5666+
printf("object %d, mem %zx, count %d:\n", index, (size_t)src_mem,
56675667
src_mem->reserved_allocations_count[dest_device][dest_mem_id]);
56685668
#endif
56695669

0 commit comments

Comments
 (0)