diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index 62258405..59481e9b 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -416,6 +416,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( cl_bool context_has_device_with_physical_mem; unsigned int idevice; cl_uint bank_id = 0; + cl_uint tmp_mem_id = 0; acl_lock(); #ifdef MEM_DEBUG_MSG @@ -431,6 +432,9 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( } bank_id = (cl_uint) * (properties + 1); } break; + case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: { + tmp_mem_id = (cl_uint) * (properties + 1); + } break; default: { UNLOCK_BAIL_INFO(CL_INVALID_DEVICE, context, "Invalid properties"); } @@ -553,6 +557,7 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( UNLOCK_BAIL_INFO(CL_OUT_OF_HOST_MEMORY, context, "Could not allocate a cl_mem object"); } + mem->mem_id = tmp_mem_id; mem->block_allocation = new_block; mem->block_allocation->mem_obj = mem; @@ -784,7 +789,6 @@ CL_API_ENTRY cl_mem clCreateBufferWithPropertiesINTEL( mem->context = context; mem->flags = flags; mem->size = size; - mem->mem_id = 0; mem->bank_id = 0; if (is_SOC_device()) { @@ -1254,7 +1258,7 @@ CL_API_ENTRY cl_mem CL_API_CALL clCreateSubBufferIntelFPGA( mem->context = context; mem->flags = sub_flags; - mem->mem_id = 0; + mem->mem_id = buffer->mem_id; if (is_SOC_device()) { // HPS DDR is system managed for SoC. @@ -1372,6 +1376,9 @@ CL_API_ENTRY cl_int CL_API_CALL clGetMemObjectInfoIntelFPGA( context = mem->context; switch (param_name) { + case CL_MEM_ALLOC_BUFFER_LOCATION_INTEL: + RESULT_UINT(mem->mem_id); + break; case CL_MEM_TYPE: RESULT_ENUM(mem->mem_object_type); break; @@ -4417,6 +4424,10 @@ void acl_resize_reserved_allocations_for_device(cl_mem mem, unsigned int num_global_mem_systems = def.autodiscovery_def.num_global_mem_systems; + // When we don't know how many memory systems will exist + // Load as much as needed. + num_global_mem_systems = std::max(num_global_mem_systems, mem->mem_id + 1); + // For the simulation flow we don't know how many memory systems will exist // until we load the .aocx, which may not happen until somewhat later. // Reserving space is quite cheap, so reserve space for many memory systems. diff --git a/src/acl_usm.cpp b/src/acl_usm.cpp index df7f4b0d..23b37d9d 100644 --- a/src/acl_usm.cpp +++ b/src/acl_usm.cpp @@ -255,8 +255,8 @@ clDeviceMemAllocINTEL(cl_context context, cl_device_id device, cl_int status; // Use cl_mem for convenience - cl_mem usm_device_buffer = - clCreateBufferIntelFPGA(context, CL_MEM_READ_WRITE, size, NULL, &status); + cl_mem usm_device_buffer = clCreateBufferWithPropertiesINTEL( + context, NULL, CL_MEM_READ_WRITE, size, NULL, &status); if (status != CL_SUCCESS) { UNLOCK_BAIL_INFO(status, context, "Failed to allocate device memory"); } diff --git a/test/acl_mem_test.cpp b/test/acl_mem_test.cpp index 497e2d4c..d9df11e7 100644 --- a/test/acl_mem_test.cpp +++ b/test/acl_mem_test.cpp @@ -2799,6 +2799,52 @@ TEST(acl_mem, case_205751_overlapping_alloc) { CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(c)); } +TEST(acl_mem, buffer_location_property) { + ACL_LOCKED(acl_print_debug_msg("begin buffer_location_property\n")); + // Test assumes more than 1 global memory space + // Allocate a small buffer (a), then try to allocate two buffers (b, c) of + // size bank_size. Expect the second allocation to fail. + cl_mem a; + cl_int status = CL_SUCCESS; + size_t total_size = ACL_RANGE_SIZE( + m_device[0]->def.autodiscovery_def.global_mem_defs[0].range); + size_t bank_size = total_size / 2; + size_t small_size = bank_size / 1024; + + cl_mem_properties_intel props[] = {CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, 0, 0}; + a = clCreateBufferWithPropertiesINTEL(m_context, props, 0, bank_size, 0, + &status); + ACL_LOCKED(CHECK(acl_mem_is_valid(a))); + CHECK_EQUAL(CL_SUCCESS, status); + assert(a); + CHECK_EQUAL(1, acl_ref_count(a)); + cl_uint read_mem_id = 4; + size_t size_ret; + CHECK_EQUAL(CL_SUCCESS, + clGetMemObjectInfo(a, CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, + sizeof(cl_uint), &read_mem_id, &size_ret)); + CHECK_EQUAL(0, read_mem_id); + + cl_buffer_region test_region = {0, 2}; + cl_mem subbuffer = + clCreateSubBuffer(a, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, + &test_region, &status); + ACL_LOCKED(CHECK(acl_mem_is_valid(subbuffer))); + CHECK_EQUAL(CL_SUCCESS, status); + assert(subbuffer); + CHECK_EQUAL(2, acl_ref_count(a)); + read_mem_id = 4; + CHECK_EQUAL(CL_SUCCESS, + clGetMemObjectInfo(subbuffer, CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, + sizeof(cl_uint), &read_mem_id, &size_ret)); + CHECK_EQUAL(0, read_mem_id); + + ACL_LOCKED(CHECK_EQUAL(acl_bind_buffer_to_device(m_cq->device, a), 1)); + + CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(subbuffer)); + CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(a)); +} + MT_TEST(acl_mem, map_buf_bad_flags) { ACL_LOCKED(acl_print_debug_msg("begin buf_bad_flags\n")); cl_int status = CL_SUCCESS;