diff --git a/src/acl_kernel_if.cpp b/src/acl_kernel_if.cpp index c0b836c5..9501716a 100644 --- a/src/acl_kernel_if.cpp +++ b/src/acl_kernel_if.cpp @@ -715,6 +715,29 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io, auto parse_result = acl_load_device_def_from_str( std::string(acl_shipped_board_cfgs[1]), sysdef->device[0].autodiscovery_def, err_msg); + // Fill in definition for all device global memory + // Simulator does not have any global memory interface information until the + // actual aocx is loaded. (Note this is only a problem for simulator not + // hardware run, in hardware run, we can communicate with BSP to query + // memory interface information). In the flow today, the USM device + // allocation call happens before aocx is loaded. The aocx is loaded when + // clCreateProgram is called, which typically happens on first kernel launch + // in sycl runtime. In order to prevent the USM device allocation from + // failing on multi global memory system, initialize as much global memory + // system as possible for simulation flow. However there are a few downsides: + // 1. The address range/size may not be exactly the same as the one that is + // in aocx, but this is not too large of a problem because runtime first fit + // allocation algorithm will fill the lowest address range first. Unless + // user requested more than what is available. + // 2. It potentially occupies more space than required + // 3. It will not error out when user requested a non-existing device global + // memory because we are using ACL_MAX_GLOBAL_MEM for num_global_mem_systems + sysdef->device[0].autodiscovery_def.num_global_mem_systems = + ACL_MAX_GLOBAL_MEM; + for (int i = 0; i < ACL_MAX_GLOBAL_MEM; i++) { + sysdef->device[0].autodiscovery_def.global_mem_defs[i] = + sysdef->device[0].autodiscovery_def.global_mem_defs[0]; + } if (parse_result) sysdef->num_devices = 1; // Override the device name to the simulator. 
diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index 59481e9b..bb153f01 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -4424,19 +4424,6 @@ void acl_resize_reserved_allocations_for_device(cl_mem mem, unsigned int num_global_mem_systems = def.autodiscovery_def.num_global_mem_systems; - // When we don't know how many memory systems will exist - // Load as much as needed. - num_global_mem_systems = std::max(num_global_mem_systems, mem->mem_id + 1); - - // For the simulation flow we don't know how many memory systems will exist - // until we load the .aocx, which may not happen until somewhat later. - // Reserving space is quite cheap, so reserve space for many memory systems. - int offline_mode = 0; - (void)acl_get_offline_device_user_setting(&offline_mode); - if (offline_mode == ACL_CONTEXT_MPSIM) { - num_global_mem_systems = std::max(num_global_mem_systems, 128u); - } - #ifdef MEM_DEBUG_MSG printf( "resizing reserved_allocations, physical_device_id:%u, target_size:%u \n",