Skip to content

Commit f7af8f9

Browse files
committed
Implement simulation pre-program autodiscovery string load
1 parent 3d4e79f commit f7af8f9

File tree

5 files changed

+6
-261
lines changed

5 files changed

+6
-261
lines changed

include/acl_mem.h

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,10 +32,6 @@ int acl_submit_mem_transfer_device_op(cl_event event);
3232

3333
int acl_submit_migrate_mem_device_op(cl_event event);
3434

35-
int acl_realloc_buffer_for_simulator(cl_mem mem,
36-
const unsigned int physical_device_id,
37-
const unsigned int mem_id);
38-
3935
// Actually execute the memory transfer device operation.
4036
// In the normal case source and destination are different, in which case
4137
// the HAL is called and the transfer is non-blocking.

src/acl_kernel.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2918,12 +2918,6 @@ static cl_int l_copy_and_adjust_arguments_for_device(
29182918
[needed_mem_id]);
29192919
#endif
29202920

2921-
if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) {
2922-
if (!acl_realloc_buffer_for_simulator(mem_obj, needed_physical_id,
2923-
needed_mem_id)) {
2924-
return CL_MEM_OBJECT_ALLOCATION_FAILURE;
2925-
}
2926-
}
29272921
// copy the address of the reserved allocation into the invocation
29282922
// image:
29292923
const void *mem_addr =

src/acl_kernel_if.cpp

Lines changed: 6 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -716,43 +716,6 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io,
716716

717717
kern->autorun_profiling_kernel_id = -1;
718718

719-
// The simulator doesn't have any kernel interface information until the aocx
720-
// is loaded, which happens later.
721-
if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) {
722-
std::string err_msg;
723-
auto parse_result = acl_load_device_def_from_str(
724-
acl_shipped_board_cfgs[0].cfg, sysdef->device[0].autodiscovery_def,
725-
err_msg);
726-
// Fill in definition for all device global memory
727-
// Simulator does not have any global memory interface information until the
728-
// actual aocx is loaded. (Note this is only a problem for simulator not
729-
// hardware run, in hardware run, we can communicate with BSP to query
730-
// memory interface information). In the flow today, the USM device
731-
// allocation call happens before aocx is loaded. The aocx is loaded when
732-
// clCreateProgram is called, which typically happens on first kernel launch
733-
// in sycl runtime. In order to prevent the USM device allocation from
734-
// failing on multi global memory system, initialize as much global memory
735-
// system as possible for simulation flow. However, there are a few downsides:
736-
// 1. The address range/size may not be exactly the same as the one that is
737-
// in aocx, but this is not too large of a problem because runtime first fit
738-
// allocation algorithm will fill the lowest address range first. Unless
739-
// user requested more than what is available.
740-
// 2. It potentially occupies more space than required
741-
// 3. will not error out when user requested a non-existing device global
742-
// memory because we are using ACL_MAX_GLOBAL_MEM for num_global_mem_systems
743-
sysdef->device[0].autodiscovery_def.num_global_mem_systems =
744-
ACL_MAX_GLOBAL_MEM;
745-
for (int i = 0; i < ACL_MAX_GLOBAL_MEM; i++) {
746-
sysdef->device[0].autodiscovery_def.global_mem_defs[i] =
747-
sysdef->device[0].autodiscovery_def.global_mem_defs[0];
748-
}
749-
if (parse_result)
750-
sysdef->num_devices = 1;
751-
// Override the device name to the simulator.
752-
sysdef->device[0].autodiscovery_def.name = ACL_MPSIM_DEVICE_NAME;
753-
return 0;
754-
}
755-
756719
if (check_version_id(kern) != 0) {
757720
kern->io.printf("Hardware version ID differs from version expected by "
758721
"software. Either:\n");
@@ -846,8 +809,12 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io,
846809
return -1;
847810
}
848811

849-
result = acl_kernel_if_update(
850-
sysdef->device[kern->physical_device_id].autodiscovery_def, kern);
812+
if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) {
813+
sysdef->device[kern->physical_device_id].autodiscovery_def.name = ACL_MPSIM_DEVICE_NAME;
814+
} else {
815+
result = acl_kernel_if_update(
816+
sysdef->device[kern->physical_device_id].autodiscovery_def, kern);
817+
}
851818

852819
return result;
853820
}

src/acl_mem.cpp

Lines changed: 0 additions & 93 deletions
Original file line numberDiff line numberDiff line change
@@ -6843,99 +6843,6 @@ void acl_copy_device_buffers_from_host_after_programming(
68436843
}
68446844
}
68456845

6846-
// Simulator does not have any global memory interface information before
6847-
// reprogram, the runtime initializes device def to have the same global
6848-
// memory address range obtained from an autodiscovery string predefined in
6849-
// acl_shipped_board_cfgs.h
6850-
// When a buffer is created with the buffer location property specifying a
6851-
// global memory whose address range lies beyond the range defined in the
6852-
// default autodiscovery string, and is written before the device reprogram,
6853-
// the write will bind the buffer to the wrong address range, causing issues
6854-
// when running the kernel
6855-
// The following function does a memory copy for the buffers bound to the
6856-
// wrong address range to the right one after the global memory information
6857-
// becomes available and before the kernel launch
6858-
// Returns 1 on success and 0 on failure
6859-
int acl_realloc_buffer_for_simulator(cl_mem mem,
6860-
const unsigned int physical_device_id,
6861-
const unsigned int mem_id) {
6862-
// Only reallocate and migrate if mem resides in global memory
6863-
if (mem->block_allocation->region != &(acl_platform.global_mem)) {
6864-
return 1;
6865-
}
6866-
6867-
const acl_addr_range_t global_mem_range =
6868-
acl_platform.device[physical_device_id]
6869-
.def.autodiscovery_def.global_mem_defs[mem_id]
6870-
.get_usable_range();
6871-
6872-
// Save old address
6873-
int mem_on_host;
6874-
void *const old_mem_address = l_get_address_of_writable_copy(
6875-
mem, physical_device_id, &mem_on_host, CL_FALSE);
6876-
6877-
// The mem copy is only needed if the buffer is bound to the device
6878-
// before global memory range is confirmed (i.e., before reprogram), and
6879-
// assumed address range before reprogram is different from actual
6880-
// Therefore, check if:
6881-
// 1. allocation is deferred (if so auto migration will happen)
6882-
// 2. buffer is on host
6883-
// 3. buffer appears to be "at the destination"
6884-
// 4. block allocation is outside the global memory range
6885-
if (!mem->allocation_deferred &&
6886-
!(mem->mem_cpy_host_ptr_pending || mem_on_host) &&
6887-
(mem->block_allocation ==
6888-
mem->reserved_allocations[physical_device_id][mem_id]) &&
6889-
(ACL_STRIP_PHYSICAL_ID(mem->block_allocation->range.begin) >=
6890-
global_mem_range.next ||
6891-
ACL_STRIP_PHYSICAL_ID(mem->block_allocation->range.next) <
6892-
global_mem_range.begin)) {
6893-
6894-
// mem_id should align if block allocation is the same as reserved
6895-
// allocation
6896-
assert(mem->mem_id == mem_id);
6897-
6898-
// Okay to set this to NULL, memory tracked in mem->block_allocation
6899-
mem->reserved_allocations[physical_device_id][mem_id] = NULL;
6900-
// We will reallocate block, so remove it from linked list first
6901-
acl_block_allocation_t **block_ptr =
6902-
&(mem->block_allocation->region->first_block);
6903-
// try to find the mem->block_allocation in the linked list, error if
6904-
// the block is not found before reaching the end of list
6905-
while (true) {
6906-
acl_block_allocation_t *const block = *block_ptr;
6907-
assert(block != NULL);
6908-
if (block == mem->block_allocation) {
6909-
*block_ptr = block->next_block_in_region;
6910-
break;
6911-
}
6912-
// Advance to the next block in the region
6913-
block_ptr = &(block->next_block_in_region);
6914-
}
6915-
// Reallocate buffer range
6916-
if (!acl_do_physical_buffer_allocation(physical_device_id, mem)) {
6917-
return 0;
6918-
}
6919-
6920-
void *const new_mem_address =
6921-
mem->reserved_allocations[physical_device_id][mem_id]->range.begin;
6922-
const acl_hal_t *const hal = acl_get_hal();
6923-
6924-
#ifdef MEM_DEBUG_MSG
6925-
printf("reallocating mem obj for simulation after getting global mem "
6926-
"info, device %u ([0]%zx -> [0]%zx) ",
6927-
physical_device_id, (size_t)(ACL_STRIP_PHYSICAL_ID(old_mem_address)),
6928-
(size_t)(ACL_STRIP_PHYSICAL_ID(new_mem_address)));
6929-
#endif
6930-
6931-
// do blocking copy, this is for simulation only so performance is
6932-
// probably not a huge concern
6933-
hal->copy_globalmem_to_globalmem(0, old_mem_address, new_mem_address,
6934-
mem->size);
6935-
}
6936-
return 1;
6937-
}
6938-
69396846
static void acl_print_all_mem_in_region(acl_mem_region_t *region);
69406847
void acl_print_all_mem(void) {
69416848
acl_assert_locked();

test/acl_mem_test.cpp

Lines changed: 0 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -2906,125 +2906,6 @@ TEST(acl_mem, buffer_location_property) {
29062906
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(a));
29072907
}
29082908

2909-
TEST(acl_mem, simulation_copy_buffer) {
2910-
// Test mocks a simulation run where a predefined autodiscovery string
2911-
// is loaded at the beginning of the run with default global memory
2912-
// set-up that doesn't match actual. It checks whether the function
2913-
// acl_realloc_buffer_for_simulator moves buffer to the right global
2914-
// memory address range after a fake reprogram updates the global
2915-
// memory configuration.
2916-
cl_mem buffer;
2917-
cl_int status = CL_SUCCESS;
2918-
int input_data = 0xaaaaaaaa;
2919-
int output_data = 0x55555555;
2920-
size_t total_size = ACL_RANGE_SIZE(
2921-
m_device[0]->def.autodiscovery_def.global_mem_defs[0].range);
2922-
size_t global_mem_size = total_size / 2;
2923-
2924-
// save original autodiscovery def
2925-
acl_device_def_autodiscovery_t orig_def = m_device[0]->def.autodiscovery_def;
2926-
// create a fake multi global memory system where unit test global
2927-
// memory is split into 2 halves for the 2 global memories
2928-
acl_device_def_autodiscovery_t actual_def =
2929-
m_device[0]->def.autodiscovery_def;
2930-
actual_def.num_global_mem_systems = 2;
2931-
actual_def.global_mem_defs[1].range.next =
2932-
actual_def.global_mem_defs[0].range.next;
2933-
actual_def.global_mem_defs[0].range.next =
2934-
(char *)actual_def.global_mem_defs[0].range.begin + global_mem_size;
2935-
actual_def.global_mem_defs[1].range.begin =
2936-
actual_def.global_mem_defs[0].range.next;
2937-
2938-
// simulate loading from a predefined autodiscovery string in
2939-
// acl_shipped_board_cfgs.h
2940-
m_device[0]->def.autodiscovery_def.num_global_mem_systems =
2941-
ACL_MAX_GLOBAL_MEM;
2942-
for (int i = 0; i < ACL_MAX_GLOBAL_MEM; i++) {
2943-
m_device[0]->def.autodiscovery_def.global_mem_defs[i] =
2944-
actual_def.global_mem_defs[0];
2945-
}
2946-
2947-
// Create memory with buffer location property
2948-
cl_mem_properties_intel props[] = {CL_MEM_ALLOC_BUFFER_LOCATION_INTEL, 1, 0};
2949-
buffer = clCreateBufferWithPropertiesINTEL(m_context, props, 0, sizeof(int),
2950-
0, &status);
2951-
ACL_LOCKED(CHECK(acl_mem_is_valid(buffer)));
2952-
CHECK_EQUAL(CL_SUCCESS, status);
2953-
assert(buffer);
2954-
CHECK_EQUAL(1, acl_ref_count(buffer));
2955-
2956-
// Check if the buffer has the right mem id
2957-
cl_uint read_mem_id = 4; // set to a dummy value
2958-
size_t size_ret;
2959-
CHECK_EQUAL(CL_SUCCESS,
2960-
clGetMemObjectInfo(buffer, CL_MEM_ALLOC_BUFFER_LOCATION_INTEL,
2961-
sizeof(cl_uint), &read_mem_id, &size_ret));
2962-
CHECK_EQUAL(1, read_mem_id);
2963-
2964-
// Enqueue write binds buffer to wrong global memory address range
2965-
status = clEnqueueWriteBuffer(m_cq, buffer, CL_TRUE, 0, sizeof(int),
2966-
&input_data, 0, NULL, NULL);
2967-
CHECK_EQUAL(CL_SUCCESS, status);
2968-
CHECK(ACL_STRIP_PHYSICAL_ID(buffer->block_allocation->range.begin) >=
2969-
m_device[0]
2970-
->def.autodiscovery_def.global_mem_defs[1]
2971-
.get_usable_range()
2972-
.begin);
2973-
CHECK(ACL_STRIP_PHYSICAL_ID(buffer->block_allocation->range.next) <
2974-
m_device[0]
2975-
->def.autodiscovery_def.global_mem_defs[1]
2976-
.get_usable_range()
2977-
.next);
2978-
2979-
// Pretend a reprogram happened for simulation, update global memory info
2980-
m_device[0]->def.autodiscovery_def = actual_def;
2981-
CHECK_EQUAL(2, m_device[0]->def.autodiscovery_def.num_global_mem_systems);
2982-
CHECK(m_device[0]
2983-
->def.autodiscovery_def.global_mem_defs[0]
2984-
.get_usable_range()
2985-
.begin != m_device[0]
2986-
->def.autodiscovery_def.global_mem_defs[1]
2987-
.get_usable_range()
2988-
.begin);
2989-
CHECK(m_device[0]
2990-
->def.autodiscovery_def.global_mem_defs[0]
2991-
.get_usable_range()
2992-
.next != m_device[0]
2993-
->def.autodiscovery_def.global_mem_defs[1]
2994-
.get_usable_range()
2995-
.next);
2996-
CHECK(ACL_STRIP_PHYSICAL_ID(buffer->block_allocation->range.begin) <
2997-
m_device[0]
2998-
->def.autodiscovery_def.global_mem_defs[1]
2999-
.get_usable_range()
3000-
.begin);
3001-
3002-
// Now call the migration function
3003-
ACL_LOCKED(CHECK_EQUAL(acl_realloc_buffer_for_simulator(buffer, 0, 1), 1));
3004-
CHECK(ACL_STRIP_PHYSICAL_ID(buffer->block_allocation->range.begin) >=
3005-
m_device[0]
3006-
->def.autodiscovery_def.global_mem_defs[1]
3007-
.get_usable_range()
3008-
.begin);
3009-
CHECK(ACL_STRIP_PHYSICAL_ID(buffer->block_allocation->range.next) <
3010-
m_device[0]
3011-
->def.autodiscovery_def.global_mem_defs[1]
3012-
.get_usable_range()
3013-
.next);
3014-
3015-
// Enqueue a blocking read to the right location and check data
3016-
status = clEnqueueReadBuffer(m_cq, buffer, CL_TRUE, 0, sizeof(int),
3017-
&output_data, 0, NULL, NULL);
3018-
CHECK_EQUAL(CL_SUCCESS, status);
3019-
3020-
// Check data preservation
3021-
CHECK_EQUAL(input_data, output_data);
3022-
3023-
// restore and clean up
3024-
m_device[0]->def.autodiscovery_def = orig_def;
3025-
CHECK_EQUAL(CL_SUCCESS, clReleaseMemObject(buffer));
3026-
}
3027-
30282909
MT_TEST(acl_mem, map_buf_bad_flags) {
30292910
ACL_LOCKED(acl_print_debug_msg("begin buf_bad_flags\n"));
30302911
cl_int status = CL_SUCCESS;

0 commit comments

Comments
 (0)