From 8ea53b9eea30e554de02eb99a4df35021e07157e Mon Sep 17 00:00:00 2001 From: Sophie Mao Date: Thu, 5 Jan 2023 06:48:45 -0800 Subject: [PATCH] Cache context offline device setting specified by environment variables Currently the runtime queries the context offline device setting from environment variables every time the setting is needed, which introduces performance overhead. This change caches the setting in acl_platform.offline_mode to avoid that overhead. --- include/acl_types.h | 4 ++++ src/acl_context.cpp | 5 +---- src/acl_device_binary.cpp | 15 ++++++--------- src/acl_globals.cpp | 1 + src/acl_hal_mmd.cpp | 2 +- src/acl_kernel.cpp | 4 +--- src/acl_kernel_if.cpp | 4 +--- src/acl_platform.cpp | 16 ++++++---------- src/acl_program.cpp | 4 +--- src/acl_usm.cpp | 8 ++------ 10 files changed, 24 insertions(+), 39 deletions(-) diff --git a/include/acl_types.h b/include/acl_types.h index 28685ae2..60d1c2e7 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -1576,6 +1576,10 @@ typedef struct _cl_platform_id // The setting of environment variable CL_CONTEXT_OFFLINE_DEVICE_INTELFPGA, if // any. std::string offline_device; + // Cache context offline mode specified by environment variables + // CL_CONTEXT_OFFLINE_DEVICE_INTELFPGA, CL_CONTEXT_MPSIM_DEVICE_INTELFPGA + // or CL_CONTEXT_MSIM_DEVICE_INTELFPGA + int offline_mode; // Should we track and automatically release leaked objects? // This helps immensely with the OpenCL conformance tests which tend to diff --git a/src/acl_context.cpp b/src/acl_context.cpp index e9fb97bb..cc3d6067 100644 --- a/src/acl_context.cpp +++ b/src/acl_context.cpp @@ -553,7 +553,6 @@ static cl_int l_finalize_context(cl_context context, cl_uint num_devices, static cl_int l_load_properties(cl_context context, const cl_context_properties *properties) { const char *default_compile_cmd = 0; - int env_override = 0; acl_assert_locked(); // Set defaults. @@ -717,8 +716,6 @@ static cl_int l_load_properties(cl_context context, // Always terminate list. 
After all, 'properties' might be empty! context->properties[context->num_property_entries++] = 0; - (void)acl_get_offline_device_user_setting(&env_override); - context->compiles_programs_incompletely = 0; switch (context->compiler_mode) { case static_cast( @@ -788,7 +785,7 @@ static cl_int l_load_properties(cl_context context, // We need backing store for the buffers. context->device_buffers_have_backing_store = 1; - if (env_override == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { // Simulator should support save/restore buffers around programming if // reprogramming on-the-fly is supported context->saves_and_restores_buffers_for_reprogramming = 1; diff --git a/src/acl_device_binary.cpp b/src/acl_device_binary.cpp index a3cef603..a7be90f3 100644 --- a/src/acl_device_binary.cpp +++ b/src/acl_device_binary.cpp @@ -175,13 +175,10 @@ cl_int acl_device_binary_t::load_binary_pkg(int validate_compile_options, #define FAILREAD_MSG "Could not read parts of the program binary." size_t data_len = 0; - int env_override = 0; - acl_assert_locked(); - (void)acl_get_offline_device_user_setting(&env_override); - - if (env_override == ACL_CONTEXT_MPSIM && !validate_compile_options && + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM && + !validate_compile_options && context->compiler_mode != CL_CONTEXT_COMPILER_MODE_OFFLINE_INTELFPGA && get_binary_len() < 1024) { // IF the binary is ridiculously small (arbitrary number) we are going @@ -258,7 +255,7 @@ cl_int acl_device_binary_t::load_binary_pkg(int validate_compile_options, // runtime. 
if (acl_pkg_section_exists(pkg, ".acl.rand_hash", &data_len) && dev_prog->device->loaded_bin == nullptr && - env_override != ACL_CONTEXT_MPSIM) { + acl_platform.offline_mode != ACL_CONTEXT_MPSIM) { std::vector pkg_rand_hash(data_len + 1); AND_CHECK(acl_pkg_read_section(pkg, ".acl.rand_hash", pkg_rand_hash.data(), data_len + 1), @@ -305,7 +302,7 @@ cl_int acl_device_binary_t::load_binary_pkg(int validate_compile_options, // For simulator flow, we treat as if the device has already been // programmed and check device global memory layout against // dev_prog->device->last_bin - if (env_override == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { if (validate_memory_layout && dev_prog->device->last_bin) { AND_CHECK(get_devdef().autodiscovery_def.num_global_mem_systems <= 1 || @@ -357,7 +354,7 @@ cl_int acl_device_binary_t::load_binary_pkg(int validate_compile_options, is_simulator = 0; if (status == CL_SUCCESS && acl_pkg_section_exists(pkg, ".acl.simulator_object", &data_len)) { - if (env_override != ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode != ACL_CONTEXT_MPSIM) { acl_context_callback( context, "aocx contains simulated kernel, but simulation mode not set!"); @@ -382,7 +379,7 @@ cl_int acl_device_binary_t::load_binary_pkg(int validate_compile_options, context, "aocx contains unsupported legacy opencl emulated kernel for windows!"); } - if (status == CL_SUCCESS && env_override == ACL_CONTEXT_MPSIM && + if (status == CL_SUCCESS && acl_platform.offline_mode == ACL_CONTEXT_MPSIM && !is_simulator) { acl_context_callback(context, "Simulation mode set but aocx is for hardware!"); diff --git a/src/acl_globals.cpp b/src/acl_globals.cpp index 64bbb1de..01a13121 100644 --- a/src/acl_globals.cpp +++ b/src/acl_globals.cpp @@ -224,6 +224,7 @@ void acl_reset(void) { l_reset_present_board(); acl_platform.offline_device = ""; + acl_platform.offline_mode = ACL_CONTEXT_OFFLINE_AND_AUTODISCOVERY; acl_platform.num_devices = 0; for (unsigned i = 0; i < 
ACL_MAX_DEVICE; ++i) { acl_platform.device[i] = _cl_device_id(); diff --git a/src/acl_hal_mmd.cpp b/src/acl_hal_mmd.cpp index c72d20fc..b9e55a2f 100644 --- a/src/acl_hal_mmd.cpp +++ b/src/acl_hal_mmd.cpp @@ -1248,7 +1248,7 @@ acl_mmd_get_system_definition(acl_system_def_t *sys, #endif // Dynamically load board mmd & symbols - acl_get_offline_device_user_setting(&use_offline_only); + (void)acl_get_offline_device_user_setting(&use_offline_only); if (use_offline_only == ACL_CONTEXT_MPSIM) { // Substitute the simulator MMD layer. diff --git a/src/acl_kernel.cpp b/src/acl_kernel.cpp index a37d889b..1a1c0987 100644 --- a/src/acl_kernel.cpp +++ b/src/acl_kernel.cpp @@ -2918,9 +2918,7 @@ static cl_int l_copy_and_adjust_arguments_for_device( [needed_mem_id]); #endif - int env_override = 0; - (void)acl_get_offline_device_user_setting(&env_override); - if (env_override == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { if (!acl_realloc_buffer_for_simulator(mem_obj, needed_physical_id, needed_mem_id)) { return CL_MEM_OBJECT_ALLOCATION_FAILURE; diff --git a/src/acl_kernel_if.cpp b/src/acl_kernel_if.cpp index 0223d30b..347f7827 100644 --- a/src/acl_kernel_if.cpp +++ b/src/acl_kernel_if.cpp @@ -706,7 +706,6 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io, char description_size_lsb[KERNEL_ROM_SIZE_BYTES_READ + 1]; unsigned int size_location, version, size; int result = 0; - int use_offline_only = 0; acl_assert_locked(); assert(acl_bsp_io_is_valid(&bsp_io)); @@ -723,8 +722,7 @@ int acl_kernel_if_init(acl_kernel_if *kern, acl_bsp_io bsp_io, // The simulator doesn't have any kernel interface information until the aocx // is loaded, which happens later. 
- acl_get_offline_device_user_setting(&use_offline_only); - if (use_offline_only == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { std::string err_msg; auto parse_result = acl_load_device_def_from_str( acl_shipped_board_cfgs[0].cfg, sysdef->device[0].autodiscovery_def, diff --git a/src/acl_platform.cpp b/src/acl_platform.cpp index 85890517..46986cec 100644 --- a/src/acl_platform.cpp +++ b/src/acl_platform.cpp @@ -288,7 +288,6 @@ const char *acl_platform_extensions() { // Initialize the internal bookkeeping based on the system definition // provided to us. void acl_init_platform(void) { - int offline_mode = 0; acl_assert_locked(); acl_platform.dispatch = &acl_icd_dispatch; @@ -304,9 +303,9 @@ void acl_init_platform(void) { // Set offline_device property const char *offline_device = - acl_get_offline_device_user_setting(&offline_mode); + acl_get_offline_device_user_setting(&acl_platform.offline_mode); if (offline_device) { - if (offline_mode == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { acl_platform.offline_device = ACL_MPSIM_DEVICE_NAME; } else { acl_platform.offline_device = offline_device; @@ -383,7 +382,7 @@ void acl_init_platform(void) { // having. acl_platform.initial_board_def = acl_present_board_def(); - switch (offline_mode) { + switch (acl_platform.offline_mode) { case ACL_CONTEXT_OFFLINE_AND_AUTODISCOVERY: acl_platform.num_devices = acl_platform.initial_board_def->num_devices + @@ -409,7 +408,7 @@ void acl_init_platform(void) { l_add_device(static_cast(i)); } - l_initialize_offline_devices(offline_mode); + l_initialize_offline_devices(acl_platform.offline_mode); // Device operation queue. 
acl_init_device_op_queue(&acl_platform.device_op_queue); @@ -516,15 +515,12 @@ void acl_init_platform(void) { void acl_finalize_init_platform(unsigned int num_devices, const cl_device_id *devices) { - int offline_mode = 0; int have_single_bank_with_shared_memory; acl_assert_locked(); assert(num_devices > 0); - (void)acl_get_offline_device_user_setting(&offline_mode); - - l_initialize_devices(acl_present_board_def(), offline_mode, num_devices, - devices); + l_initialize_devices(acl_present_board_def(), acl_platform.offline_mode, + num_devices, devices); if (is_SOC_device()) { size_t cur_num_banks = diff --git a/src/acl_program.cpp b/src/acl_program.cpp index 9528c209..0e893f72 100644 --- a/src/acl_program.cpp +++ b/src/acl_program.cpp @@ -1638,9 +1638,7 @@ void acl_program_device(void *user_data, acl_device_op_t *op) { dev_prog->device->def.autodiscovery_def = dev_bin->get_devdef().autodiscovery_def; - int offline_mode = 0; - (void)acl_get_offline_device_user_setting(&offline_mode); - if (offline_mode == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { // Override the device name to the simulator. 
// In function acl_device_binary_t::load_binary_pkg, the name member will // be checked against the .acl.board section of the aocx file, which would diff --git a/src/acl_usm.cpp b/src/acl_usm.cpp index 5a121d89..298b046a 100644 --- a/src/acl_usm.cpp +++ b/src/acl_usm.cpp @@ -148,9 +148,7 @@ CL_API_ENTRY void *CL_API_CALL clHostMemAllocINTEL( { auto mmd_properties_it = mmd_properties.begin(); if (mem_id) { - int use_offline_only; - acl_get_offline_device_user_setting(&use_offline_only); - if (use_offline_only == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { *mmd_properties_it++ = AOCL_MMD_MEM_PROPERTIES_BUFFER_LOCATION; *mmd_properties_it++ = *mem_id; } @@ -433,9 +431,7 @@ clSharedMemAllocINTEL(cl_context context, cl_device_id device, { auto mmd_properties_it = mmd_properties.begin(); if (mem_id) { - int use_offline_only; - acl_get_offline_device_user_setting(&use_offline_only); - if (use_offline_only == ACL_CONTEXT_MPSIM) { + if (acl_platform.offline_mode == ACL_CONTEXT_MPSIM) { *mmd_properties_it++ = AOCL_MMD_MEM_PROPERTIES_BUFFER_LOCATION; *mmd_properties_it++ = *mem_id; }