From 775e73cbf504afe4df1060a4e7d771fc40172b26 Mon Sep 17 00:00:00 2001 From: "bain.syrowik" Date: Thu, 4 Nov 2021 10:17:43 -0700 Subject: [PATCH 1/2] Disable conflicting min and max macros on Windows --- CMakeLists.txt | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index 27064777..3fe7ef85 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -127,6 +127,13 @@ include(CTest) # See https://cmake.org/cmake/help/latest/module/GNUInstallDirs.html include(GNUInstallDirs) +# Windows defines its own versions of min() and max() which conflict with the +# versions from the standard library. Disable the Windows versions by defining +# the NOMINMAX macro. This must be set before including acl.h, which +# eventually leads to the inclusion of windows.h, where the min and max macros +# are defined. +set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS NOMINMAX) + # acl_hostxml includes acl.h, but we don't want to create a cyclic dependency, # so we create a header only library here add_library(acl_headers INTERFACE) From 4b17c4d1f9f1e2a5226772bc0c358d9bce071f91 Mon Sep 17 00:00:00 2001 From: "bain.syrowik" Date: Thu, 4 Nov 2021 10:17:43 -0700 Subject: [PATCH 2/2] Fix simulation of multi-memory systems When the runtime is initialized it needs to advertise the available devices. For hardware this is fine because we can query the hardware via PCIe, but for the simulation flow we don't know what the board should look like until we receive an aocx file. To work around this restriction, we initialize the simulation flow using the autodiscovery string for the a10gx board. This is often fine, but in cases where we want to simulate a board with more memory systems we will encounter an issue. 
In particular, targeting different `buffer_location`s in the source will often result in this assertion: ``` /src/acl_device_op.cpp:677: int l_is_noop_migration(acl_device_op_t*): Assertion `src_mem->reserved_allocations[dest_device].size() > dest_mem_id' failed. ``` Ideally we would want to allocate this space dynamically based on the .aocx file; however, we get the .aocx file quite late for the simulation flow, after we have already created buffers. --- CHANGELOG.md | 4 ++++ src/acl_mem.cpp | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 84a77a93..968ae7be 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,9 +8,13 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Deprecated ### Removed ### Fixed + +- Simulation of systems with multiple global memories ([#29]). + ### Security [Unreleased]: https://github.com/intel/fpga-runtime-for-opencl/compare/v2022.1...HEAD +[#29]: https://github.com/intel/fpga-runtime-for-opencl/pull/29 ## [2022.1] - 2021-12-01 diff --git a/src/acl_mem.cpp b/src/acl_mem.cpp index a4213ef9..01d511af 100644 --- a/src/acl_mem.cpp +++ b/src/acl_mem.cpp @@ -4408,6 +4408,15 @@ void acl_resize_reserved_allocations_for_device(cl_mem mem, unsigned int num_global_mem_systems = def.autodiscovery_def.num_global_mem_systems; + // For the simulation flow we don't know how many memory systems will exist + // until we load the .aocx, which may not happen until somewhat later. + // Reserving space is quite cheap, so reserve space for many memory systems. + int offline_mode = 0; + (void)acl_get_offline_device_user_setting(&offline_mode); + if (offline_mode == ACL_CONTEXT_MPSIM) { + num_global_mem_systems = std::max(num_global_mem_systems, 128u); + } + #ifdef MEM_DEBUG_MSG printf( "resizing reserved_allocations, physical_device_id:%u, target_size:%u \n",