diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000000..cc6faab66f --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,37 @@ +* @pvchupin + +# Use runtime team as the umbrella for most of the tests +/SYCL/ @intel/llvm-reviewers-runtime + +# SYCL sub-directory matchers are grouped by code owner first, followed by +# alphabetical order within the group. Please, keep this ordering. + +# Group algorithms +/SYCL/GroupAlgorithm/ @Pennycook @intel/llvm-reviewers-runtime +/SYCL/GroupLocalMemory/ @Pennycook @intel/llvm-reviewers-runtime +/SYCL/SubGroup/ @Pennycook @intel/llvm-reviewers-runtime +/SYCL/SubGroupMask/ @Pennycook @intel/llvm-reviewers-runtime + +# Plugin interface for Level Zero +/SYCL/Plugin/*level-zero* @intel/dpcpp-l0-pi-reviewers +/SYCL/Plugin/*level_zero* @intel/dpcpp-l0-pi-reviewers + +# Explicit SIMD +/SYCL/ESIMD/ @intel/dpcpp-esimd-reviewers + +# BFloat16 conversion +/SYCL/BFloat16/ @intel/dpcpp-tools-reviewers + +# Compiler tests +/SYCL/AOT/ @intel/dpcpp-tools-reviewers +/SYCL/DeviceCodeSplit/ @intel/dpcpp-tools-reviewers +/SYCL/SeparateCompile/ @intel/dpcpp-tools-reviewers + +# Printf +/SYCL/Printf/ @intel/dpcpp-tools-reviewers + +# Specialization constant +/SYCL/SpecConstants/ @intel/dpcpp-tools-reviewers + +# invoke_simd +/SYCL/InvokeSimd/ @rolandschulz @kbobrovs diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml new file mode 100644 index 0000000000..3381c69ae6 --- /dev/null +++ b/.github/workflows/clang-format.yml @@ -0,0 +1,34 @@ +name: clang-format-check + +on: + pull_request: + branches: + - intel + +jobs: + build: + runs-on: ubuntu-latest + + container: + image: ghcr.io/intel/llvm/sycl_ubuntu2004_nightly:no-drivers + + steps: + - uses: actions/checkout@v2 + with: + fetch-depth: 2 + + - name: Run clang-format for the patch + shell: bash {0} + run: | + git config --global --add safe.directory /__w/llvm-test-suite/llvm-test-suite + git clang-format ${GITHUB_SHA}^1 + git diff > ./clang-format.patch + + # Add patch with formatting fixes to CI job artifacts + - uses: actions/upload-artifact@v1 + with: + name: clang-format-patch + path: ./clang-format.patch + + - name: Check if clang-format patch is empty + run: bash -c "if [ -s ./clang-format.patch ]; then cat ./clang-format.patch; exit 1; fi" diff --git a/.gitignore b/.gitignore index 6d2484bd88..cdf1641c8f 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ # External/* /test-suite-externals *.pyc +/build* diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000000..bf40e636a0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,88 @@ +# Contributing + +## License + +This project is licensed under the terms of the Apache License v2.0 with LLVM +Exceptions license ([LICENSE.txt](LICENSE.TXT)) to ensure our ability to +contribute this project to the LLVM test suite project under the same license. + +By contributing to this project, you agree to the Apache License v2.0 with LLVM +Exceptions and copyright terms there in and release your contribution under +these terms. + +## Contribution process + +### Development + +For any changes not related to DPC++, but rather to LLVM in general, it is +strongly encouraged that you submit the patch to https://github.com/llvm/llvm-test-suite directly. +See [LLVM contribution guidelines](https://llvm.org/docs/Contributing.html) +for more information. + +- Create a personal fork of the project on GitHub + - For the DPC++ end-to-end test development, use **intel** branch as baseline + for your changes. +- Prepare your patch + - follow [LLVM coding standards](https://llvm.org/docs/CodingStandards.html) + - [clang-format](https://clang.llvm.org/docs/ClangFormat.html) and + [clang-tidy](https://clang.llvm.org/extra/clang-tidy/) tools can be + integrated into your workflow to ensure formatting and stylistic + compliance of your changes. + - use + + ```bash + wget https://raw.githubusercontent.com/intel/llvm/sycl/clang/tools/clang-format/git-clang-format + python git-clang-format `git merge-base origin/intel HEAD` + ``` + + to check the format of your current changes against the `origin/intel` + branch. + - `-f` to also correct unstaged changes + - `--diff` to only print the diff without applying + +### Testing + +- See [SYCL/README.md](SYCL/README.md) for instructions. + +### Commit message + +- When writing your commit message, please make sure to follow + [LLVM developer policies]( + https://llvm.org/docs/DeveloperPolicy.html#commit-messages) on the subject. +- For any DPC++-related commit, the `[SYCL]` tag should be present in the + commit message title. To a reasonable extent, additional tags can be used + to signify the component changed, e.g.: `[LIT]`, `[NFC]`, `[Doc]`. + +### Review and acceptance testing + +- Create a pull request for your changes following [Creating a pull request +instructions](https://help.github.com/articles/creating-a-pull-request/). + - PR description should follow same rules as commit message. It is used as + commit message on the final merge. +- Changes addressing comments made during code review should be added as a + separate commits to the same PR. +- CI will run checks which are prerequisites for submitting PR: + - clang-format-check/build checks that the patch matches coding style + (see [clang-format](https://clang.llvm.org/docs/ClangFormat.html)); + - Jenkins/pre-ci-cuda - runs all related tests on CUDA backend for GPU device + on Ubuntu 18.04; + - Jenkins/pre-ci-linux - runs all related tests on Ubuntu 18.04 machine with + Level_Zero backend (GPU device) and OpenCL backend (CPU, GPU and FPGA + emulator devices); + - Jenkins/pre-ci-windows - runs all related tests on Windows Server 2019 with + Level_Zero backend (GPU device) and OpenCL backend (CPU, GPU and FPGA + emulator devices). + +The last three checks are done for the latest available nightly build for DPC++ +compiler and runtime from [intel/llvm](https://github.com/intel/llvm). The +build happens around 18:00 UTC if there are new commits since previous build. + +Once the PR is approved and all checks have passed, the pull request is +ready for merge. + +### Merge + +Project maintainers merge pull requests by "Squash and merge". PR description +is used as final commit message. + +\*Other names and brands may be claimed as the property of others. diff --git a/README.md b/README.md new file mode 100644 index 0000000000..cd55942499 --- /dev/null +++ b/README.md @@ -0,0 +1,27 @@ +# LLVM* test suite repository + +Please see the LLVM testing infrastructure guide: + + https://llvm.org/docs/TestSuiteGuide.html + +for more information on the contents of this repository. + +## Introduction + +Intel staging area for LLVM test suite contribution. Home for oneAPI Data +Parallel C++ compiler tests extending LLVM test suite. + +## License + +See [LICENSE.txt](LICENSE.TXT) for details. + +## Contributing + +See [CONTRIBUTING.md](CONTRIBUTING.md) for details. + +## Related projects documentation + +* oneAPI Data Parallel C++ compiler - See + [DPC++ Documentation](https://intel.github.io/llvm-docs/) + +\*Other names and brands may be claimed as the property of others. diff --git a/SYCL/.clang-format b/SYCL/.clang-format new file mode 100644 index 0000000000..b9df8e7314 --- /dev/null +++ b/SYCL/.clang-format @@ -0,0 +1,2 @@ +BasedOnStyle: LLVM +CommentPragmas: "(RUN|FAIL|REQUIRES|UNSUPPORTED|CHECK[A-Za-z0-9_-]*) *:|expected-" diff --git a/SYCL/AOT/Inputs/aot.cpp b/SYCL/AOT/Inputs/aot.cpp new file mode 100644 index 0000000000..4800f76017 --- /dev/null +++ b/SYCL/AOT/Inputs/aot.cpp @@ -0,0 +1,74 @@ +//==--- aot.cpp - Simple vector addition (AOT compilation example) --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +#include + +#include +#include + +constexpr sycl::access::mode sycl_read = sycl::access::mode::read; +constexpr sycl::access::mode sycl_write = sycl::access::mode::write; + +template class SimpleVadd; + +template +void simple_vadd(const std::array &VA, const std::array &VB, + std::array &VC) { + sycl::queue deviceQueue([](sycl::exception_list ExceptionList) { + for (std::exception_ptr ExceptionPtr : ExceptionList) { + try { + std::rethrow_exception(ExceptionPtr); + } catch (sycl::exception &E) { + std::cerr << E.what(); + } catch (...) { + std::cerr << "Unknown async exception was caught." << std::endl; + } + } + }); + + sycl::range<1> numOfItems{N}; + sycl::buffer bufferA(VA.data(), numOfItems); + sycl::buffer bufferB(VB.data(), numOfItems); + sycl::buffer bufferC(VC.data(), numOfItems); + + deviceQueue.submit([&](sycl::handler &cgh) { + auto accessorA = bufferA.template get_access(cgh); + auto accessorB = bufferB.template get_access(cgh); + auto accessorC = bufferC.template get_access(cgh); + + cgh.parallel_for>(numOfItems, [=](sycl::id<1> wiID) { + accessorC[wiID] = accessorA[wiID] + accessorB[wiID]; + }); + }); + + deviceQueue.wait_and_throw(); +} + +int main() { + const size_t array_size = 4; + std::array A = {{1, 2, 3, 4}}, B = {{1, 2, 3, 4}}, + C; + std::array D = {{1.f, 2.f, 3.f, 4.f}}, + E = {{1.f, 2.f, 3.f, 4.f}}, F; + simple_vadd(A, B, C); + simple_vadd(D, E, F); + for (unsigned int i = 0; i < array_size; i++) { + if (C[i] != A[i] + B[i]) { + std::cout << "The results are incorrect (element " << i << " is " << C[i] + << "!\n"; + return 1; + } + if (F[i] != D[i] + E[i]) { + std::cout << "The results are incorrect (element " << i << " is " << F[i] + << "!\n"; + return 1; + } + } + std::cout << "The results are correct!\n"; + return 0; +} diff --git a/SYCL/AOT/accelerator.cpp b/SYCL/AOT/accelerator.cpp new file mode 100644 index 0000000000..602ba415f4 --- /dev/null +++ b/SYCL/AOT/accelerator.cpp @@ -0,0 +1,12 @@ +//=-- accelerator.cpp - compilation for fpga emulator dev using opencl-aot --=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +// REQUIRES: opencl-aot, accelerator + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga %S/Inputs/aot.cpp -o %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out diff --git a/SYCL/AOT/cpu.cpp b/SYCL/AOT/cpu.cpp new file mode 100644 index 0000000000..c6707e38b0 --- /dev/null +++ b/SYCL/AOT/cpu.cpp @@ -0,0 +1,15 @@ +//==--- cpu.cpp - AOT compilation for cpu devices using opencl-aot --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +// REQUIRES: opencl-aot, cpu + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/aot.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// Test that opencl-aot can handle multiple build options. +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64 %S/Inputs/aot.cpp -Xsycl-target-backend "--bo=-g" -Xsycl-target-backend "--bo=-cl-opt-disable" -o %t2.out diff --git a/SYCL/AOT/gpu.cpp b/SYCL/AOT/gpu.cpp new file mode 100644 index 0000000000..587afecc99 --- /dev/null +++ b/SYCL/AOT/gpu.cpp @@ -0,0 +1,14 @@ +//==--- gpu.cpp - AOT compilation for gen devices using GEN compiler ------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// + +// REQUIRES: ocloc, gpu +// UNSUPPORTED: cuda +// CUDA is not compatible with SPIR. +// +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %S/Inputs/aot.cpp -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/AOT/multiple-devices.cpp b/SYCL/AOT/multiple-devices.cpp new file mode 100644 index 0000000000..04f88eadc3 --- /dev/null +++ b/SYCL/AOT/multiple-devices.cpp @@ -0,0 +1,56 @@ +//==-- multiple-devices.cpp - Appropriate AOT-compiled image selection -----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: opencl-aot, ocloc, cpu, gpu, accelerator +// UNSUPPORTED: cuda +// CUDA is not compatible with SPIR. + +// Produce a fat object for all targets (generic SPIR-V, CPU, GPU, FPGA) +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_x86_64,spir64_gen,spir64_fpga %S/Inputs/aot.cpp -c -o %t.o + +// CPU, GPU, FPGA +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen,spir64_fpga -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_all_aot.out +// RUN: %CPU_RUN_PLACEHOLDER %t_all_aot.out +// RUN: %GPU_RUN_PLACEHOLDER %t_all_aot.out +// RUN: %ACC_RUN_PLACEHOLDER %t_all_aot.out + +// CPU, GPU +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_cpu_gpu.out +// RUN: %CPU_RUN_PLACEHOLDER %t_cpu_gpu.out +// RUN: %GPU_RUN_PLACEHOLDER %t_cpu_gpu.out + +// CPU, FPGA +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64,spir64_fpga %t.o -o %t_cpu_fpga.out +// RUN: %CPU_RUN_PLACEHOLDER %t_cpu_fpga.out +// RUN: %ACC_RUN_PLACEHOLDER %t_cpu_fpga.out + +// GPU, FPGA +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen,spir64_fpga -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_gpu_fpga.out +// RUN: %GPU_RUN_PLACEHOLDER %t_gpu_fpga.out +// RUN: %ACC_RUN_PLACEHOLDER %t_gpu_fpga.out + +// No AOT-compiled image for CPU +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_gen,spir64_fpga -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_spv_gpu_fpga.out +// RUN: %CPU_RUN_PLACEHOLDER %t_spv_gpu_fpga.out +// Check that execution on AOT-compatible devices is unaffected +// RUN: %GPU_RUN_PLACEHOLDER %t_spv_gpu_fpga.out +// RUN: %ACC_RUN_PLACEHOLDER %t_spv_gpu_fpga.out + +// No AOT-compiled image for GPU +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_x86_64,spir64_fpga %t.o -o %t_spv_cpu_fpga.out +// RUN: %GPU_RUN_PLACEHOLDER %t_spv_cpu_fpga.out +// Check that execution on AOT-compatible devices is unaffected +// RUN: %CPU_RUN_PLACEHOLDER %t_spv_cpu_fpga.out +// RUN: %ACC_RUN_PLACEHOLDER %t_spv_cpu_fpga.out + +// No AOT-compiled image for FPGA +// RUN: %clangxx -fsycl -fsycl-targets=spir64,spir64_x86_64,spir64_gen -Xsycl-target-backend=spir64_gen %gpu_aot_target_opts %t.o -o %t_spv_cpu_gpu.out +// RUN: %ACC_RUN_PLACEHOLDER %t_spv_cpu_gpu.out +// Check that execution on AOT-compatible devices is unaffected +// RUN: %CPU_RUN_PLACEHOLDER %t_spv_cpu_gpu.out +// RUN: %GPU_RUN_PLACEHOLDER %t_spv_cpu_gpu.out diff --git a/SYCL/AOT/with-llvm-bc.cpp b/SYCL/AOT/with-llvm-bc.cpp new file mode 100644 index 0000000000..76b6b10b1c --- /dev/null +++ b/SYCL/AOT/with-llvm-bc.cpp @@ -0,0 +1,17 @@ +//==----- with-llvm-bc.cpp - SYCL kernel with LLVM IR bitcode as binary ----==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: cpu, dump_ir + +// RUN: %clangxx -fsycl -fsycl-targets=spir64 -c %S/Inputs/aot.cpp -o %t.o +// RUN: %clangxx -fsycl -fsycl-link-targets=spir64 %t.o -o %t.spv +// RUN: llvm-spirv -r %t.spv -o %t.bc +// RUN: %clangxx -fsycl -fsycl-add-targets=spir64:%t.bc %t.o -o %t.out +// +// Only CPU supports LLVM IR bitcode as a binary +// RUN: %CPU_RUN_PLACEHOLDER %t.out diff --git a/SYCL/Assert/Inputs/kernels_in_file2.cpp b/SYCL/Assert/Inputs/kernels_in_file2.cpp new file mode 100644 index 0000000000..e07fdbb3c5 --- /dev/null +++ b/SYCL/Assert/Inputs/kernels_in_file2.cpp @@ -0,0 +1,45 @@ +#include "kernels_in_file2.hpp" + +#ifdef DEFINE_NDEBUG_INFILE2 +#define NDEBUG +#else +#undef NDEBUG +#endif + +#include + +using namespace sycl; +using namespace sycl::access; + +int calculus(int X) { + assert(X && "this message from calculus"); + return X * 2; +} + +void check_nil(int value) { assert(value && "this message from file2"); } + +static constexpr size_t BUFFER_SIZE = 4; + +void enqueueKernel_1_fromFile2(queue *Q) { + sycl::range<1> numOfItems{BUFFER_SIZE}; + sycl::buffer Buf(numOfItems); + + Q->submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + numOfItems, [=](sycl::id<1> wiID) { check_nil(Acc[wiID]); }); + }); +} + +void enqueueKernel_2_fromFile2(queue *Q) { + sycl::range<1> numOfItems{BUFFER_SIZE}; + sycl::buffer Buf(numOfItems); + + Q->submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + numOfItems, [=](sycl::id<1> wiID) { check_nil(Acc[wiID]); }); + }); +} diff --git a/SYCL/Assert/Inputs/kernels_in_file2.hpp b/SYCL/Assert/Inputs/kernels_in_file2.hpp new file mode 100644 index 0000000000..851207ac66 --- /dev/null +++ b/SYCL/Assert/Inputs/kernels_in_file2.hpp @@ -0,0 +1,7 @@ +#include + +SYCL_EXTERNAL int calculus(int X); + +void enqueueKernel_1_fromFile2(sycl::queue *Q); + +void enqueueKernel_2_fromFile2(sycl::queue *Q); diff --git a/SYCL/Assert/assert_in_kernels.cpp b/SYCL/Assert/assert_in_kernels.cpp new file mode 100644 index 0000000000..675411f314 --- /dev/null +++ b/SYCL/Assert/assert_in_kernels.cpp @@ -0,0 +1,20 @@ +// REQUIRES: linux +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK-NOT: One shouldn't see this message +// CHECK: {{.*}}assert_in_kernels.hpp:25: void kernelFunc2(int *, int): {{.*}} [{{[0,2]}},0,0], {{.*}} [0,0,0] +// CHECK-SAME: Assertion `Buf[wiID] == 0 && "from assert statement"` failed. +// CHECK-NOT: test aborts earlier, one shouldn't see this message +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_kernels.hpp:25: void kernelFunc2(int *, int): {{.*}} [{{[0,2]}},0,0], {{.*}} [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_kernels.hpp" diff --git a/SYCL/Assert/assert_in_kernels.hpp b/SYCL/Assert/assert_in_kernels.hpp new file mode 100644 index 0000000000..834ddb22da --- /dev/null +++ b/SYCL/Assert/assert_in_kernels.hpp @@ -0,0 +1,69 @@ +#include +#include +#include + +using namespace sycl; +using namespace sycl::access; + +void kernelFunc1(int *Buf, int wiID) { + Buf[wiID] = 9; + assert(Buf[wiID] != 0 && "One shouldn't see this message"); +} + +void assertTest1(queue &Q, buffer &Buf) { + Q.submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + Buf.get_range(), [=](sycl::id<1> wiID) { kernelFunc1(&Acc[0], wiID); }); + }); +} + +void kernelFunc2(int *Buf, int wiID) { + if (wiID % 2 != 0) + Buf[wiID] = 0; + assert(Buf[wiID] == 0 && "from assert statement"); +} + +void assertTest2(queue &Q, buffer &Buf) { + Q.submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + Buf.get_range(), [=](sycl::id<1> wiID) { kernelFunc2(&Acc[0], wiID); }); + }); +} + +void kernelFunc3(int *Buf, int wiID) { + if (wiID == 0) + assert(false && "test aborts earlier, one shouldn't see this message"); + Buf[wiID] = 9; +} + +void assertTest3(queue &Q, buffer &Buf) { + Q.submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + Buf.get_range(), [=](sycl::id<1> wiID) { kernelFunc3(&Acc[0], wiID); }); + }); +} + +int main(int Argc, const char *Argv[]) { + std::array Vec = {1, 2, 3, 4}; + sycl::range<1> numOfItems{Vec.size()}; + sycl::buffer Buf(Vec.data(), numOfItems); + + queue Q; + assertTest1(Q, Buf); + Q.wait(); + + assertTest2(Q, Buf); + Q.wait(); + + assertTest3(Q, Buf); + Q.wait(); + + std::cout << "The test ended." << std::endl; + return 0; +} diff --git a/SYCL/Assert/assert_in_kernels_ndebug.cpp b/SYCL/Assert/assert_in_kernels_ndebug.cpp new file mode 100644 index 0000000000..4475083ffe --- /dev/null +++ b/SYCL/Assert/assert_in_kernels_ndebug.cpp @@ -0,0 +1,9 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DNDEBUG %S/assert_in_kernels.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// +// CHECK-NOT: One shouldn't see this message +// CHECK-NOT: from assert statement +// CHECK-NOT: test aborts earlier, one shouldn't see this message +// CHECK: The test ended. diff --git a/SYCL/Assert/assert_in_kernels_win.cpp b/SYCL/Assert/assert_in_kernels_win.cpp new file mode 100644 index 0000000000..8b05ce7a06 --- /dev/null +++ b/SYCL/Assert/assert_in_kernels_win.cpp @@ -0,0 +1,22 @@ +// REQUIRES: windows +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK-NOT: One shouldn't see this message +// FIXME Windows version prints '(null)' instead of '' once in a +// while for some insane reason. +// CHECK: {{.*}}assert_in_kernels.hpp:25: {{|(null)}}: {{.*}} [{{[0,2]}},0,0], {{.*}} [0,0,0] +// CHECK-SAME: Assertion `Buf[wiID] == 0 && "from assert statement"` failed. +// CHECK-NOT: test aborts earlier, one shouldn't see this message +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_kernels.hpp:25: {{|(null)}}: {{.*}} [{{[0,2]}},0,0], {{.*}} [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_kernels.hpp" diff --git a/SYCL/Assert/assert_in_multiple_tus.cpp b/SYCL/Assert/assert_in_multiple_tus.cpp new file mode 100644 index 0000000000..275ee960e0 --- /dev/null +++ b/SYCL/Assert/assert_in_multiple_tus.cpp @@ -0,0 +1,21 @@ +// REQUIRES: linux +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CUDA uses block/thread vs global/local id for SYCL, also it shows the +// position of a thread within the block, not the absolute ID. +// CHECK: {{.*}}kernels_in_file2.cpp:15: int calculus(int): {{global id: \[5|block: \[1}},0,0], {{local id|thread}}: [1,0,0] +// CHECK-SAME: Assertion `X && "this message from calculus"` failed. +// CHECK-NOT: this message from file2 +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}kernels_in_file2.cpp:15: int calculus(int): global id: [5,0,0], local id: [1,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_multiple_tus.hpp" diff --git a/SYCL/Assert/assert_in_multiple_tus.hpp b/SYCL/Assert/assert_in_multiple_tus.hpp new file mode 100644 index 0000000000..cf65d6a461 --- /dev/null +++ b/SYCL/Assert/assert_in_multiple_tus.hpp @@ -0,0 +1,51 @@ +#include "Inputs/kernels_in_file2.hpp" +#include +#include + +#ifdef DEFINE_NDEBUG_INFILE1 +#define NDEBUG +#else +#undef NDEBUG +#endif + +#include + +using namespace sycl; +using namespace sycl::access; + +static constexpr size_t BUFFER_SIZE = 16; + +int checkFunction() { + int X = calculus(0); + assert(X && "Nil in result"); + return X; +} + +void enqueueKernel_1_fromFile1(queue *Q) { + sycl::range<1> numOfItems{BUFFER_SIZE}; + sycl::buffer Buf(numOfItems); + + Q->submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + sycl::nd_range(Buf.get_range(), sycl::range<1>(4)), + [=](sycl::id<1> wiID) { + int X = 0; + if (wiID == 5) + X = checkFunction(); + Acc[wiID] = X; + }); + }); +} + +int main(int Argc, const char *Argv[]) { + + queue Q; + enqueueKernel_1_fromFile1(&Q); + enqueueKernel_2_fromFile2(&Q); + Q.wait(); + + std::cout << "The test ended." << std::endl; + return 0; +} diff --git a/SYCL/Assert/assert_in_multiple_tus_one_ndebug.cpp b/SYCL/Assert/assert_in_multiple_tus_one_ndebug.cpp new file mode 100644 index 0000000000..5984cad082 --- /dev/null +++ b/SYCL/Assert/assert_in_multiple_tus_one_ndebug.cpp @@ -0,0 +1,20 @@ +// REQUIRES: linux +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK-NOT: this message from calculus +// CUDA uses block/thread vs global/local id for SYCL, also it shows the +// position of a thread within the block, not the absolute ID. +// CHECK: {{.*}}assert_in_multiple_tus.hpp:20: int checkFunction(): {{global id: \[5|block: \[1}},0,0], +// CHECK-SAME: {{.*}} [1,0,0] Assertion `X && "Nil in result"` failed. +// CHECK-NOT: this message from file2 +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_multiple_tus.hpp:20: int checkFunction(): {{.*}} +// CHECK-ACC: The test ended. diff --git a/SYCL/Assert/assert_in_multiple_tus_one_ndebug_win.cpp b/SYCL/Assert/assert_in_multiple_tus_one_ndebug_win.cpp new file mode 100644 index 0000000000..e2add269bd --- /dev/null +++ b/SYCL/Assert/assert_in_multiple_tus_one_ndebug_win.cpp @@ -0,0 +1,20 @@ +// REQUIRES: windows +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK-NOT: this message from calculus +// FIXME Windows version prints '(null)' instead of '' once in a +// while for some insane reason. +// CHECK: {{.*}}assert_in_multiple_tus.hpp:20: {{|(null)}}: {{.*}} [5,0,0], +// CHECK-SAME: {{.*}} [1,0,0] Assertion `X && "Nil in result"` failed. +// CHECK-NOT: this message from file2 +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_multiple_tus.hpp:20: {{|(null)}}: {{.*}} [5,0,0], +// CHECK-ACC: The test ended. diff --git a/SYCL/Assert/assert_in_multiple_tus_win.cpp b/SYCL/Assert/assert_in_multiple_tus_win.cpp new file mode 100644 index 0000000000..1915bd8ed8 --- /dev/null +++ b/SYCL/Assert/assert_in_multiple_tus_win.cpp @@ -0,0 +1,21 @@ +// REQUIRES: windows +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// FIXME Windows version prints '(null)' instead of '' once in a +// while for some insane reason. +// CHECK: {{.*}}kernels_in_file2.cpp:15: {{|(null)}}: {{.*}} [5,0,0], {{.*}} [1,0,0] +// CHECK-SAME: Assertion `X && "this message from calculus"` failed. +// CHECK-NOT: this message from file2 +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}kernels_in_file2.cpp:15: {{|(null)}}: {{.*}} [5,0,0], {{.*}} [1,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_multiple_tus.hpp" diff --git a/SYCL/Assert/assert_in_one_kernel.cpp b/SYCL/Assert/assert_in_one_kernel.cpp new file mode 100644 index 0000000000..fb9b111894 --- /dev/null +++ b/SYCL/Assert/assert_in_one_kernel.cpp @@ -0,0 +1,18 @@ +// REQUIRES: linux +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK: {{.*}}assert_in_one_kernel.hpp:10: void kernelFunc(int *, int): {{.*}} [{{[0-3]}},0,0], {{.*}} [0,0,0] +// CHECK-SAME: Assertion `Buf[wiID] != 0 && "from assert statement"` failed. +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_one_kernel.hpp:10: void kernelFunc(int *, int): {{.*}} [{{[0-3]}},0,0], {{.*}} [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_one_kernel.hpp" diff --git a/SYCL/Assert/assert_in_one_kernel.hpp b/SYCL/Assert/assert_in_one_kernel.hpp new file mode 100644 index 0000000000..5f37916fdf --- /dev/null +++ b/SYCL/Assert/assert_in_one_kernel.hpp @@ -0,0 +1,34 @@ +#include +#include +#include + +using namespace sycl; +using namespace sycl::access; + +void kernelFunc(int *Buf, int wiID) { + Buf[wiID] = 0; + assert(Buf[wiID] != 0 && "from assert statement"); +} + +void assertTest() { + std::array Vec = {1, 2, 3, 4}; + sycl::range<1> numOfItems{Vec.size()}; + sycl::buffer Buf(Vec.data(), numOfItems); + + queue Q; + Q.submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for( + numOfItems, [=](item<1> Item) { kernelFunc(&Acc[0], Item[0]); }); + }); + Q.wait(); +} + +int main(int Argc, const char *Argv[]) { + + assertTest(); + + std::cout << "The test ended." << std::endl; + return 0; +} diff --git a/SYCL/Assert/assert_in_one_kernel_ndebug.cpp b/SYCL/Assert/assert_in_one_kernel_ndebug.cpp new file mode 100644 index 0000000000..63aeddee9b --- /dev/null +++ b/SYCL/Assert/assert_in_one_kernel_ndebug.cpp @@ -0,0 +1,7 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple -DNDEBUG %S/assert_in_one_kernel.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out %CPU_CHECK_PLACEHOLDER +// RUN: %GPU_RUN_PLACEHOLDER %t.out %GPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t.out %ACC_CHECK_PLACEHOLDER +// +// CHECK-NOT: from assert statement +// CHECK: The test ended. diff --git a/SYCL/Assert/assert_in_one_kernel_win.cpp b/SYCL/Assert/assert_in_one_kernel_win.cpp new file mode 100644 index 0000000000..9eed7fe065 --- /dev/null +++ b/SYCL/Assert/assert_in_one_kernel_win.cpp @@ -0,0 +1,20 @@ +// REQUIRES: windows +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// FIXME Windows version prints '(null)' instead of '' once in a +// while for some insane reason. +// CHECK: {{.*}}assert_in_one_kernel.hpp:10: {{|(null)}}: {{.*}} [{{[0-3]}},0,0], {{.*}} [0,0,0] +// CHECK-SAME: Assertion `Buf[wiID] != 0 && "from assert statement"` failed. +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_one_kernel.hpp:10: {{|(null)}}: {{.*}} [{{[0-3]}},0,0], {{.*}} [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_one_kernel.hpp" diff --git a/SYCL/Assert/assert_in_simultaneous_kernels.cpp b/SYCL/Assert/assert_in_simultaneous_kernels.cpp new file mode 100644 index 0000000000..a70b3c6544 --- /dev/null +++ b/SYCL/Assert/assert_in_simultaneous_kernels.cpp @@ -0,0 +1,28 @@ +// REQUIRES: linux +// FIXME: Flaky on HIP and cuda +// UNSUPPORTED: hip || cuda +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// +// Since this is a multi-threaded application enable memory tracking and +// deferred release feature in the Level Zero plugin to avoid releasing memory +// too early. This is necessary because currently SYCL RT sets indirect access +// flag for all kernels and the Level Zero runtime doesn't support deferred +// release yet. +// Suppress runtime from printing out error messages, so that the test can +// match on assert message generated by the toolchains. +// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 SYCL_PI_SUPPRESS_ERROR_MESSAGE=1 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK: {{.*}}assert_in_simultaneous_kernels.hpp:13: void assertFunc(): {{.*}}[9,7,0], {{.*}}[0,0,0] +// CHECK-SAME: Assertion `false && "from assert statement"` failed. +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_simultaneous_kernels.hpp:13: void assertFunc(): {{.*}} [9,7,0], {{.*}} [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_simultaneous_kernels.hpp" diff --git a/SYCL/Assert/assert_in_simultaneous_kernels.hpp b/SYCL/Assert/assert_in_simultaneous_kernels.hpp new file mode 100644 index 0000000000..8f6bb1e999 --- /dev/null +++ b/SYCL/Assert/assert_in_simultaneous_kernels.hpp @@ -0,0 +1,75 @@ +#include +#include +#include +#include +#include + +using namespace sycl; +using namespace sycl::access; + +static constexpr size_t NUM_THREADS = 4; +static constexpr size_t RANGE_SIZE = 1024; + +void assertFunc() { assert(false && "from assert statement"); } + +template void assertTest(queue *Q) { + Q->submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<2>{{RANGE_SIZE, RANGE_SIZE}, {1, 1}}, [=](nd_item<2> it) { + if (it.get_global_id(0) == 7 && it.get_global_id(1) == 9) + assertFunc(); + }); + }); + Q->wait(); +} + +void runTestForTid(queue *Q, size_t Tid) { + switch (Tid % 4) { + case 0: { + assertTest(Q); + break; + } + case 1: { + assertTest(Q); + break; + } + case 2: { + assertTest(Q); + break; + } + case 3: { + assertTest(Q); + break; + } + } +} + +int main(int Argc, const char *Argv[]) { + // On windows stderr output becomes messed up if several thread + // output simultaneously. Hence, setting explicit line buffering here. +#ifndef __SYCL_DEVICE_ONLY__ + if (setvbuf(stderr, nullptr, _IOLBF, BUFSIZ)) { + std::cerr << "Can't set line-buffering mode fo stderr\n"; + return 1; + } +#endif + + std::vector threadPool; + threadPool.reserve(NUM_THREADS); + + std::vector> Queues; + for (size_t i = 0; i < NUM_THREADS; ++i) { + Queues.push_back(std::make_unique()); + } + + for (size_t tid = 0; tid < NUM_THREADS; ++tid) { + threadPool.push_back(std::thread(runTestForTid, Queues[tid].get(), tid)); + } + + for (auto ¤tThread : threadPool) { + currentThread.join(); + } + + std::cout << "The test ended." << std::endl; + return 0; +} diff --git a/SYCL/Assert/assert_in_simultaneous_kernels_win.cpp b/SYCL/Assert/assert_in_simultaneous_kernels_win.cpp new file mode 100644 index 0000000000..647dc8ff3d --- /dev/null +++ b/SYCL/Assert/assert_in_simultaneous_kernels_win.cpp @@ -0,0 +1,26 @@ +// REQUIRES: windows +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// +// Since this is a multi-threaded application enable memory tracking and +// deferred release feature in the Level Zero plugin to avoid releasing memory +// too early. This is necessary because currently SYCL RT sets indirect access +// flag for all kernels and the Level Zero runtime doesn't support deferred +// release yet. +// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// FIXME Windows version prints '(null)' instead of '' once in a +// while for some insane reason. +// CHECK: {{.*}}assert_in_simultaneous_kernels.hpp:13: {{|(null)}}: global id: [9,7,0], local id: [0,0,0] +// CHECK-SAME: Assertion `false && "from assert statement"` failed. +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{.*}}assert_in_simultaneous_kernels.hpp:13: {{|(null)}}: global id: [9,7,0], local id: [0,0,0] +// CHECK-ACC: The test ended. + +#include "assert_in_simultaneous_kernels.hpp" diff --git a/SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp b/SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp new file mode 100644 index 0000000000..75fb682044 --- /dev/null +++ b/SYCL/Assert/assert_in_simultaneously_multiple_tus.cpp @@ -0,0 +1,115 @@ +// FIXME flaky fail on CUDA and HIP +// UNSUPPORTED: cuda || hip +// +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -I %S/Inputs %s %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// +// Since this is a multi-threaded application enable memory tracking and +// deferred release feature in the Level Zero plugin to avoid releasing memory +// too early. This is necessary because currently SYCL RT sets indirect access +// flag for all kernels and the Level Zero runtime doesn't support deferred +// release yet. +// Suppress runtime from printing out error messages, so that the test can +// match on assert message generated by the toolchains. +// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 SYCL_PI_SUPPRESS_ERROR_MESSAGE=1 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK: {{this message from file1|this message from file2}} +// CHECK-NOT: The test ended. +// +// CHECK-ACC-NOT: {{this message from file1|this message from file2}} +// CHECK-ACC: The test ended. + +#include "Inputs/kernels_in_file2.hpp" +#include +#include +#include +#include + +#ifdef DEFINE_NDEBUG_INFILE1 +#define NDEBUG +#else +#undef NDEBUG +#endif + +#include + +using namespace sycl; +using namespace sycl::access; + +static constexpr size_t NUM_THREADS = 4; +static constexpr size_t BUFFER_SIZE = 10; + +template void enqueueKernel(queue *Q) { + sycl::range<1> numOfItems{BUFFER_SIZE}; + sycl::buffer Buf(numOfItems); + + Q->submit([&](handler &CGH) { + auto Acc = Buf.template get_access(CGH); + + CGH.parallel_for(numOfItems, [=](sycl::id<1> wiID) { + Acc[wiID] = 0; + if (wiID == 5) + assert(false && "this message from file1"); + }); + }); +} + +void runTestForTid(queue *Q, size_t Tid) { + switch (Tid % 4) { + case 0: { + enqueueKernel(Q); + Q->wait(); + break; + } + case 1: { + enqueueKernel(Q); + Q->wait(); + break; + } + case 2: { + enqueueKernel_1_fromFile2(Q); + Q->wait(); + break; + } + case 3: { + enqueueKernel_2_fromFile2(Q); + Q->wait(); + break; + } + } +} + +int main(int Argc, const char *Argv[]) { +#ifndef __SYCL_DEVICE_ONLY__ + // On windows stderr output becomes messed up if several thread + // output simultaneously. Hence, setting explicit line buffering here. + if (setvbuf(stderr, nullptr, _IOLBF, BUFSIZ)) { + std::cerr << "Can't set line-buffering mode fo stderr\n"; + return 1; + } +#endif + + std::vector threadPool; + threadPool.reserve(NUM_THREADS); + + std::vector> Queues; + for (size_t i = 0; i < NUM_THREADS; ++i) { + Queues.push_back(std::make_unique()); + } + + for (size_t tid = 0; tid < NUM_THREADS; ++tid) { + threadPool.push_back(std::thread(runTestForTid, Queues[tid].get(), tid)); + } + + for (auto ¤tThread : threadPool) { + currentThread.join(); + } + + std::cout << "The test ended." << std::endl; + return 0; +} diff --git a/SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp b/SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp new file mode 100644 index 0000000000..6958189ab7 --- /dev/null +++ b/SYCL/Assert/assert_in_simultaneously_multiple_tus_one_ndebug.cpp @@ -0,0 +1,24 @@ +// FIXME flaky fail on CUDA +// UNSUPPORTED: cuda +// RUN: %clangxx -DSYCL_FALLBACK_ASSERT=1 -fsycl -fsycl-targets=%sycl_triple -DDEFINE_NDEBUG_INFILE2 -I %S/Inputs %S/assert_in_simultaneously_multiple_tus.cpp %S/Inputs/kernels_in_file2.cpp -o %t.out %threads_lib +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// +// Since this is a multi-threaded application enable memory tracking and +// deferred release feature in the Level Zero plugin to avoid releasing memory +// too early. This is necessary because currently SYCL RT sets indirect access +// flag for all kernels and the Level Zero runtime doesn't support deferred +// release yet. +// Suppress runtime from printing out error messages, so that the test can +// match on assert message generated by the toolchains. +// RUN: env SYCL_PI_LEVEL_ZERO_TRACK_INDIRECT_ACCESS_MEMORY=1 SYCL_PI_SUPPRESS_ERROR_MESSAGE=1 %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// Shouldn't fail on ACC as fallback assert isn't enqueued there +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --check-prefix=CHECK-ACC --input-file %t.txt +// +// CHECK: this message from file1 +// CHECK-NOT: this message from file2 +// CHECK-NOT: The test ended. +// +// CHECK-ACC: The test ended. diff --git a/SYCL/AsyncHandler/default_async_handler.cpp b/SYCL/AsyncHandler/default_async_handler.cpp new file mode 100644 index 0000000000..15f5a56b56 --- /dev/null +++ b/SYCL/AsyncHandler/default_async_handler.cpp @@ -0,0 +1,24 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %CPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %GPU_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %GPU_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt +// RUN: %ACC_RUN_PLACEHOLDER %t.out &> %t.txt || true +// RUN: %ACC_RUN_PLACEHOLDER FileCheck %s --input-file %t.txt + +#include + +using namespace sycl; + +int main() { + queue Q; + Q.submit([&](handler &CGH) { + CGH.host_task([=]() { + throw std::runtime_error("Exception thrown from host_task."); + }); + }).wait_and_throw(); + return 0; +} + +// CHECK: Default async_handler caught exceptions: +// CHECK-NEXT: Exception thrown from host_task. diff --git a/SYCL/AtomicRef/accessor.cpp b/SYCL/AtomicRef/accessor.cpp new file mode 100644 index 0000000000..058d33e0d9 --- /dev/null +++ b/SYCL/AtomicRef/accessor.cpp @@ -0,0 +1,105 @@ +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +#include +#include +#include +#include +#include +#include +using namespace sycl; +using namespace sycl::ext::oneapi; + +// Equivalent to add_test from add.cpp +// Uses atomic_accessor instead of atomic_ref +template void accessor_test(queue q, size_t N) { + T sum = 0; + std::vector output(N, 0); + { + buffer sum_buf(&sum, 1); + buffer output_buf(output.data(), output.size()); + + q.submit([&](handler &cgh) { +#if __cplusplus > 201402L + static_assert( + std::is_same>::value, + "atomic_accessor type incorrectly deduced"); +#endif + auto sum = + atomic_accessor( + sum_buf, cgh); + auto out = + output_buf.template get_access(cgh); + cgh.parallel_for(range<1>(N), [=](item<1> it) { + int gid = it.get_id(0); + static_assert( + std::is_same>::value, + "atomic_accessor returns incorrect atomic_ref"); + out[gid] = sum[0].fetch_add(T(1)); + }); + }); + } + + // All work-items increment by 1, so final value should be equal to N + assert(sum == N); + + // Intermediate values should be unique + std::sort(output.begin(), output.end()); + assert(std::unique(output.begin(), output.end()) == output.end()); + + // Fetch returns original value: will be in [0, N-1] + auto min_e = output[0]; + auto max_e = output[output.size() - 1]; + assert(min_e == 0 && max_e == N - 1); +} + +// Simplified form of accessor_test for local memory +template +void local_accessor_test(queue q, size_t N, size_t L = 8) { + assert(N % L == 0); + std::vector output(N / L, 0); + { + buffer output_buf(output.data(), output.size()); + q.submit([&](handler &cgh) { + auto sum = + atomic_accessor(1, cgh); + auto out = output_buf.template get_access(cgh); + cgh.parallel_for(nd_range<1>(N, L), [=](nd_item<1> it) { + int grp = it.get_group(0); + sum[0].store(0); + it.barrier(); + static_assert( + std::is_same>::value, + "local atomic_accessor returns incorrect atomic_ref"); + T result = sum[0].fetch_add(T(1)); + if (result == it.get_local_range(0) - 1) { + out[grp] = result; + } + }); + }); + } + + // All work-items increment by 1, and last in the group writes out old value + // All values should be L-1 + assert(std::all_of(output.begin(), output.end(), + [=](T x) { return x == L - 1; })); +} + +int main() { + queue q; + constexpr int N = 32; + accessor_test(q, N); + local_accessor_test(q, N); + std::cout << "Test passed." << std::endl; +} diff --git a/SYCL/AtomicRef/add.cpp b/SYCL/AtomicRef/add.cpp new file mode 100644 index 0000000000..103fa2d5ec --- /dev/null +++ b/SYCL/AtomicRef/add.cpp @@ -0,0 +1,11 @@ +// See https://github.com/intel/llvm-test-suite/issues/867 for detailed status +// UNSUPPORTED: hip + +// RUN: %clangxx -fsycl -fsycl-unnamed-lambda -fsycl-device-code-split=per_kernel -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include "add.h" + +int main() { add_test_all(); } diff --git a/SYCL/AtomicRef/add.h b/SYCL/AtomicRef/add.h new file mode 100644 index 0000000000..48f2434220 --- /dev/null +++ b/SYCL/AtomicRef/add.h @@ -0,0 +1,350 @@ +#pragma once + +#ifndef TEST_GENERIC_IN_LOCAL +#define TEST_GENERIC_IN_LOCAL 0 +#endif + +#include +#include +#include +#include +#include +#include +#include + +using namespace sycl; + +template