From 030fb1af0147ff1b06eb8e58b1cddde98ef31dbf Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 9 Jun 2017 14:46:16 -0400 Subject: [PATCH 1/4] CMake example. --- posts/cmake/CMakeLists.txt | 47 +++++++++++++++++ posts/cmake/particle.cu | 57 ++++++++++++++++++++ posts/cmake/particle.h | 46 +++++++++++++++++ posts/cmake/test.cu | 103 +++++++++++++++++++++++++++++++++++++ posts/cmake/v3.cu | 59 +++++++++++++++++++++ posts/cmake/v3.h | 45 ++++++++++++++++ 6 files changed, 357 insertions(+) create mode 100644 posts/cmake/CMakeLists.txt create mode 100644 posts/cmake/particle.cu create mode 100644 posts/cmake/particle.h create mode 100644 posts/cmake/test.cu create mode 100644 posts/cmake/v3.cu create mode 100644 posts/cmake/v3.h diff --git a/posts/cmake/CMakeLists.txt b/posts/cmake/CMakeLists.txt new file mode 100644 index 0000000..859b56a --- /dev/null +++ b/posts/cmake/CMakeLists.txt @@ -0,0 +1,47 @@ + +cmake_minimum_required(VERSION 3.8 FATAL_ERROR) +project(cmake_and_cuda LANGUAGES CXX CUDA) + +include(CTest) + +add_library(particles STATIC + particle.cu + particle.h + v3.cu + v3.h + ) + +# Request that particles be built with -std=c++11 +# As this is a public compile feature anything that links to particles +# will also build with -std=c++11 +target_compile_features(particles PUBLIC cxx_std_11) + +# We need to explicitly state that we need all CUDA files in the particle +# library to be built with -dc as the member functions could be called by +# other libraries and executables +set_target_properties( particles + PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + +if(BUILD_TESTING) + + add_executable(particle_test test.cu) + + set_target_properties(particle_test PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + target_link_libraries(particle_test PRIVATE particles) + + add_test(NAME particles_10k COMMAND particle_test 10000 ) + add_test(NAME particles_256k COMMAND particle_test 256000 ) + + if(APPLE) + # We need to add the default path to the driver (libcuda.dylib) as an 
rpath, + # so that the static cuda runtime can find it at runtime. + target_link_libraries(particle_test + PRIVATE + "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" + ) + endif() + +endif() + + + diff --git a/posts/cmake/particle.cu b/posts/cmake/particle.cu new file mode 100644 index 0000000..db05616 --- /dev/null +++ b/posts/cmake/particle.cu @@ -0,0 +1,57 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ +#include "particle.h" + +particle::particle() : position(), velocity(), totalDistance(1,0,0) +{ +} + +__device__ __host__ +void particle::advance(float d) +{ + velocity.normalize(); + float dx = d * velocity.x; + position.x += dx; + totalDistance.x += dx; + float dy = d * velocity.y; + position.y += dy; + totalDistance.y += dy; + float dz = d * velocity.z; + position.z += dz; + totalDistance.z += dz; +// #if __CUDA_ARCH__ +// int idx = threadIdx.x + blockIdx.x*blockDim.x; +// if(idx == 0) +// { +// printf("totalDistance: %f\n", totalDistance.x ); +// } +// #endif + velocity.scramble(); +} + +const v3& particle::getTotalDistance() const +{ return totalDistance; } diff --git a/posts/cmake/particle.h b/posts/cmake/particle.h new file mode 100644 index 0000000..a903ab0 --- /dev/null +++ b/posts/cmake/particle.h @@ -0,0 +1,46 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __particle_h__ +#define __particle_h__ + +#include "v3.h" + +class particle +{ + private: + v3 position; + v3 velocity; + v3 totalDistance; + + public: + particle(); + __host__ __device__ void advance(float dist); + const v3& getTotalDistance() const; + +}; + +#endif diff --git a/posts/cmake/test.cu b/posts/cmake/test.cu new file mode 100644 index 0000000..a74fdd6 --- /dev/null +++ b/posts/cmake/test.cu @@ -0,0 +1,103 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include "particle.h" +#include +#include + +__global__ void advanceParticles(float dt, particle * pArray, int nParticles) +{ + int idx = threadIdx.x + blockIdx.x*blockDim.x; + if(idx < nParticles) + { + pArray[idx].advance(dt); + } +} + +int main(int argc, char ** argv) +{ + cudaError_t error; + int n = 1000000; + if(argc > 1) { n = atoi(argv[1]);} // Number of particles + if(argc > 2) { srand(atoi(argv[2])); } // Random seed + + error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("0 %s\n",cudaGetErrorString(error)); + exit(1); + } + + particle * pArray = new particle[n]; + particle * devPArray = NULL; + cudaMalloc(&devPArray, n*sizeof(particle)); + cudaDeviceSynchronize(); error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("1 %s\n",cudaGetErrorString(error)); + exit(1); + } + + cudaMemcpy(devPArray, pArray, n*sizeof(particle), cudaMemcpyHostToDevice); + cudaDeviceSynchronize(); error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("2 %s\n",cudaGetErrorString(error)); + exit(1); + } + + for(int i=0; i<100; i++) + { + float dt = (float)rand()/(float) RAND_MAX; // Random distance each step + advanceParticles<<< 1 + n/256, 256>>>(dt, 
devPArray, n); + error = cudaGetLastError(); + if (error != cudaSuccess) + { + printf("3 %s\n",cudaGetErrorString(error)); + exit(1); + } + + cudaDeviceSynchronize(); + } + cudaMemcpy(pArray, devPArray, n*sizeof(particle), cudaMemcpyDeviceToHost); + + v3 totalDistance(0,0,0); + v3 temp; + for(int i=0; i + +v3::v3() +{ randomize(); } + +v3::v3(float xIn, float yIn, float zIn) : x(xIn), y(yIn), z(zIn) +{} + +void v3::randomize() +{ + x = (float)rand() / (float)RAND_MAX; + y = (float)rand() / (float)RAND_MAX; + z = (float)rand() / (float)RAND_MAX; +} + +__host__ __device__ void v3::normalize() +{ + float t = sqrt(x*x + y*y + z*z); + x /= t; + y /= t; + z /= t; +} + +__host__ __device__ void v3::scramble() +{ + float tx = 0.317f*(x + 1.0) + y + z * x * x + y + z; + float ty = 0.619f*(y + 1.0) + y * y + x * y * z + y + x; + float tz = 0.124f*(z + 1.0) + z * y + x * y * z + y + x; + x = tx; + y = ty; + z = tz; +} diff --git a/posts/cmake/v3.h b/posts/cmake/v3.h new file mode 100644 index 0000000..d8c20a8 --- /dev/null +++ b/posts/cmake/v3.h @@ -0,0 +1,45 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __v3_h__ +#define __v3_h__ + +class v3 +{ +public: + float x; + float y; + float z; + + v3(); + v3(float xIn, float yIn, float zIn); + void randomize(); + __host__ __device__ void normalize(); + __host__ __device__ void scramble(); + +}; + +#endif \ No newline at end of file From 77d0dc5c700d4e3c2cc66cac321dff8cc5f3b492 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 13 Jul 2017 11:30:13 -0400 Subject: [PATCH 2/4] Extend cmake example --- posts/cmake/CMakeLists.txt | 9 +++------ posts/cmake/particle.cu | 13 +++---------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/posts/cmake/CMakeLists.txt b/posts/cmake/CMakeLists.txt index 859b56a..13cfaf4 100644 --- a/posts/cmake/CMakeLists.txt +++ b/posts/cmake/CMakeLists.txt @@ -20,7 +20,8 @@ target_compile_features(particles PUBLIC cxx_std_11) # library to be built with -dc as the member functions could be called by # other libraries and executables set_target_properties( particles - PROPERTIES CUDA_SEPARABLE_COMPILATION ON) + PROPERTIES CUDA_SEPARABLE_COMPILATION ON + ) if(BUILD_TESTING) @@ -35,12 +36,8 @@ if(BUILD_TESTING) if(APPLE) # We need to add the default path to the driver (libcuda.dylib) as an rpath, # 
so that the static cuda runtime can find it at runtime. - target_link_libraries(particle_test - PRIVATE - "-Wl,-rpath,${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" - ) + set_property(TARGET particle_test PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) endif() - endif() diff --git a/posts/cmake/particle.cu b/posts/cmake/particle.cu index db05616..87f7756 100644 --- a/posts/cmake/particle.cu +++ b/posts/cmake/particle.cu @@ -34,22 +34,15 @@ __device__ __host__ void particle::advance(float d) { velocity.normalize(); - float dx = d * velocity.x; + auto dx = d * velocity.x; position.x += dx; totalDistance.x += dx; - float dy = d * velocity.y; + auto dy = d * velocity.y; position.y += dy; totalDistance.y += dy; - float dz = d * velocity.z; + auto dz = d * velocity.z; position.z += dz; totalDistance.z += dz; -// #if __CUDA_ARCH__ -// int idx = threadIdx.x + blockIdx.x*blockDim.x; -// if(idx == 0) -// { -// printf("totalDistance: %f\n", totalDistance.x ); -// } -// #endif velocity.scramble(); } From 3a39a446249f94b0904aefbbe8fc8218a126188d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 13 Jul 2017 11:50:18 -0400 Subject: [PATCH 3/4] Add a C++ file to the CMake example. 
--- posts/cmake/CMakeLists.txt | 2 ++ posts/cmake/randomize.cpp | 42 ++++++++++++++++++++++++++++++++++++++ posts/cmake/randomize.h | 32 +++++++++++++++++++++++++++++ posts/cmake/v3.cu | 9 ++++---- 4 files changed, 81 insertions(+), 4 deletions(-) create mode 100644 posts/cmake/randomize.cpp create mode 100644 posts/cmake/randomize.h diff --git a/posts/cmake/CMakeLists.txt b/posts/cmake/CMakeLists.txt index 13cfaf4..0e39e4c 100644 --- a/posts/cmake/CMakeLists.txt +++ b/posts/cmake/CMakeLists.txt @@ -5,6 +5,8 @@ project(cmake_and_cuda LANGUAGES CXX CUDA) include(CTest) add_library(particles STATIC + randomize.cpp + randomize.h particle.cu particle.h v3.cu diff --git a/posts/cmake/randomize.cpp b/posts/cmake/randomize.cpp new file mode 100644 index 0000000..10b6162 --- /dev/null +++ b/posts/cmake/randomize.cpp @@ -0,0 +1,42 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +namespace { + std::random_device randDevice; + std::minstd_rand engine(randDevice()); + std::normal_distribution normalDist(0, RAND_MAX); +} + + +void randomize(float& x, float& y, float& z) +{ + x = normalDist(engine); + y = normalDist(engine); + z = normalDist(engine); +} diff --git a/posts/cmake/randomize.h b/posts/cmake/randomize.h new file mode 100644 index 0000000..b726379 --- /dev/null +++ b/posts/cmake/randomize.h @@ -0,0 +1,32 @@ +/* Copyright (c) 1993-2015, NVIDIA CORPORATION. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of NVIDIA CORPORATION nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef __randomize_h__ +#define __randomize_h__ + +void randomize(float& x, float& y, float& z); + +#endif diff --git a/posts/cmake/v3.cu b/posts/cmake/v3.cu index db7c35b..f1523eb 100644 --- a/posts/cmake/v3.cu +++ b/posts/cmake/v3.cu @@ -25,19 +25,20 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #include "v3.h" +#include "randomize.h" #include v3::v3() -{ randomize(); } +{ + ::randomize(x, y, z); +} v3::v3(float xIn, float yIn, float zIn) : x(xIn), y(yIn), z(zIn) {} void v3::randomize() { - x = (float)rand() / (float)RAND_MAX; - y = (float)rand() / (float)RAND_MAX; - z = (float)rand() / (float)RAND_MAX; + ::randomize(x, y, z); } __host__ __device__ void v3::normalize() From e91effd69fca1b55cd00ded7bd1f7caaff6f248c Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Fri, 28 Jul 2017 15:28:29 -0400 Subject: [PATCH 4/4] Add cmake ptx example showing building and conversion to C header. The cmake_ptx example shows the conversion of multiple .cu files into ptx files. Once we have all the ptx files, we convert them into a C header using bin2c for embedding into an application. 
Lastly we show how you can also tell CMake to install ptx files, so that projects can distribute ptx files. --- posts/cmake_ptx/CMakeLists.txt | 62 +++++++++++++++++++++++++++++ posts/cmake_ptx/bin2c_wrapper.cmake | 19 +++++++++ posts/cmake_ptx/kernelA.cu | 7 ++++ posts/cmake_ptx/kernelB.cu | 8 ++++ posts/cmake_ptx/main.cu | 19 +++++++++ 5 files changed, 115 insertions(+) create mode 100644 posts/cmake_ptx/CMakeLists.txt create mode 100644 posts/cmake_ptx/bin2c_wrapper.cmake create mode 100644 posts/cmake_ptx/kernelA.cu create mode 100644 posts/cmake_ptx/kernelB.cu create mode 100644 posts/cmake_ptx/main.cu diff --git a/posts/cmake_ptx/CMakeLists.txt b/posts/cmake_ptx/CMakeLists.txt new file mode 100644 index 0000000..5372e4f --- /dev/null +++ b/posts/cmake_ptx/CMakeLists.txt @@ -0,0 +1,62 @@ +cmake_minimum_required(VERSION 3.8) +project (ExportPTX CUDA) + +#Goal for this example: +# How to generate PTX files instead of OBJECT files +# How to convert PTX files into a C header using bin2c +# How to install PTX files + +add_library(CudaPTX OBJECT kernelA.cu kernelB.cu) +set_property(TARGET CudaPTX PROPERTY CUDA_PTX_COMPILATION ON) + +#We are going to need a wrapper around bin2c for multiple reasons +# 1. bin2c only converts a single file at a time +# 2. bin2c has only standard out support, so we have to manually +# redirect to a cmake buffer +# 3. 
We want to pack everything into a single output file, so we +# need to also pass the --name option +set(output_file ${CMAKE_CURRENT_BINARY_DIR}/embedded_objs.h) + +get_filename_component(cuda_compiler_bin "${CMAKE_CUDA_COMPILER}" DIRECTORY) +find_program(bin_to_c + NAMES bin2c + PATHS ${cuda_compiler_bin} + ) +if(NOT bin_to_c) + message(FATAL_ERROR + "bin2c not found:\n" + " CMAKE_CUDA_COMPILER='${CMAKE_CUDA_COMPILER}'\n" + " cuda_compiler_bin='${cuda_compiler_bin}'\n" + ) +endif() + +add_custom_command( + OUTPUT "${output_file}" + COMMAND ${CMAKE_COMMAND} + "-DBIN_TO_C_COMMAND=${bin_to_c}" + "-DOBJECTS=$" + "-DOUTPUT=${output_file}" + -P ${CMAKE_CURRENT_SOURCE_DIR}/bin2c_wrapper.cmake + VERBATIM + DEPENDS $ + COMMENT "Converting Object files to a C header" + ) + +add_executable(ExportPTX main.cu ${output_file}) +add_dependencies(ExportPTX CudaPTX) +target_include_directories(ExportPTX PRIVATE + ${CMAKE_CURRENT_BINARY_DIR} ) + +if(APPLE) + # Help the static cuda runtime find the driver (libcuda.dylib) at runtime. 
+ set_property(TARGET ExportPTX PROPERTY BUILD_RPATH ${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}) +endif() + +#Install the raw PTX files into a ptx directory +install(TARGETS CudaPTX ExportPTX + EXPORT cudaPTX + RUNTIME DESTINATION bin + LIBRARY DESTINATION lib + OBJECTS DESTINATION ptx + ) +install(EXPORT cudaPTX DESTINATION lib/cudaPTX) diff --git a/posts/cmake_ptx/bin2c_wrapper.cmake b/posts/cmake_ptx/bin2c_wrapper.cmake new file mode 100644 index 0000000..0baf934 --- /dev/null +++ b/posts/cmake_ptx/bin2c_wrapper.cmake @@ -0,0 +1,19 @@ + +set(file_contents) +foreach(obj ${OBJECTS}) + get_filename_component(obj_ext ${obj} EXT) + get_filename_component(obj_name ${obj} NAME_WE) + get_filename_component(obj_dir ${obj} DIRECTORY) + + if(obj_ext MATCHES ".ptx") + set(args --name ${obj_name} ${obj}) + execute_process(COMMAND "${BIN_TO_C_COMMAND}" ${args} + WORKING_DIRECTORY ${obj_dir} + RESULT_VARIABLE result + OUTPUT_VARIABLE output + ERROR_VARIABLE error_var + ) + set(file_contents "${file_contents} \n${output}") + endif() +endforeach() +file(WRITE "${OUTPUT}" "${file_contents}") diff --git a/posts/cmake_ptx/kernelA.cu b/posts/cmake_ptx/kernelA.cu new file mode 100644 index 0000000..fbe0d26 --- /dev/null +++ b/posts/cmake_ptx/kernelA.cu @@ -0,0 +1,7 @@ + +__global__ void kernelA(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/posts/cmake_ptx/kernelB.cu b/posts/cmake_ptx/kernelB.cu new file mode 100644 index 0000000..11872e4 --- /dev/null +++ b/posts/cmake_ptx/kernelB.cu @@ -0,0 +1,8 @@ + + +__global__ void kernelB(float* r, float* x, float* y, float* z, int size) +{ + for (int i = threadIdx.x; i < size; i += blockDim.x) { + r[i] = x[i] * y[i] + z[i]; + } +} diff --git a/posts/cmake_ptx/main.cu b/posts/cmake_ptx/main.cu new file mode 100644 index 0000000..1b446b9 --- /dev/null +++ b/posts/cmake_ptx/main.cu @@ -0,0 +1,19 @@ + +#include + +#include 
"embedded_objs.h" + +int main(int argc, char** argv) +{ + (void)argc; + (void)argv; + + unsigned char* ka = kernelA; + unsigned char* kb = kernelB; + if(ka != NULL && kb != NULL) + { + std::cout << "loaded ptx files." << std::endl; + return 0; + } + return 1; +}