From 39c006a45fefa2172d3b7d18725d8f21584db159 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 19:32:54 -0700 Subject: [PATCH 01/10] reduce poisson2d optimized.f90 problem size --- poisson2d/optimized.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poisson2d/optimized.f90 b/poisson2d/optimized.f90 index 3bc1045..65bfd09 100644 --- a/poisson2d/optimized.f90 +++ b/poisson2d/optimized.f90 @@ -19,7 +19,7 @@ pure real(dp) function rho(x,y) program poisson use rhofunc, only: rho implicit none -integer, parameter :: dp=kind(0.d0), M=300 +integer, parameter :: dp=kind(0.d0), M=150 integer :: i,j, iter real(dp),parameter :: epsilon0=8.85E-12_dp, target=1E-6_dp, a=0.01 real(dp) :: delta, b, e, phiprime(M,M), phi(M,M), a2, rhoarr(M,M) From a7ccf54291d89fcf346f3f03be05d2393d12d2c7 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 19:55:55 -0700 Subject: [PATCH 02/10] feat: add modernized poisson2d solver Refactorings ------------ 1. Use BLOCK for tighter variable scoping (reduces some clutter). 2. Use array statements with vector subscripts to write compact initializations. 3. Use DO CONCURRENT to express fine-grained parallelism and expose opportunities for optimizations such as vectorization, multithreading, or GPU offloading. 4. Replace some magic values, e.g., initial delta. 5. Eliminate some unnecessary computation by initializing and updating only values that actually need initializing and updating. 6. Use more compact conditional logic. 7. Use a real kind specificaiton based on precision requirements rather than the ambiguously defined double-precision representation of any one given compiler on any one given platform. 8. Eliminate unnecessary precision specifiers ("_dp") when an expression is simply a constant that is exactly representable in any reasonable floating-point representation. 9. Separate the procedure interface into a module and the procedure definition into a submodule.** 10. Use a more descriptive variable names in a few places. 11. Add comments! (includeing some in FORD style) ** In larger projects, item 9 makes code much more user-friendly by isolation the application programmer interface (API) and reduces compile times by eliminating unnecessary compilation cascades. --- poisson2d/modern.f90 | 79 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 poisson2d/modern.f90 diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 new file mode 100644 index 0000000..470232f --- /dev/null +++ b/poisson2d/modern.f90 @@ -0,0 +1,79 @@ +module rho_interface + !! Define a scalar function over 2-space. + implicit none + + private + public :: rho, dp + + integer, parameter :: precision = 15, range = 307 + integer, parameter :: dp = selected_real_kind(precision, range) + + interface + pure real(dp) module function rho(x,y) + !! Poisson equation inhomogeneous term. + implicit none + real(dp), intent(in) :: x,y + end function + end interface + +end module + +submodule(rho_interface) rho_definition + implicit none +contains + module procedure rho + !! To Do: give meaningful names to the magic numbers. + !! See https://en.wikipedia.org/wiki/Magic_number_(programming)#Unnamed_numerical_constants. + if (all([x,y]>0.6_dp .and. [x,y]<0.8_dp)) then + rho = 1. + else + rho = merge(-1., 0., all([x,y]>0.2_dp .and. [x,y]<0.4_dp)) + end if + end procedure +end submodule + +program poisson + !! Solve the 2D Poisson equation using a smoothing operation to iterate. + use rho_interface, only: rho, dp + implicit none + + integer i,j + integer, parameter :: M=150 + real(dp), parameter :: dx=0.01 + real(dp) :: dy=dx + real(dp) :: t_start, rho_sampled(M,M) + + call cpu_time(t_start) + + do concurrent(i=1:M, j=1:M) + rho_sampled(i,j) = rho(i*dx,j*dy) + end do + + block ! Tighten the scoping to declutter the code above. + real(dp) :: delta_phi, t_end + real(dp), parameter :: epsilon0=8.85E-12_dp, tolerance=1E-6_dp + real(dp), dimension(M,M) :: phi_prime, phi + integer iteration + + phi = 0. + + phi_prime([1,M], 2:M-1) = phi([1,M], 2:M-1) ! Initialize only boundary values except corners. (Internal values will + phi_prime(2:M-1, [1,M]) = phi(2:M-1, [1,M]) ! be overwritten in the first iteration. Corners will never be used.) + + delta_phi = tolerance + epsilon(tolerance) ! Ensure at least 1 iteration. + iteration = 0 + do while (delta_phi > tolerance ) + iteration = iteration + 1 + do concurrent(i=2:M-1, j=2:M-1) ! Compute updated solution estimate at internal points. + phi_prime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4._dp + (dx/2._dp)*(dy/2._dp)/epsilon0*rho_sampled(i,j) + end do + delta_phi = maxval(abs(phi_prime - phi)) + phi(2:M-1, 2:M-1) = phi_prime(2:M-1, 2:M-1) ! Update internal values. + end do + + call cpu_time(t_end) + print *, t_end-t_start, iteration + + end block + +end program \ No newline at end of file From da910c0132c604a550387553d0c1a220e07921e7 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 20:03:31 -0700 Subject: [PATCH 03/10] rename optimized.f90 to archaic.f90 This code is old-fashioned, verbose, and most importantly, less clealry expressive, particularly in terms of exposing optimization opportunties, which makes the file name a misnomer. By contrast, commit a7ccf54 introduces a modern.f90 refactored to expose concurrency more explicitly through the use of `do concurrent` and array statements, both of which express sets of operations that can be safely executed in any order, opening up opportunities for vectorization, multithreading, and GPU offloading. In timings on a 2.3 GHz 8-Core Intel Core i9, modern.f90 executes in roughly the same time -- possibly 5-10% longer based on informal ad hoc testing -- but is considerably more compact in many places, is arguably more clear and expressive. --- poisson2d/{optimized.f90 => archaic.f90} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename poisson2d/{optimized.f90 => archaic.f90} (100%) diff --git a/poisson2d/optimized.f90 b/poisson2d/archaic.f90 similarity index 100% rename from poisson2d/optimized.f90 rename to poisson2d/archaic.f90 From 648739c7b3e5cdd6139bc77e30c26ae888935a1c Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 20:36:31 -0700 Subject: [PATCH 04/10] feat: make dy immutable via expression-association --- poisson2d/modern.f90 | 58 +++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 27 deletions(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index 470232f..0a34f07 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -40,40 +40,44 @@ program poisson integer i,j integer, parameter :: M=150 real(dp), parameter :: dx=0.01 - real(dp) :: dy=dx real(dp) :: t_start, rho_sampled(M,M) call cpu_time(t_start) - do concurrent(i=1:M, j=1:M) - rho_sampled(i,j) = rho(i*dx,j*dy) - end do + associate( dy => (dx) ) ! Associating with an expression provides immutable state so dy cannot be inadvertently redefined. + + do concurrent(i=1:M, j=1:M) + rho_sampled(i,j) = rho(i*dx,j*dy) + end do - block ! Tighten the scoping to declutter the code above. - real(dp) :: delta_phi, t_end - real(dp), parameter :: epsilon0=8.85E-12_dp, tolerance=1E-6_dp - real(dp), dimension(M,M) :: phi_prime, phi - integer iteration - - phi = 0. - - phi_prime([1,M], 2:M-1) = phi([1,M], 2:M-1) ! Initialize only boundary values except corners. (Internal values will - phi_prime(2:M-1, [1,M]) = phi(2:M-1, [1,M]) ! be overwritten in the first iteration. Corners will never be used.) - - delta_phi = tolerance + epsilon(tolerance) ! Ensure at least 1 iteration. - iteration = 0 - do while (delta_phi > tolerance ) - iteration = iteration + 1 - do concurrent(i=2:M-1, j=2:M-1) ! Compute updated solution estimate at internal points. - phi_prime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4._dp + (dx/2._dp)*(dy/2._dp)/epsilon0*rho_sampled(i,j) + block ! Tighten the scoping to declutter the code above. + real(dp) :: delta_phi, t_end + real(dp), parameter :: epsilon0=8.85E-12_dp, tolerance=1E-6_dp + real(dp), dimension(M,M) :: phi_prime, phi + integer iteration + + phi = 0. + + phi_prime([1,M], 2:M-1) = phi([1,M], 2:M-1) ! Initialize only boundary values except corners. (Internal values will + phi_prime(2:M-1, [1,M]) = phi(2:M-1, [1,M]) ! be overwritten in the first iteration. Corners will never be used.) + + delta_phi = tolerance + epsilon(tolerance) ! Ensure at least 1 iteration. + iteration = 0 + do while (delta_phi > tolerance ) + iteration = iteration + 1 + do concurrent(i=2:M-1, j=2:M-1) ! Compute updated solution estimate at internal points. + phi_prime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4._dp & + + (dx/2._dp)*(dy/2._dp)/epsilon0*rho_sampled(i,j) + end do + delta_phi = maxval(abs(phi_prime - phi)) + phi(2:M-1, 2:M-1) = phi_prime(2:M-1, 2:M-1) ! Update internal values. end do - delta_phi = maxval(abs(phi_prime - phi)) - phi(2:M-1, 2:M-1) = phi_prime(2:M-1, 2:M-1) ! Update internal values. - end do - call cpu_time(t_end) - print *, t_end-t_start, iteration + call cpu_time(t_end) + print *, t_end-t_start, iteration + + end block - end block + end associate end program \ No newline at end of file From f8768cd1d3f566e07cab8b408f932100fc0f7562 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 21:53:05 -0700 Subject: [PATCH 05/10] Update poisson2d/modern.f90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Either is fine. I'm a minimalist so I use the first one because the `_dp` isn't required when the real literal is exactly representable in the chosen floating-post representation, which presumably is the case for `1.` in any floating-point scheme. Co-authored-by: Ondřej Čertík --- poisson2d/modern.f90 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index 0a34f07..70d8f34 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -25,7 +25,7 @@ pure real(dp) module function rho(x,y) !! To Do: give meaningful names to the magic numbers. !! See https://en.wikipedia.org/wiki/Magic_number_(programming)#Unnamed_numerical_constants. if (all([x,y]>0.6_dp .and. [x,y]<0.8_dp)) then - rho = 1. + rho = 1._dp else rho = merge(-1., 0., all([x,y]>0.2_dp .and. [x,y]<0.4_dp)) end if @@ -80,4 +80,4 @@ program poisson end associate -end program \ No newline at end of file +end program From 5563ae2896319a8afff1bb5c6c554f72af1c0a72 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 21:54:07 -0700 Subject: [PATCH 06/10] Update poisson2d/modern.f90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I concur that `_dp` is required in this one. Co-authored-by: Ondřej Čertík --- poisson2d/modern.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index 70d8f34..ac2fcab 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -39,7 +39,7 @@ program poisson integer i,j integer, parameter :: M=150 - real(dp), parameter :: dx=0.01 + real(dp), parameter :: dx=0.01_dp real(dp) :: t_start, rho_sampled(M,M) call cpu_time(t_start) From 7b63bc909df6cc4c73724f120f8a9a9983701a83 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Mon, 28 Jun 2021 21:57:26 -0700 Subject: [PATCH 07/10] Update poisson2d/modern.f90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ondřej Čertík --- poisson2d/modern.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index ac2fcab..44249c4 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -27,7 +27,7 @@ pure real(dp) module function rho(x,y) if (all([x,y]>0.6_dp .and. [x,y]<0.8_dp)) then rho = 1._dp else - rho = merge(-1., 0., all([x,y]>0.2_dp .and. [x,y]<0.4_dp)) + rho = merge(-1._dp, 0._dp, all([x,y]>0.2_dp .and. [x,y]<0.4_dp)) end if end procedure end submodule From f5ec2182b80c24ad00cc5edd5d246d0e5a9dc52e Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Tue, 29 Jun 2021 10:40:01 -0700 Subject: [PATCH 08/10] Update poisson2d/modern.f90 Co-authored-by: Milan Curcic --- poisson2d/modern.f90 | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index 44249c4..0328fda 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -65,10 +65,8 @@ program poisson iteration = 0 do while (delta_phi > tolerance ) iteration = iteration + 1 - do concurrent(i=2:M-1, j=2:M-1) ! Compute updated solution estimate at internal points. - phi_prime(i,j) = (phi(i+1,j) + phi(i-1,j) + phi(i,j+1) + phi(i,j-1))/4._dp & - + (dx/2._dp)*(dy/2._dp)/epsilon0*rho_sampled(i,j) - end do + phi_prime(2:M-1,2:M-1) = (phi(3:,2:M-1) + phi(:M-2,2:M-1) + phi(2:M-1,3:) + phi(2:M-1,1:M-2))/4._dp & + + (dx/2._dp)*(dy/2._dp)/epsilon0*rho_sampled(2:M-1,2:M-1) delta_phi = maxval(abs(phi_prime - phi)) phi(2:M-1, 2:M-1) = phi_prime(2:M-1, 2:M-1) ! Update internal values. end do From bd22c454b0e40f1e04c16d716ec933ef21097903 Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Tue, 29 Jun 2021 10:43:16 -0700 Subject: [PATCH 09/10] Update poisson2d/modern.f90 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Ondřej Čertík --- poisson2d/modern.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index 0328fda..d1d302f 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -56,7 +56,7 @@ program poisson real(dp), dimension(M,M) :: phi_prime, phi integer iteration - phi = 0. + phi = 0 phi_prime([1,M], 2:M-1) = phi([1,M], 2:M-1) ! Initialize only boundary values except corners. (Internal values will phi_prime(2:M-1, [1,M]) = phi(2:M-1, [1,M]) ! be overwritten in the first iteration. Corners will never be used.) From fcb0cd00f5621b55cddbbd4d3b9817ecd7e124ec Mon Sep 17 00:00:00 2001 From: Damian Rouson Date: Tue, 29 Jun 2021 10:46:21 -0700 Subject: [PATCH 10/10] Update poisson2d/modern.f90 Co-authored-by: Milan Curcic --- poisson2d/modern.f90 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/poisson2d/modern.f90 b/poisson2d/modern.f90 index d1d302f..d9291c5 100644 --- a/poisson2d/modern.f90 +++ b/poisson2d/modern.f90 @@ -54,7 +54,7 @@ program poisson real(dp) :: delta_phi, t_end real(dp), parameter :: epsilon0=8.85E-12_dp, tolerance=1E-6_dp real(dp), dimension(M,M) :: phi_prime, phi - integer iteration + integer :: iteration phi = 0