diff --git a/README.md b/README.md
index 905ff217..d2cff5b1 100644
--- a/README.md
+++ b/README.md
@@ -33,7 +33,7 @@ Read the paper [here](https://arxiv.org/abs/1902.06714).
 | Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ |
 | Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) |
 | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ |
-| Flatten | `flatten` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
+| Flatten | `flatten` | `input2d`, `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ |
 | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ |
 
 (*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset.
diff --git a/fpm.toml b/fpm.toml
index cab0d5d5..ebcceeb6 100644
--- a/fpm.toml
+++ b/fpm.toml
@@ -4,3 +4,6 @@ license = "MIT"
 author = "Milan Curcic"
 maintainer = "mcurcic@miami.edu"
 copyright = "Copyright 2018-2025, neural-fortran contributors"
+
+[preprocess]
+[preprocess.cpp]
diff --git a/src/nf/nf_flatten_layer.f90 b/src/nf/nf_flatten_layer.f90
index 38e38098..c7c83cda 100644
--- a/src/nf/nf_flatten_layer.f90
+++ b/src/nf/nf_flatten_layer.f90
@@ -18,7 +18,8 @@ module nf_flatten_layer
     integer, allocatable :: input_shape(:)
     integer :: output_size
 
-    real, allocatable :: gradient(:,:,:)
+    real, allocatable :: gradient_2d(:,:)
+    real, allocatable :: gradient_3d(:,:,:)
     real, allocatable :: output(:)
 
   contains
@@ -40,23 +41,23 @@ end function flatten_layer_cons
   interface
 
     pure module subroutine backward(self, input, gradient)
-      !! Apply the backward pass to the flatten layer.
-      !! This is a reshape operation from 1-d gradient to 3-d input.
+      !! Apply the backward pass to the flatten layer for 2-d or 3-d input.
+      !! This is a reshape operation from the 1-d gradient to the 2-d or 3-d input shape.
       class(flatten_layer), intent(in out) :: self
         !! Flatten layer instance
-      real, intent(in) :: input(:,:,:)
+      real, intent(in) :: input(..)
         !! Input from the previous layer
       real, intent(in) :: gradient(:)
         !! Gradient from the next layer
     end subroutine backward
 
     pure module subroutine forward(self, input)
-      !! Propagate forward the layer.
+      !! Propagate forward the layer for 2-d or 3-d input.
       !! Calling this subroutine updates the values of a few data components
       !! of `flatten_layer` that are needed for the backward pass.
       class(flatten_layer), intent(in out) :: self
         !! Dense layer instance
-      real, intent(in) :: input(:,:,:)
+      real, intent(in) :: input(..)
         !! Input from the previous layer
     end subroutine forward
 
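The interface change above replaces the explicit rank-3 dummy argument `input(:,:,:)` with an assumed-rank `input(..)`. As a quick illustration (this standalone program is not part of the patch; the names `assumed_rank_demo` and `describe` are invented), the Fortran 2018 assumed-rank/`select rank` pattern that the submodule below relies on looks like this:

program assumed_rank_demo
  ! Minimal sketch of the assumed-rank + select rank dispatch (Fortran 2018).
  implicit none
  real :: a(2, 3), b(1, 2, 2)
  a = 1
  b = 2
  call describe(a)   ! reports rank 2, size 6
  call describe(b)   ! reports rank 3, size 4
contains
  subroutine describe(x)
    real, intent(in) :: x(..)   ! assumed-rank dummy argument
    select rank(x)
      rank(2)
        print '(a, i0)', 'rank 2, size ', size(x)
      rank(3)
        print '(a, i0)', 'rank 3, size ', size(x)
      rank default
        error stop 'Unsupported rank of input'
    end select
  end subroutine describe
end program assumed_rank_demo

Inside each `rank` block the argument behaves as an ordinary array of that rank, which is what lets the flatten layer reuse `reshape` and `pack` unchanged for both 2-d and 3-d inputs.
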
diff --git a/src/nf/nf_flatten_layer_submodule.f90 b/src/nf/nf_flatten_layer_submodule.f90
index d52e996d..55e99485 100644
--- a/src/nf/nf_flatten_layer_submodule.f90
+++ b/src/nf/nf_flatten_layer_submodule.f90
@@ -17,16 +17,30 @@ end function flatten_layer_cons
 
   pure module subroutine backward(self, input, gradient)
     class(flatten_layer), intent(in out) :: self
-    real, intent(in) :: input(:,:,:)
+    real, intent(in) :: input(..)
     real, intent(in) :: gradient(:)
-    self % gradient = reshape(gradient, shape(input))
+    select rank(input)
+      rank(2)
+        self % gradient_2d = reshape(gradient, shape(input))
+      rank(3)
+        self % gradient_3d = reshape(gradient, shape(input))
+      rank default
+        error stop "Unsupported rank of input"
+    end select
   end subroutine backward
 
 
   pure module subroutine forward(self, input)
     class(flatten_layer), intent(in out) :: self
-    real, intent(in) :: input(:,:,:)
-    self % output = pack(input, .true.)
+    real, intent(in) :: input(..)
+    select rank(input)
+      rank(2)
+        self % output = pack(input, .true.)
+      rank(3)
+        self % output = pack(input, .true.)
+      rank default
+        error stop "Unsupported rank of input"
+    end select
   end subroutine forward
 
 
@@ -37,8 +51,13 @@ module subroutine init(self, input_shape)
     self % input_shape = input_shape
     self % output_size = product(input_shape)
 
-    allocate(self % gradient(input_shape(1), input_shape(2), input_shape(3)))
-    self % gradient = 0
+    if (size(input_shape) == 2) then
+      allocate(self % gradient_2d(input_shape(1), input_shape(2)))
+      self % gradient_2d = 0
+    else if (size(input_shape) == 3) then
+      allocate(self % gradient_3d(input_shape(1), input_shape(2), input_shape(3)))
+      self % gradient_3d = 0
+    end if
 
     allocate(self % output(self % output_size))
     self % output = 0
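In the implementation above, the forward pass flattens with `pack(input, .true.)` and the backward pass restores the stored shape with `reshape(gradient, shape(input))`. The standalone sketch below (illustrative only; the program and variable names are invented) shows why the pair is a lossless round trip: `pack` emits elements in array-element (column-major) order and `reshape` refills them in that same order.

program pack_reshape_roundtrip
  ! reshape(pack(x, .true.), shape(x)) recovers x exactly, which is the
  ! relationship the flatten forward/backward pair relies on.
  implicit none
  real :: x(2, 3), flat(6), back(2, 3)
  x = reshape(real([1, 2, 3, 4, 5, 6]), [2, 3])
  flat = pack(x, .true.)           ! forward: 2-d -> 1-d, column-major order
  back = reshape(flat, shape(x))   ! backward: 1-d -> 2-d, same order
  print *, all(back == x)          ! prints T
end program pack_reshape_roundtrip
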
diff --git a/src/nf/nf_layer_submodule.f90 b/src/nf/nf_layer_submodule.f90
index 41b9a2ce..ab8d5b5d 100644
--- a/src/nf/nf_layer_submodule.f90
+++ b/src/nf/nf_layer_submodule.f90
@@ -37,8 +37,10 @@ pure module subroutine backward_1d(self, previous, gradient)
 
       type is(flatten_layer)
 
-        ! Upstream layers permitted: input3d, conv2d, maxpool2d
+        ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d
         select type(prev_layer => previous % p)
+          type is(input2d_layer)
+            call this_layer % backward(prev_layer % output, gradient)
           type is(input3d_layer)
             call this_layer % backward(prev_layer % output, gradient)
           type is(conv2d_layer)
@@ -168,8 +170,10 @@ pure module subroutine forward(self, input)
 
       type is(flatten_layer)
 
-        ! Upstream layers permitted: input3d, conv2d, maxpool2d, reshape3d
+        ! Upstream layers permitted: input2d, input3d, conv2d, maxpool2d, reshape3d
         select type(prev_layer => input % p)
+          type is(input2d_layer)
+            call this_layer % forward(prev_layer % output)
           type is(input3d_layer)
             call this_layer % forward(prev_layer % output)
           type is(conv2d_layer)
diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90
index 506c3295..e90d92d9 100644
--- a/src/nf/nf_network_submodule.f90
+++ b/src/nf/nf_network_submodule.f90
@@ -135,12 +135,20 @@ module subroutine backward(self, output, loss)
         select type(next_layer => self % layers(n + 1) % p)
           type is(dense_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+
           type is(conv2d_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+
           type is(flatten_layer)
-            call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+            if (size(self % layers(n) % layer_shape) == 2) then
+              call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient_2d)
+            else
+              call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient_3d)
+            end if
+
           type is(maxpool2d_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
+
           type is(reshape3d_layer)
             call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient)
         end select
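With the dispatch above in place, a 2-d input layer can feed a flatten layer directly when constructing a network. The sketch below mirrors the new test that follows; it assumes, as that test does, that the generic `input(2, 3)` constructor builds a 2-d input layer, and, as the README table states, that `dense` accepts a `flatten` layer upstream. It is a usage illustration, not part of the patch.

program flatten_2d_usage
  ! Hedged usage sketch: a [2, 3] input flattened to 6 values, then a dense layer.
  use nf, only: dense, flatten, input, network
  implicit none
  type(network) :: net
  net = network([ &
    input(2, 3), &   ! 2-d input layer, shape [2, 3]
    flatten(), &     ! flattens to a 6-element vector
    dense(4) &       ! fully-connected layer on the flattened vector
  ])
  call net % print_info()   ! layer summary, as in the README examples
end program flatten_2d_usage
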
diff --git a/test/test_flatten_layer.f90 b/test/test_flatten_layer.f90
index e4e1d6e4..0dca4c1f 100644
--- a/test/test_flatten_layer.f90
+++ b/test/test_flatten_layer.f90
@@ -3,16 +3,18 @@ program test_flatten_layer
   use iso_fortran_env, only: stderr => error_unit
   use nf, only: dense, flatten, input, layer, network
   use nf_flatten_layer, only: flatten_layer
+  use nf_input2d_layer, only: input2d_layer
   use nf_input3d_layer, only: input3d_layer
 
   implicit none
 
   type(layer) :: test_layer, input_layer
   type(network) :: net
-  real, allocatable :: gradient(:,:,:)
+  real, allocatable :: gradient_3d(:,:,:), gradient_2d(:,:)
   real, allocatable :: output(:)
   logical :: ok = .true.
 
+  ! Test 3D input
   test_layer = flatten()
 
   if (.not. test_layer % name == 'flatten') then
@@ -59,14 +61,49 @@ program test_flatten_layer
   call test_layer % backward(input_layer, real([1, 2, 3, 4]))
 
   select type(this_layer => test_layer % p); type is(flatten_layer)
-    gradient = this_layer % gradient
+    gradient_3d = this_layer % gradient_3d
   end select
 
-  if (.not. all(gradient == reshape(real([1, 2, 3, 4]), [1, 2, 2]))) then
+  if (.not. all(gradient_3d == reshape(real([1, 2, 3, 4]), [1, 2, 2]))) then
     ok = .false.
     write(stderr, '(a)') 'flatten layer correctly propagates backward.. failed'
   end if
 
+  ! Test 2D input
+  test_layer = flatten()
+  input_layer = input(2, 3)
+  call test_layer % init(input_layer)
+
+  if (.not. all(test_layer % layer_shape == [6])) then
+    ok = .false.
+    write(stderr, '(a)') 'flatten layer has an incorrect output shape for 2D input.. failed'
+  end if
+
+  ! Test forward pass - reshaping from 2-d to 1-d
+  select type(this_layer => input_layer % p); type is(input2d_layer)
+    call this_layer % set(reshape(real([1, 2, 3, 4, 5, 6]), [2, 3]))
+  end select
+
+  call test_layer % forward(input_layer)
+  call test_layer % get_output(output)
+
+  if (.not. all(output == [1, 2, 3, 4, 5, 6])) then
+    ok = .false.
+    write(stderr, '(a)') 'flatten layer correctly propagates forward for 2D input.. failed'
+  end if
+
+  ! Test backward pass - reshaping from 1-d to 2-d
+  call test_layer % backward(input_layer, real([1, 2, 3, 4, 5, 6]))
+
+  select type(this_layer => test_layer % p); type is(flatten_layer)
+    gradient_2d = this_layer % gradient_2d
+  end select
+
+  if (.not. all(gradient_2d == reshape(real([1, 2, 3, 4, 5, 6]), [2, 3]))) then
+    ok = .false.
+    write(stderr, '(a)') 'flatten layer correctly propagates backward for 2D input.. failed'
+  end if
+
   net = network([ &
     input(1, 28, 28), &
     flatten(), &