DiFfRG
Loading...
Searching...
No Matches
integrator_angle_gpu.hh
Go to the documentation of this file.
1#pragma once
2
3#ifdef __CUDACC__
4
5// standard library
6#include <future>
7
8// external libraries
9#include <rmm/cuda_stream_pool.hpp>
10#include <rmm/device_uvector.hpp>
11#include <rmm/mr/device/pool_memory_resource.hpp>
12#include <thrust/reduce.h>
13
14// DiFfRG
17
18namespace DiFfRG
19{
20 template <typename ctype, int d, typename NT, typename KERNEL, typename... T>
21 __global__ void gridreduce_angle(NT *dest, const ctype *x_quadrature_p, const ctype *x_quadrature_w,
22 const ctype *ang_quadrature_p, const ctype *ang_quadrature_w, const ctype x_extent,
23 const ctype k, T... t)
24 {
25 uint len_x = gridDim.x * blockDim.x;
26 uint idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
27 uint idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
28 uint idx = idx_y * len_x + idx_x;
29
30 const ctype cos = 2 * (ang_quadrature_p[idx_y] - (ctype)0.5);
31 const ctype weight = 2 * ang_quadrature_w[idx_y] * x_quadrature_w[idx_x] * x_extent;
32 const ctype q = k * sqrt(x_quadrature_p[idx_x] * x_extent);
33 constexpr ctype S_d = S_d_prec<ctype>(d);
34 const ctype int_element = S_d // solid nd angle
35 * (powr<d - 2>(q) / (ctype)2 * powr<2>(k)) // x = p^2 / k^2 integral
36 * (1 / (ctype)2) // divide the cos integral out
37 / powr<d>(2 * (ctype)M_PI); // fourier factor
38
39 dest[idx] = int_element * weight * KERNEL::kernel(q, cos, k, t...);
40 }
41
52 template <int d, typename NT, typename KERNEL> class IntegratorAngleGPU
53 {
54 static_assert(d > 1, "IntegratorAngleGPU: d must be greater than 1, otherwise angles are not needed");
55
56 public:
60 using ctype = typename get_type::ctype<NT>;
61
62 IntegratorAngleGPU(QuadratureProvider &quadrature_provider, const std::array<uint, 2> grid_sizes,
63 const ctype x_extent, const JSONValue &json)
64 : IntegratorAngleGPU(quadrature_provider, grid_sizes, x_extent,
65 json.get_uint("/integration/cudathreadsperblock"))
66 {
67 }
68
69 IntegratorAngleGPU(QuadratureProvider &quadrature_provider, std::array<uint, 2> grid_sizes, const ctype x_extent,
70 const uint max_block_size = 256)
72 pool(rmm::mr::get_current_device_resource(), (device_data_size / 256 + 1) * 256)
73 {
74 ptr_x_quadrature_p = quadrature_provider.get_device_points<ctype>(grid_sizes[0]);
75 ptr_x_quadrature_w = quadrature_provider.get_device_weights<ctype>(grid_sizes[0]);
76 ptr_ang_quadrature_p = quadrature_provider.get_device_points<ctype>(grid_sizes[1]);
78
79 block_sizes = {max_block_size, max_block_size};
80 // choose block sizes such that the size is both as close to max_block_size as possible and the individual sizes
81 // are as close to each other as possible
82 uint optimize_dim = 1;
83 while (block_sizes[0] * block_sizes[1] > max_block_size || block_sizes[0] > grid_sizes[0] ||
84 block_sizes[1] > grid_sizes[1]) {
85 if (block_sizes[optimize_dim] > 1) block_sizes[optimize_dim]--;
86 while (grid_sizes[optimize_dim] % block_sizes[optimize_dim] != 0)
87 block_sizes[optimize_dim]--;
88 optimize_dim = (optimize_dim + 1) % 2;
89 }
90
91 uint blocks1 = grid_sizes[0] / block_sizes[0];
92 uint threads1 = block_sizes[0];
93 uint blocks2 = grid_sizes[1] / block_sizes[1];
94 uint threads2 = block_sizes[1];
95
96 num_blocks = dim3(blocks1, blocks2);
97 threads_per_block = dim3(threads1, threads2);
98 }
99
111
122 template <typename... T> NT get(const ctype k, const T &...t) const
123 {
124 const auto cuda_stream = cuda_stream_pool.get_stream();
125 rmm::device_uvector<NT> device_data(device_data_size, cuda_stream.value(), &pool);
128 x_extent, k, t...);
129 check_cuda();
130 return KERNEL::constant(k, t...) + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), device_data.begin(),
131 device_data.end(), NT(0.), thrust::plus<NT>());
132 }
133
144 template <typename... T> std::future<NT> request(const ctype k, const T &...t) const
145 {
146 const auto cuda_stream = cuda_stream_pool.get_stream();
147 std::shared_ptr<rmm::device_uvector<NT>> device_data =
148 std::make_shared<rmm::device_uvector<NT>>(device_data_size, cuda_stream, &pool);
151 x_extent, k, t...);
152 check_cuda();
153 const NT constant = KERNEL::constant(k, t...);
154
155 return std::async(std::launch::deferred, [=, this]() {
156 return constant + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), (*device_data).begin(),
157 (*device_data).end(), NT(0.), thrust::plus<NT>());
158 });
159 }
160
161 private:
162 const std::array<uint, 2> grid_sizes;
163 std::array<uint, 2> block_sizes;
164
166
171
173
176
177 using PoolMR = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
178 mutable PoolMR pool;
179 const rmm::cuda_stream_pool cuda_stream_pool;
180 };
181} // namespace DiFfRG
182
183#else
184
185#ifdef USE_CUDA
186
187namespace DiFfRG
188{
189 template <uint d, typename NT, typename KERNEL> class IntegratorAngleGPU;
190}
191
192#else
193
195
196namespace DiFfRG
197{
198 template <uint d, typename NT, typename KERNEL> class IntegratorAngleGPU : public IntegratorAngleTBB<d, NT, KERNEL>
199 {
200 public:
201 using ctype = typename get_type::ctype<NT>;
202
203 IntegratorAngleGPU(QuadratureProvider &quadrature_provider, std::array<uint, 2> grid_sizes, const ctype x_extent,
204 const uint max_block_size = 256)
205 : IntegratorAngleTBB<d, NT, KERNEL>(quadrature_provider, grid_sizes, x_extent)
206 {
207 }
208
209 IntegratorAngleGPU(QuadratureProvider &quadrature_provider, const std::array<uint, 2> grid_sizes,
210 const ctype x_extent, const JSONValue &)
211 : IntegratorAngleTBB<d, NT, KERNEL>(quadrature_provider, grid_sizes, x_extent)
212 {
213 }
214 };
215} // namespace DiFfRG
216
217#endif
218
219#endif
GPU integrator for the integration of a function with one angle with CUDA. Calculates.
Definition integrator_angle_gpu.hh:53
const ctype * ptr_ang_quadrature_w
Definition integrator_angle_gpu.hh:170
std::future< NT > request(const ctype k, const T &...t) const
Request a future for the integral of the kernel.
Definition integrator_angle_gpu.hh:144
rmm::mr::pool_memory_resource< rmm::mr::device_memory_resource > PoolMR
Definition integrator_angle_gpu.hh:177
NT get(const ctype k, const T &...t) const
Get the integral of the kernel.
Definition integrator_angle_gpu.hh:122
const ctype x_extent
Definition integrator_angle_gpu.hh:172
IntegratorAngleGPU(QuadratureProvider &quadrature_provider, std::array< uint, 2 > grid_sizes, const ctype x_extent, const uint max_block_size=256)
Definition integrator_angle_gpu.hh:69
const rmm::cuda_stream_pool cuda_stream_pool
Definition integrator_angle_gpu.hh:179
dim3 threads_per_block
Definition integrator_angle_gpu.hh:175
const uint device_data_size
Definition integrator_angle_gpu.hh:165
PoolMR pool
Definition integrator_angle_gpu.hh:178
const ctype * ptr_x_quadrature_w
Definition integrator_angle_gpu.hh:168
const ctype * ptr_ang_quadrature_p
Definition integrator_angle_gpu.hh:169
dim3 num_blocks
Definition integrator_angle_gpu.hh:174
const std::array< uint, 2 > grid_sizes
Definition integrator_angle_gpu.hh:162
const ctype * ptr_x_quadrature_p
Definition integrator_angle_gpu.hh:167
std::array< uint, 2 > block_sizes
Definition integrator_angle_gpu.hh:163
typename get_type::ctype< NT > ctype
Numerical type to be used for integration tasks e.g. the argument or possible jacobians.
Definition integrator_angle_gpu.hh:60
IntegratorAngleGPU(const IntegratorAngleGPU &other)
Definition integrator_angle_gpu.hh:100
IntegratorAngleGPU(QuadratureProvider &quadrature_provider, const std::array< uint, 2 > grid_sizes, const ctype x_extent, const JSONValue &json)
Definition integrator_angle_gpu.hh:62
A wrapper around the boost json value class.
Definition json.hh:19
A class that provides quadrature points and weights, in host and device memory. The quadrature points...
Definition quadrature_provider.hh:139
const NT * get_device_weights(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature weights for a quadrature of size quadrature_size.
Definition quadrature_provider.hh:211
const NT * get_device_points(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature points for a quadrature of size quadrature_size.
Definition quadrature_provider.hh:198
typename internal::_ctype< CT >::value ctype
Definition types.hh:106
Definition complex_math.hh:14
constexpr __forceinline__ __host__ __device__ NumberType powr(const NumberType x)
A compile-time evaluatable power function for whole number exponents.
Definition math.hh:45
constexpr __forceinline__ __host__ __device__ double S_d(NT d)
Surface of a d-dimensional sphere.
Definition math.hh:91
consteval NT S_d_prec(uint d)
Surface of a d-dimensional sphere (precompiled)
Definition math.hh:104
void check_cuda(std::string prefix="")
Check if a CUDA error occurred and print an error message if it did.
unsigned int uint
Definition utils.hh:22
__global__ void gridreduce_angle(NT *dest, const ctype *x_quadrature_p, const ctype *x_quadrature_w, const ctype *ang_quadrature_p, const ctype *ang_quadrature_w, const ctype x_extent, const ctype k, T... t)
Definition integrator_angle_gpu.hh:21