9#include <rmm/cuda_stream_pool.hpp>
10#include <rmm/device_uvector.hpp>
11#include <rmm/mr/device/pool_memory_resource.hpp>
12#include <thrust/reduce.h>
20 template <
typename ctype,
typename NT,
typename KERNEL,
typename... T>
21 __global__
void gridreduce_4d(NT *dest,
const ctype *x_quadrature_p,
const ctype *x_quadrature_w,
22 const ctype *ang_quadrature_p,
const ctype *ang_quadrature_w,
const ctype x_extent,
23 const ctype k, T... t)
25 const uint len_x = gridDim.x * blockDim.x;
26 const uint len_y = gridDim.y * blockDim.y;
27 const uint idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
28 const uint idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
29 const uint idx_z = (blockIdx.z * blockDim.z) + threadIdx.z;
30 const uint idx = idx_z * len_x * len_y + idx_y * len_x + idx_x;
32 const ctype q = k * sqrt(x_quadrature_p[idx_x] * x_extent);
33 const ctype cos1 = 2 * (ang_quadrature_p[idx_y] - (ctype)0.5);
34 const ctype phi = 2 * (ctype)M_PI * ang_quadrature_p[idx_z];
38 const ctype weight = 2 * (ctype)M_PI * ang_quadrature_w[idx_z]
39 * 2 * ang_quadrature_w[idx_y]
40 * x_quadrature_w[idx_x] * x_extent;
42 for (
uint idx_int = 0; idx_int < len_y; ++idx_int) {
43 const ctype cos2 = 2 * (ang_quadrature_p[idx_int] - (ctype)0.5);
44 mres += KERNEL::kernel(q, cos1, cos2, phi, k, t...)
45 * int_element * weight
46 * 2 * ang_quadrature_w[idx_int];
64 using PoolMR = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
79 const uint max_block_size = 256)
84 if (
n_devices == 0)
throw std::runtime_error(
"No CUDA devices found!");
86 for (
int device = 0; device <
n_devices; ++device) {
87 const rmm::cuda_device_id device_id(device);
89 std::make_shared<PoolMR>(rmm::mr::get_per_device_resource(device_id), (
device_data_size / 256 + 1) * 256));
93 throw std::runtime_error(
"Grid sizes must be currently equal for all angular dimensions!");
95 block_sizes = {max_block_size, max_block_size, max_block_size};
98 uint optimize_dim = 2;
104 optimize_dim = (optimize_dim + 2) % 3;
142 template <
typename... T> NT
get(
const ctype k,
const T &...t)
const
144 const int m_device = 0;
158 return KERNEL::constant(k, t...) + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), device_data.begin(),
159 device_data.end(), NT(0.), thrust::plus<NT>());
174 template <
typename... T> std::future<NT>
request(
const ctype k,
const T &...t)
const
176 const int m_device = 0;
185 std::shared_ptr<rmm::device_uvector<NT>> device_data =
191 const NT constant = KERNEL::constant(k, t...);
193 return std::async(std::launch::deferred, [=,
this]() {
196 return constant + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), (*device_data).begin(),
197 (*device_data).end(), NT(0.), thrust::plus<NT>());
221 mutable std::vector<std::shared_ptr<PoolMR>>
pool;
234 template <
typename NT,
typename KERNEL>
class Integrator4DGPU;
243 template <
typename NT,
typename KERNEL>
class Integrator4DGPU :
public Integrator4DTBB<NT, KERNEL>
249 const uint max_block_size = 256)
GPU integrator for the integration of a 4D function with three angles with CUDA. Calculates.
Definition integrator_4D_gpu.hh:63
rmm::mr::pool_memory_resource< rmm::mr::device_memory_resource > PoolMR
Definition integrator_4D_gpu.hh:64
const rmm::cuda_stream_pool cuda_stream_pool
Definition integrator_4D_gpu.hh:222
const ctype x_extent
Definition integrator_4D_gpu.hh:215
Integrator4DGPU(QuadratureProvider &quadrature_provider, std::array< uint, 4 > grid_sizes, const ctype x_extent, const uint max_block_size=256)
Definition integrator_4D_gpu.hh:78
Integrator4DGPU(QuadratureProvider &quadrature_provider, const std::array< uint, 4 > grid_sizes, const ctype x_extent, const JSONValue &json)
Definition integrator_4D_gpu.hh:72
dim3 threads_per_block
Definition integrator_4D_gpu.hh:218
const ctype * ptr_x_quadrature_w
Definition integrator_4D_gpu.hh:211
const ctype * ptr_ang_quadrature_w
Definition integrator_4D_gpu.hh:213
dim3 num_blocks
Definition integrator_4D_gpu.hh:217
std::atomic_ullong evaluations
Definition integrator_4D_gpu.hh:224
const std::array< uint, 4 > grid_sizes
Definition integrator_4D_gpu.hh:205
int n_devices
Definition integrator_4D_gpu.hh:220
typename get_type::ctype< NT > ctype
Numerical type to be used for integration tasks e.g. the argument or possible jacobians.
Definition integrator_4D_gpu.hh:70
const ctype * ptr_ang_quadrature_p
Definition integrator_4D_gpu.hh:212
Integrator4DGPU(const Integrator4DGPU &other)
Definition integrator_4D_gpu.hh:120
QuadratureProvider & quadrature_provider
Definition integrator_4D_gpu.hh:204
const ctype * ptr_x_quadrature_p
Definition integrator_4D_gpu.hh:210
std::future< NT > request(const ctype k, const T &...t) const
Request a future for the integral of the kernel.
Definition integrator_4D_gpu.hh:174
std::array< uint, 3 > block_sizes
Definition integrator_4D_gpu.hh:206
NT get(const ctype k, const T &...t) const
Get the integral of the kernel.
Definition integrator_4D_gpu.hh:142
const uint device_data_size
Definition integrator_4D_gpu.hh:208
std::vector< std::shared_ptr< PoolMR > > pool
Definition integrator_4D_gpu.hh:221
A wrapper around the boost json value class.
Definition json.hh:19
A class that provides quadrature points and weights, in host and device memory. The quadrature points...
Definition quadrature_provider.hh:139
const NT * get_device_weights(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature weights for a quadrature of size quadrature_size.
Definition quadrature_provider.hh:211
const NT * get_device_points(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature points for a quadrature of size quadrature_size.
Definition quadrature_provider.hh:198
typename internal::_ctype< CT >::value ctype
Definition types.hh:106
Definition complex_math.hh:14
constexpr __forceinline__ __host__ __device__ NumberType powr(const NumberType x)
A compile-time evaluatable power function for whole number exponents.
Definition math.hh:45
void check_cuda(std::string prefix="")
Check if a CUDA error occurred and print an error message if it did.
unsigned int uint
Definition utils.hh:22
__global__ void gridreduce_4d(NT *dest, const ctype *x_quadrature_p, const ctype *x_quadrature_w, const ctype *ang_quadrature_p, const ctype *ang_quadrature_w, const ctype x_extent, const ctype k, T... t)
Definition integrator_4D_gpu.hh:21