9#include <rmm/cuda_stream_pool.hpp>
10#include <rmm/device_uvector.hpp>
11#include <rmm/mr/device/pool_memory_resource.hpp>
12#include <thrust/reduce.h>
20 template <
typename ctype,
int d,
typename NT,
typename KERNEL,
typename... T>
22 const ctype *ang_quadrature_p,
const ctype *ang_quadrature_w,
23 const ctype *matsubara_quadrature_p,
const ctype *matsubara_quadrature_w,
24 const ctype x_extent,
const ctype m_T,
const ctype k, T... t)
26 uint len_x = gridDim.x * blockDim.x;
27 uint len_y = gridDim.y * blockDim.y;
28 uint idx_x = (blockIdx.x * blockDim.x) + threadIdx.x;
29 uint idx_y = (blockIdx.y * blockDim.y) + threadIdx.y;
30 uint idx_z = (blockIdx.z * blockDim.z) + threadIdx.z;
31 uint idx = idx_z * len_x * len_y + idx_y * len_x + idx_x;
33 const ctype q = k * sqrt(x_quadrature_p[idx_x] * x_extent);
34 const ctype cos = 2 * (ang_quadrature_p[idx_y] - (ctype)0.5);
37 const ctype x_weight = 2 * ang_quadrature_w[idx_y] * x_quadrature_w[idx_x] * x_extent;
38 const ctype int_element = S_dm1
41 /
powr<d - 1>(2 * (ctype)M_PI);
43 const ctype q0 = matsubara_quadrature_p[idx_z];
44 const ctype weight = x_weight * matsubara_quadrature_w[idx_z];
46 NT res = int_element * weight * (KERNEL::kernel(q, cos, q0, k, t...) + KERNEL::kernel(q, cos, -q0, k, t...));
47 if (m_T > 0 && idx_z == 0) res += int_element * x_weight * m_T * KERNEL::kernel(q, cos, (ctype)0, k, t...);
54 static_assert(d >= 3,
"dimension must be at least 3");
62 json.get_uint(
"/integration/cudathreadsperblock"))
88 uint optimize_dim = 2;
94 optimize_dim = (optimize_dim + 2) % 3;
117 if (
is_close(T,
m_T) && E != 0 && (std::abs(E -
m_E) / std::max(E,
m_E) < 2.5e-2))
return;
154 template <
typename... T> NT
get(
const ctype k,
const T &...t)
156 if (!
manual_E && (std::abs(k -
m_E) / std::max(k,
m_E) > 2.5e-2)) {
167 return KERNEL::constant(k, t...) + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), device_data.begin(),
168 device_data.end(), NT(0.), thrust::plus<NT>());
171 template <
typename... T> std::future<NT>
request(
const ctype k,
const T &...t)
173 if (!
manual_E && (std::abs(k -
m_E) / std::max(k,
m_E) > 2.5e-2)) {
179 std::shared_ptr<rmm::device_uvector<NT>> device_data =
185 const NT constant = KERNEL::constant(k, t...);
187 return std::async(std::launch::deferred, [=,
this]() {
188 return constant + thrust::reduce(thrust::cuda::par.on(cuda_stream.value()), (*device_data).begin(),
189 (*device_data).end(), NT(0.), thrust::plus<NT>());
216 using PoolMR = rmm::mr::pool_memory_resource<rmm::mr::device_memory_resource>;
228 template <
int d,
typename NT,
typename KERNEL>
class IntegratorAngleFiniteTq0GPU;
237 template <
int d,
typename NT,
typename KERNEL>
238 class IntegratorAngleFiniteTq0GPU :
public IntegratorAngleFiniteTq0TBB<d, NT, KERNEL>
Definition integrator_angle_finiteTq0_gpu.hh:53
const rmm::cuda_stream_pool cuda_stream_pool
Definition integrator_angle_finiteTq0_gpu.hh:218
QuadratureProvider & quadrature_provider
Definition integrator_angle_finiteTq0_gpu.hh:194
const ctype x_extent
Definition integrator_angle_finiteTq0_gpu.hh:208
ctype m_E
Definition integrator_angle_finiteTq0_gpu.hh:209
void reinit()
Definition integrator_angle_finiteTq0_gpu.hh:76
bool manual_E
Definition integrator_angle_finiteTq0_gpu.hh:210
const ctype * ptr_matsubara_quadrature_w
Definition integrator_angle_finiteTq0_gpu.hh:206
const ctype * ptr_matsubara_quadrature_p
Definition integrator_angle_finiteTq0_gpu.hh:205
IntegratorAngleFiniteTq0GPU(const IntegratorAngleFiniteTq0GPU &other)
Definition integrator_angle_finiteTq0_gpu.hh:139
IntegratorAngleFiniteTq0GPU(QuadratureProvider &quadrature_provider, const std::array< uint, 2 > _grid_sizes, const ctype x_extent, const ctype T, const uint max_block_size=256)
Definition integrator_angle_finiteTq0_gpu.hh:66
IntegratorAngleFiniteTq0GPU(QuadratureProvider &quadrature_provider, const std::array< uint, 2 > grid_sizes, const ctype x_extent, const JSONValue &json)
Definition integrator_angle_finiteTq0_gpu.hh:59
PoolMR pool
Definition integrator_angle_finiteTq0_gpu.hh:217
void set_E(const ctype E)
Set the typical energy scale of the integrator and recompute the Matsubara quadrature rule.
Definition integrator_angle_finiteTq0_gpu.hh:137
NT get(const ctype k, const T &...t)
Definition integrator_angle_finiteTq0_gpu.hh:154
uint device_data_size
Definition integrator_angle_finiteTq0_gpu.hh:199
rmm::mr::pool_memory_resource< rmm::mr::device_memory_resource > PoolMR
Definition integrator_angle_finiteTq0_gpu.hh:216
typename get_type::ctype< NT > ctype
Definition integrator_angle_finiteTq0_gpu.hh:57
ctype m_T
Definition integrator_angle_finiteTq0_gpu.hh:209
const ctype * ptr_ang_quadrature_w
Definition integrator_angle_finiteTq0_gpu.hh:204
std::array< uint, 3 > grid_sizes
Definition integrator_angle_finiteTq0_gpu.hh:196
dim3 threads_per_block
Definition integrator_angle_finiteTq0_gpu.hh:214
void set_T(const ctype T, const ctype E=0)
Set the temperature and typical energy scale of the integrator and recompute the Matsubara quadrature rule.
Definition integrator_angle_finiteTq0_gpu.hh:115
const ctype * ptr_ang_quadrature_p
Definition integrator_angle_finiteTq0_gpu.hh:203
dim3 num_blocks
Definition integrator_angle_finiteTq0_gpu.hh:213
const ctype * ptr_x_quadrature_w
Definition integrator_angle_finiteTq0_gpu.hh:202
std::future< NT > request(const ctype k, const T &...t)
Definition integrator_angle_finiteTq0_gpu.hh:171
const uint max_block_size
Definition integrator_angle_finiteTq0_gpu.hh:212
std::array< uint, 3 > block_sizes
Definition integrator_angle_finiteTq0_gpu.hh:197
const ctype * ptr_x_quadrature_p
Definition integrator_angle_finiteTq0_gpu.hh:201
A wrapper around the boost json value class.
Definition json.hh:19
A class that provides quadrature points and weights, in host and device memory. The quadrature points...
Definition quadrature_provider.hh:139
const NT * get_device_weights(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature weights for a quadrature of the given order and type.
Definition quadrature_provider.hh:211
const NT * get_device_matsubara_points(const NT T, const NT typical_E, const int device=0)
Get the device-side Matsubara quadrature points for temperature T and typical energy scale typical_E.
Definition quadrature_provider.hh:225
const std::vector< NT > & get_matsubara_points(const NT T, const NT typical_E)
Get the Matsubara quadrature points for temperature T and typical energy scale typical_E.
Definition quadrature_provider.hh:174
const NT * get_device_points(const size_t order, const int device=0, const QuadratureType type=QuadratureType::legendre)
Get the device-side quadrature points for a quadrature of the given order and type.
Definition quadrature_provider.hh:198
const NT * get_device_matsubara_weights(const NT T, const NT typical_E, const int device=0)
Get the device-side Matsubara quadrature weights for temperature T and typical energy scale typical_E.
Definition quadrature_provider.hh:237
typename internal::_ctype< CT >::value ctype
Definition types.hh:106
Definition complex_math.hh:14
constexpr __forceinline__ __host__ __device__ NumberType powr(const NumberType x)
A power function for whole-number exponents that can be evaluated at compile time.
Definition math.hh:45
bool __forceinline__ __host__ __device__ is_close(T1 a, T2 b, T3 eps_)
Function to evaluate whether two floats are equal to numerical precision. Tests for both relative and...
Definition math.hh:160
consteval NT S_d_prec(uint d)
Surface of a d-dimensional sphere (precompiled)
Definition math.hh:104
void check_cuda(std::string prefix="")
Check if a CUDA error occurred and print an error message if it did.
__global__ void gridreduce_angle_finiteTq0(NT *dest, const ctype *x_quadrature_p, const ctype *x_quadrature_w, const ctype *ang_quadrature_p, const ctype *ang_quadrature_w, const ctype *matsubara_quadrature_p, const ctype *matsubara_quadrature_w, const ctype x_extent, const ctype m_T, const ctype k, T... t)
Definition integrator_angle_finiteTq0_gpu.hh:21
unsigned int uint
Definition utils.hh:22