24 template <
int dim,
typename NT,
typename KERNEL,
typename ExecutionSpace>
39 const std::array<ctype, dim> &grid_min,
const std::array<ctype, dim> &grid_max,
40 const std::array<QuadratureType, dim> &quadrature_type)
41 : quadrature_provider(quadrature_provider)
43 for (
size_t i = 0; i < dim; ++i) {
44 grid_size[i] = _grid_size[i];
46 nodes[i] = quadrature_provider.template nodes<ctype, typename ExecutionSpace::memory_space>(grid_size[i],
48 weights[i] = quadrature_provider.template weights<ctype, typename ExecutionSpace::memory_space>(
49 grid_size[i], quadrature_type[i]);
51 set_grid_extents(grid_min, grid_max);
54 void set_grid_extents(
const std::array<ctype, dim> &grid_min,
const std::array<ctype, dim> &grid_max)
56 for (
size_t i = 0; i < dim; ++i) {
57 grid_extents[0][i] = grid_min[i];
58 grid_extents[1][i] = grid_max[i];
60 grid_start[i] = grid_extents[0][i];
61 grid_scale[i] = (grid_extents[1][i] - grid_extents[0][i]);
65 template <
typename... T>
67 void get(NT &dest,
const T &...t)
const
72 if (!m_result_views_initialized) {
73 m_result_view = Kokkos::View<NT, typename ExecutionSpace::memory_space>(
"result");
74 m_result_host = Kokkos::create_mirror_view(m_result_view);
75 m_result_views_initialized =
true;
77 get(space, m_result_view, t...);
78 Kokkos::deep_copy(space, m_result_host, m_result_view);
80 dest = m_result_host();
83 template <
typename OT,
typename... T>
84 requires(!std::is_same_v<OT, NT> &&
is_valid_kernel<NT, KERNEL, ctype, dim, T...>)
85 void get(OT &dest,
const T &...t)
const
88 get(space, dest, t...);
91 template <
typename OT,
typename... T>
92 requires(!std::is_same_v<OT, NT> &&
is_valid_kernel<NT, KERNEL, ctype, dim, T...>)
93 void get(ExecutionSpace &space, OT &dest,
const T &...t)
const
95 const auto args = device::make_tuple(t...);
97 const auto &n = nodes;
98 const auto &w = weights;
99 const auto &start = grid_start;
100 const auto &scale = grid_scale;
106 bool is_first =
true;
107 for (
size_t i = 0; i < dim; ++i) {
108 x[i] = Kokkos::fma(scale[i], n[i][idx[i]], start[i]);
109 weight *= w[i][idx[i]] * scale[i];
110 is_first &= idx[i] == 0;
112 device::apply([&](
const auto &...iargs) { update += weight * KERNEL::kernel(iargs...); },
113 device::tuple_cat(x, args));
114 device::apply([&](
const auto &...iargs) { update += is_first ? KERNEL::constant(iargs...) : NT(0); }, args);
117 Kokkos::parallel_reduce(
"QuadratureIntegral_" + std::to_string(dim) +
"D",
122 template <
typename view_type,
typename Coordinates,
typename... Args>
123 void map(ExecutionSpace &space,
const view_type integral_view,
const Coordinates &coordinates,
const Args &...args)
126 extents[0] = integral_view.size();
127 for (
int i = 0; i < dim; ++i)
128 extents[1 + i] = grid_size[i];
132 bool needs_realloc =
false;
133 for (
size_t i = 0; i < 1 + dim; ++i)
134 needs_realloc |= (extents[i] > m_cache_extents[i]);
136 for (
size_t i = 0; i < 1 + dim; ++i)
137 m_cache_extents[i] = std::max(m_cache_extents[i], extents[i]);
144 const auto m_args = device::make_tuple(args...);
146 const auto &n = nodes;
147 const auto &w = weights;
148 const auto &start = grid_start;
149 const auto &scale = grid_scale;
154 auto subview = device::apply([&](
const auto &...i) {
return Kokkos::subview(cache, i...); }, idx);
157 const auto idx_v = coordinates.from_linear_index(idx[0]);
158 const auto pos = coordinates.forward(idx_v);
162 for (
int i = 0; i < dim; ++i) {
163 x[i] = Kokkos::fma(scale[i], n[i][idx[1 + i]], start[i]);
164 weight *= w[i][idx[1 + i]] * scale[i];
168 const auto full_args = device::tuple_cat(x, pos, m_args);
170 device::apply([&](
const auto &...iargs) { subview() = weight * KERNEL::kernel(iargs...); }, full_args);
176 using TeamType = Kokkos::TeamPolicy<ExecutionSpace>::member_type;
178 constexpr int vector_width = 32;
179 Kokkos::parallel_for(
180 Kokkos::TeamPolicy(space, integral_view.size(), Kokkos::AUTO, vector_width),
181 KOKKOS_CLASS_LAMBDA(
const TeamType &team) {
183 const uint k = team.league_rank();
185 if (k > integral_view.size())
return;
192 size_t total_elements = 1;
193 for (
int d = 0; d < dim; ++d)
194 total_elements *= grid_size[d];
198 strides[dim - 1] = 1;
199 for (
int d = dim - 2; d >= 0; --d)
200 strides[d] = strides[d + 1] * grid_size[d + 1];
203 Kokkos::parallel_reduce(
204 Kokkos::TeamThreadRange(team, (total_elements + vector_width - 1) / vector_width),
205 [&](
const size_t outer, NT &team_update) {
207 Kokkos::parallel_reduce(
208 Kokkos::ThreadVectorRange(team, vector_width),
209 [&](
const size_t inner, NT &vec_update) {
210 const size_t flat = outer * vector_width + inner;
211 if (flat < total_elements) {
214 size_t remainder = flat;
215 for (
int d = 0; d < dim; ++d) {
216 ridx[d] = remainder / strides[d];
217 remainder -= ridx[d] * strides[d];
219 device::apply([&](
const auto &...iargs) { vec_update += cache(k, iargs...); }, ridx);
223 team_update += vec_sum;
228 Kokkos::single(Kokkos::PerTeam(team), [&]() {
229 const auto idx = coordinates.from_linear_index(k);
230 const auto pos = coordinates.forward(idx);
231 const auto full_args = device::tuple_cat(pos, m_args);
233 res + device::apply([&](
const auto &...iargs) {
return KERNEL::constant(iargs...); }, full_args);
238 template <
typename Coordinates,
typename... Args>
239 auto map(NT *dest,
const Coordinates &coordinates,
const Args &...args)
243 if (node_distribution.mpi_comm != MPI_COMM_NULL && node_distribution.total_size > 0) {
244 auto mpi_comm = node_distribution.mpi_comm;
245 const auto &nodes = node_distribution.nodes;
246 const auto &sizes = node_distribution.sizes;
251 if (std::find(nodes.begin(), nodes.end(), m_rank) == nodes.end())
return ExecutionSpace();
254 const size_t rank_size = sizes[m_rank];
256 const size_t offset = std::accumulate(sizes.begin(), sizes.begin() + m_rank, 0);
259 const auto sub_coordinates =
SubCoordinates(coordinates, offset, rank_size);
261 NT *dest_offset = dest + offset;
263 return map_dist(dest_offset, sub_coordinates, args...);
266 return map_dist(dest, coordinates, args...);
269 template <
typename Coordinates,
typename... Args>
270 auto map_dist(NT *dest,
const Coordinates &coordinates,
const Args &...args)
273 auto dest_view = Kokkos::View<NT *, CPU_memory, Kokkos::MemoryUnmanaged>(dest, coordinates.size());
276 if (m_dest_device_size < coordinates.size()) {
277 m_dest_device = Kokkos::View<NT *, ExecutionSpace>(Kokkos::view_alloc(space,
"MapIntegrators_device_view"),
279 m_dest_device_size = coordinates.size();
281 auto dest_device_view =
282 Kokkos::View<NT *, ExecutionSpace>(m_dest_device, Kokkos::make_pair(
size_t(0), coordinates.size()));
285 map(space, dest_device_view, coordinates, args...);
288 Kokkos::deep_copy(space, dest_view, dest_device_view);
309 mutable size_t m_dest_device_size = 0;
310 mutable Kokkos::View<NT, typename ExecutionSpace::memory_space>
m_result_view;
311 mutable typename Kokkos::View<NT, typename ExecutionSpace::memory_space>::host_mirror_type
m_result_host;
312 mutable bool m_result_views_initialized =
false;
315 template <
int dim,
typename NT,
typename KERNEL>
329 std::array<ctype, dim> grid_min, std::array<ctype, dim> grid_max,
330 const std::array<QuadratureType, dim> quadrature_type)
335 template <
typename... T>
337 void get(NT &dest,
const T &...t)
const
339 const auto args = device::tie(t...);
341 const auto &n =
nodes;
349 bool is_first =
true;
350 for (
size_t i = 0; i < dim; ++i) {
351 x[i] = Kokkos::fma(scale[i], n[i][idx[i]], start[i]);
352 weight *= w[i][idx[i]] * scale[i];
353 is_first &= idx[i] == 0;
355 return device::apply([&](
const auto &...iargs) {
return weight * KERNEL::kernel(iargs...); },
356 device::tuple_cat(x, args));
362 template <
typename Coordinates,
typename... Args>
365 const auto m_args = device::tie(args...);
367 tbb::parallel_for(tbb::blocked_range<uint>(0, coordinates.size()), [&](
const tbb::blocked_range<uint> &r) {
368 for (uint idx = r.begin(); idx != r.end(); ++idx) {
369 const auto dis_idx = coordinates.from_linear_index(idx);
370 const auto pos = coordinates.forward(dis_idx);
372 const auto full_args = device::tuple_cat(pos, m_args);
373 device::apply([&](const auto &...iargs) { get(dest[idx], iargs...); }, full_args);
378 template <
typename Coordinates,
typename... Args>
379 auto map(NT *dest,
const Coordinates &coordinates,
const Args &...args)
385 if (node_distribution.mpi_comm != MPI_COMM_NULL && node_distribution.total_size > 0) {
386 auto mpi_comm = node_distribution.mpi_comm;
387 const auto &nodes = node_distribution.nodes;
388 const auto &sizes = node_distribution.sizes;
393 if (std::find(nodes.begin(), nodes.end(), m_rank) == nodes.end())
return execution_space();
396 const size_t rank_size = sizes[m_rank];
398 const size_t offset = std::accumulate(sizes.begin(), sizes.begin() + m_rank, 0);
401 const auto sub_coordinates =
SubCoordinates(coordinates, offset, rank_size);
403 NT *dest_offset = dest + offset;
405 map(space, dest_offset, sub_coordinates, args...);
407 map(space, dest, coordinates, args...);
412 using Base::grid_extents;
413 using Base::grid_scale;
414 using Base::grid_size;
415 using Base::grid_start;
416 using Base::quadrature_provider;
Definition abstract_integrator.hh:64
NodeDistribution node_distribution
Definition abstract_integrator.hh:73
auto map(NT *dest, const Coordinates &coordinates, const Args &...args)
Definition quadrature_integrator.hh:379
void get(NT &dest, const T &...t) const
Definition quadrature_integrator.hh:337
typename get_type::ctype< NT > ctype
Numerical type to be used for integration tasks e.g. the argument or possible jacobians.
Definition quadrature_integrator.hh:325
void map(execution_space &, NT *dest, const Coordinates &coordinates, const Args &...args)
Definition quadrature_integrator.hh:363
QuadratureIntegrator(QuadratureProvider &quadrature_provider, const std::array< size_t, dim > _grid_size, std::array< ctype, dim > grid_min, std::array< ctype, dim > grid_max, const std::array< QuadratureType, dim > quadrature_type)
Definition quadrature_integrator.hh:328
This class performs numerical integration over a d-dimensional hypercube using quadrature rules.
Definition quadrature_integrator.hh:27
Kokkos::View< NT, typename ExecutionSpace::memory_space > m_result_view
Definition quadrature_integrator.hh:310
ExecutionSpace execution_space
Execution space to be used for the integration, e.g. GPU_exec, TBB_exec.
Definition quadrature_integrator.hh:36
Kokkos::View< NT *, ExecutionSpace > m_dest_device
Definition quadrature_integrator.hh:308
void get(NT &dest, const T &...t) const
Definition quadrature_integrator.hh:67
void set_grid_extents(const std::array< ctype, dim > &grid_min, const std::array< ctype, dim > &grid_max)
Definition quadrature_integrator.hh:54
device::array< device::array< ctype, dim >, 2 > grid_extents
Definition quadrature_integrator.hh:298
QuadratureIntegrator(QuadratureProvider &quadrature_provider, const std::array< size_t, dim > &_grid_size, const std::array< ctype, dim > &grid_min, const std::array< ctype, dim > &grid_max, const std::array< QuadratureType, dim > &quadrature_type)
Definition quadrature_integrator.hh:38
Kokkos::View< NT, typenameExecutionSpace::memory_space >::host_mirror_type m_result_host
Definition quadrature_integrator.hh:311
ExecutionSpace space
Definition quadrature_integrator.hh:296
void map(ExecutionSpace &space, const view_type integral_view, const Coordinates &coordinates, const Args &...args)
Definition quadrature_integrator.hh:123
typename get_type::ctype< NT > ctype
Numerical type to be used for integration tasks e.g. the argument or possible jacobians.
Definition quadrature_integrator.hh:32
KokkosNDView< 1+dim, NT, ExecutionSpace > m_cache
Definition quadrature_integrator.hh:306
device::array< Kokkos::View< const ctype *, typename ExecutionSpace::memory_space >, dim > nodes
Definition quadrature_integrator.hh:302
device::array< Kokkos::View< const ctype *, typename ExecutionSpace::memory_space >, dim > weights
Definition quadrature_integrator.hh:303
device::array< ctype, dim > grid_scale
Definition quadrature_integrator.hh:300
QuadratureProvider & quadrature_provider
Definition quadrature_integrator.hh:297
device::array< ctype, dim > grid_start
Definition quadrature_integrator.hh:299
device::array< size_t, dim > grid_size
Definition quadrature_integrator.hh:294
auto map_dist(NT *dest, const Coordinates &coordinates, const Args &...args)
Definition quadrature_integrator.hh:270
auto map(NT *dest, const Coordinates &coordinates, const Args &...args)
Definition quadrature_integrator.hh:239
void get(OT &dest, const T &...t) const
Definition quadrature_integrator.hh:85
void get(ExecutionSpace &space, OT &dest, const T &...t) const
Definition quadrature_integrator.hh:93
A class that provides quadrature points and weights, in host and device memory. The quadrature points...
Definition quadrature_provider.hh:137
Definition coordinates.hh:149
Definition abstract_integrator.hh:51
std::array< T, N > array
Definition kokkos.hh:133
typename internal::_ctype< CT >::value ctype
Definition types.hh:134
Definition complex_math.hh:10
Kokkos::View< typename GetKokkosNDStarType< dim, T >::type, ExecutionSpace > KokkosNDView
Definition kokkos.hh:160
auto make_kokkos_nd_range(ExecutionSpace &space, const device::array< size_t, dim > start, const device::array< size_t, dim > end)
Definition kokkos.hh:227
Kokkos::View< typename GetKokkosNDStarType< dim, T >::type, ExecutionSpace, Kokkos::MemoryTraits< Kokkos::Restrict > > KokkosNDViewRestrict
Definition kokkos.hh:165
constexpr auto & get(named_tuple< tuple_type, strSet > &ob)
get a reference to the element with the given name
Definition tuples.hh:111
unsigned int uint
Definition utils.hh:24
NT TBBReduction(const device::array< size_t, dim > &grid_size, const FUN &functor)
Definition tbb.hh:91
auto make_kokkos_nd_view(const std::string &label, const device::array< size_t, dim > &extents)
Definition kokkos.hh:181
ExecutionSpaces::TBB_exec_space TBB_exec
Definition kokkos.hh:49
This is a functor which wraps a lambda for reduction. Basically, this is necessary when one wants to ...
Definition kokkos.hh:314
This is a functor which wraps a lambda. Basically, this is necessary when one wants to call a variadi...
Definition kokkos.hh:288
This execution space is optimal when used in conjunction with the FE discretizations.
Definition kokkos.hh:23