/home/runner/work/DiFfRG_current/DiFfRG_current/DiFfRG/include/DiFfRG/common/kokkos.hh Source File#

DiFfRG: /home/runner/work/DiFfRG_current/DiFfRG_current/DiFfRG/include/DiFfRG/common/kokkos.hh Source File
DiFfRG
kokkos.hh
Go to the documentation of this file.
1#pragma once
2
4#include <Kokkos_Core.hpp>
5#include <type_traits>
6
7#ifdef KOKKOS_ENABLE_CUDA
8#include <cuda/std/array>
9#include <cuda/std/tuple>
10#include <cuda/std/utility>
11#else
12#include <array>
13#include <tuple>
14#include <utility>
15#endif
16
17namespace DiFfRG
18{
24 using memory_space = Kokkos::DefaultHostExecutionSpace::memory_space;
25 void fence() const {}
26 };
27
29 {
30 public:
31 using GPU_exec_space = Kokkos::DefaultExecutionSpace;
32 using GPU_memory_space = GPU_exec_space::memory_space;
33
34 using Threads_exec_space = Kokkos::DefaultHostExecutionSpace;
35 using Threads_memory_space = Threads_exec_space::memory_space;
36
39 };
40
44
45 using CPU_memory = Kokkos::DefaultHostExecutionSpace::memory_space;
46
50
51 // Ensure that CPU memory space is the same as Threads memory space and TBB memory space
52 // We assume that this is true, and when switching to a different memory space, it is always unique.
53 static_assert(std::is_same_v<CPU_memory, Threads_memory>,
54 "CPU memory space must be the same as Threads memory space");
55 static_assert(std::is_same_v<CPU_memory, TBB_memory>, "CPU memory space must be the same as TBB memory space");
56
57 template <typename MemorySpace>
58 using other_memory_space_t = std::conditional_t<std::is_same_v<MemorySpace, GPU_memory>, CPU_memory, GPU_memory>;
59
66 template <class Scalar, class SavedScalar, class Space> struct SumPlus {
67 public:
68 // Required
70 using value_type = std::remove_cv_t<Scalar>;
71 using saved_type = std::remove_cv_t<SavedScalar>;
72 static_assert(!std::is_pointer_v<value_type> && !std::is_array_v<value_type>);
73
74 using result_view_type = Kokkos::View<value_type, Space>;
75
76 private:
79
81
82 public:
83 KOKKOS_INLINE_FUNCTION
84 SumPlus(value_type &value_, const saved_type &plus_value_)
85 : value(&value_), references_scalar_v(true), plus_value(plus_value_)
86 {
87 }
88
89 KOKKOS_INLINE_FUNCTION
90 SumPlus(const result_view_type &value_, const saved_type &plus_value_)
91 : value(value_), references_scalar_v(false), plus_value(plus_value_)
92 {
93 }
94
95 // Required
96 KOKKOS_INLINE_FUNCTION
97 void join(value_type &dest, const value_type &src) const { dest += src; }
98
99 KOKKOS_INLINE_FUNCTION
100 void init(value_type &val) const { val = Kokkos::reduction_identity<value_type>::sum(); }
101
102 KOKKOS_INLINE_FUNCTION
103 value_type &reference() const { return *value.data(); }
104
105 KOKKOS_INLINE_FUNCTION
106 result_view_type view() const { return value; }
107
108 KOKKOS_INLINE_FUNCTION
109 bool references_scalar() const { return references_scalar_v; }
110
111 KOKKOS_INLINE_FUNCTION
112 void final(value_type &update) const { update += plus_value; }
113 };
114
/**
 * Aliases for tuple/array/utility facilities that resolve to cuda::std when
 * KOKKOS_ENABLE_CUDA is defined and to std otherwise.
 *
 * The backend namespace is selected once through the `stdlib` namespace alias,
 * so the list of exported names exists in a single place and cannot diverge
 * between the two configurations.
 */
namespace device
{
#ifdef KOKKOS_ENABLE_CUDA
  namespace stdlib = cuda::std;
#else
  namespace stdlib = std;
#endif

  template <typename... T> using tuple = stdlib::tuple<T...>;
  template <typename T, std::size_t N> using array = stdlib::array<T, N>;
  using stdlib::apply;
  using stdlib::forward;
  using stdlib::forward_as_tuple;
  using stdlib::get;
  using stdlib::index_sequence;
  using stdlib::integer_sequence;
  using stdlib::make_integer_sequence;
  using stdlib::make_tuple;
  using stdlib::tie;
  using stdlib::tuple_cat;
  using stdlib::tuple_element;
} // namespace device
147
/**
 * @brief Builds the pointer-star data type used by Kokkos::View for `dim` runtime extents.
 *
 * `GetKokkosNDStarType<1, T>::type` is `T *`, `<2, T>` is `T **`, and so on.
 *
 * @tparam dim Number of dimensions; must be at least 1.
 * @tparam T   Element type.
 */
template <int dim, typename T> struct GetKokkosNDStarType {
  static_assert(dim >= 1, "GetKokkosNDStarType requires dim >= 1");
  // The recursion is clamped at 1 so that an invalid dim produces only the static_assert above
  // instead of recursing until the compiler's template instantiation depth limit.
  using type = typename GetKokkosNDStarType<(dim > 1 ? dim - 1 : 1), T>::type *;
};
/// Recursion anchor: a single dimension is `T *`.
template <typename T> struct GetKokkosNDStarType<1, T> {
  using type = T *;
};
154
155 // ------------------------------------------------
156 // Getting View types
157 // ------------------------------------------------
158
159 template <int dim, typename T, typename ExecutionSpace>
160 using KokkosNDView = Kokkos::View<typename GetKokkosNDStarType<dim, T>::type, // Get the star syntax for
161 // dimensionality recursively with
162 ExecutionSpace // Choice between GPU and CPU
163 >;
164 template <int dim, typename T, typename ExecutionSpace>
166 Kokkos::View<typename GetKokkosNDStarType<dim, T>::type, // Get the star syntax for dimensionality recursively
167 // with a helper
168 ExecutionSpace, // Choice between GPU and CPU
169 Kokkos::MemoryTraits<Kokkos::Restrict> // No-alias hint for compiler optimization
170 >;
171
172 template <int dim, typename T, typename ExecutionSpace>
174 Kokkos::View<typename GetKokkosNDStarType<dim, T>::type, // Get the star syntax for dimensionality recursively
175 // with a helper
176 ExecutionSpace, // Choice between GPU and CPU
177 Kokkos::MemoryTraits<Kokkos::Unmanaged> // No allocation: Attach to existing memory
178 >;
179
180 template <int dim, typename T, typename ExecutionSpace>
181 auto make_kokkos_nd_view(const std::string &label, const device::array<size_t, dim> &extents)
182 {
183 return device::apply([&](const auto &...args) { return KokkosNDView<dim, T, ExecutionSpace>(label, args...); },
184 extents);
185 }
186
187 template <int dim, typename T, typename ExecutionSpace>
188 auto make_kokkos_nd_view_restrict(const std::string &label, const device::array<size_t, dim> &extents)
189 {
190 return device::apply(
191 [&](const auto &...args) { return KokkosNDViewRestrict<dim, T, ExecutionSpace>(label, args...); }, extents);
192 }
193
194 // ------------------------------------------------
195 // Getting ranges to iterate over
196 // ------------------------------------------------
197 template <int dim, typename ExecutionSpace> struct KokkosNDRangeHelper {
198 using type = Kokkos::MDRangePolicy<Kokkos::Rank<dim>, ExecutionSpace>;
199 };
200 template <typename ExecutionSpace> struct KokkosNDRangeHelper<1, ExecutionSpace> {
201 using type = Kokkos::RangePolicy<ExecutionSpace>;
202 };
203 template <int dim, typename ExecutionSpace> using KokkosNDRange = KokkosNDRangeHelper<dim, ExecutionSpace>::type;
204
209 template <int dim>
211 {
213 for (int i = 0; i < dim; ++i)
214 tile[i] = 1;
215
216 size_t budget = max_threads;
217 // Fill from innermost dimension outward
218 for (int i = dim - 1; i >= 0; --i) {
219 tile[i] = std::min(extents[i], budget);
220 budget /= tile[i];
221 if (budget == 0) break;
222 }
223 return tile;
224 }
225
226 template <int dim, typename ExecutionSpace>
227 auto make_kokkos_nd_range(ExecutionSpace &space, const device::array<size_t, dim> start,
229 {
230 if constexpr (dim == 1) {
231 return KokkosNDRange<dim, ExecutionSpace>(space, start[0], end[0]);
232 } else {
233 Kokkos::Array<size_t, dim> start_view;
234 Kokkos::Array<size_t, dim> end_view;
235 for (size_t i = 0; i < dim; ++i) {
236 start_view[i] = start[i];
237 end_view[i] = end[i];
238 }
239 return KokkosNDRange<dim, ExecutionSpace>(space, start_view, end_view);
240 }
241 }
242
243 template <int dim, typename ExecutionSpace>
244 auto make_kokkos_nd_range(ExecutionSpace &space, const device::array<size_t, dim> start,
246 {
247 if constexpr (dim == 1) {
248 return KokkosNDRange<dim, ExecutionSpace>(space, start[0], end[0]);
249 } else {
250 Kokkos::Array<size_t, dim> start_view;
251 Kokkos::Array<size_t, dim> end_view;
252 Kokkos::Array<size_t, dim> tile_view;
253 for (size_t i = 0; i < dim; ++i) {
254 start_view[i] = start[i];
255 end_view[i] = end[i];
256 tile_view[i] = tile[i];
257 }
258 return KokkosNDRange<dim, ExecutionSpace>(space, start_view, end_view, tile_view);
259 }
260 }
261
262 template <int dim, typename TeamType>
263 KOKKOS_FORCEINLINE_FUNCTION auto make_kokkos_nd_thread_range(const TeamType &team, const device::array<size_t, dim> end)
264 {
265 if constexpr (dim == 1) {
266 return Kokkos::TeamThreadRange(team, end[0]);
267 } else {
268 return device::apply([&](const auto &...args) { return Kokkos::TeamThreadMDRange(team, args...); }, end);
269 }
270 }
271
272 // ------------------------------------------------
273 // Wrap Kokkos lambdas
274 // ------------------------------------------------
275
288 template <int dim, typename FUN> struct KokkosNDLambdaWrapper {
289 KOKKOS_FUNCTION
290 KokkosNDLambdaWrapper(const FUN &_fun) : fun(_fun) {};
291
292 template <typename... Args>
293 requires(sizeof...(Args) == dim)
294 KOKKOS_FORCEINLINE_FUNCTION void operator()(Args &&...args) const
295 {
296 fun({{std::forward<Args>(args)...}});
297 }
298
299 FUN fun;
300 };
301
314 template <int dim, typename FUN> struct KokkosNDLambdaWrapperReduction {
315 KOKKOS_FUNCTION
316 KokkosNDLambdaWrapperReduction(const FUN &_fun) : fun(_fun) {};
317
318 template <typename... Args>
319 requires(sizeof...(Args) == dim + 1)
320 KOKKOS_FORCEINLINE_FUNCTION void operator()(Args &&...args) const
321 {
322 impl(device::make_integer_sequence<size_t, dim>{}, device::forward<Args>(args)...);
323 }
324
325 FUN fun;
326
327 private:
328 template <size_t... Is, typename... Args>
329 KOKKOS_FORCEINLINE_FUNCTION void impl(device::integer_sequence<size_t, Is...>, Args &&...args) const
330 {
331 auto tuple = device::tie(args...);
332 fun(device::array<size_t, dim>{{static_cast<size_t>(device::get<Is>(tuple))...}}, device::get<dim>(tuple));
333 }
334 };
335} // namespace DiFfRG
336
337#include <autodiff/forward/real.hpp>
338
339namespace Kokkos
340{ // reduction identity must be defined in Kokkos namespace
341 template <size_t N, class T> struct reduction_identity<autodiff::Real<N, T>> {
342 KOKKOS_FORCEINLINE_FUNCTION static autodiff::Real<N, T> sum() { return autodiff::Real<N, T>(); }
343 };
344} // namespace Kokkos
Definition kokkos.hh:29
TBB_ExecutionSpace TBB_exec_space
Definition kokkos.hh:37
TBB_exec_space::memory_space TBB_memory_space
Definition kokkos.hh:38
Kokkos::DefaultExecutionSpace GPU_exec_space
Definition kokkos.hh:31
Threads_exec_space::memory_space Threads_memory_space
Definition kokkos.hh:35
GPU_exec_space::memory_space GPU_memory_space
Definition kokkos.hh:32
Kokkos::DefaultHostExecutionSpace Threads_exec_space
Definition kokkos.hh:34
std::array< T, N > array
Definition kokkos.hh:133
std::tuple< T... > tuple
Definition kokkos.hh:132
Definition complex_math.hh:10
Kokkos::View< typename GetKokkosNDStarType< dim, T >::type, ExecutionSpace > KokkosNDView
Definition kokkos.hh:160
ExecutionSpaces::Threads_memory_space Threads_memory
Definition kokkos.hh:42
auto make_kokkos_nd_range(ExecutionSpace &space, const device::array< size_t, dim > start, const device::array< size_t, dim > end)
Definition kokkos.hh:227
device::array< size_t, dim > compute_tile_hints(const device::array< size_t, dim > &extents, size_t max_threads=256)
Compute clamped tile sizes for MDRangePolicy so that the product of tile dimensions does not exceed m...
Definition kokkos.hh:210
Kokkos::View< typename GetKokkosNDStarType< dim, T >::type, ExecutionSpace, Kokkos::MemoryTraits< Kokkos::Unmanaged > > KokkosNDViewUnmanaged
Definition kokkos.hh:173
Kokkos::View< typename GetKokkosNDStarType< dim, T >::type, ExecutionSpace, Kokkos::MemoryTraits< Kokkos::Restrict > > KokkosNDViewRestrict
Definition kokkos.hh:165
std::conditional_t< std::is_same_v< MemorySpace, GPU_memory >, CPU_memory, GPU_memory > other_memory_space_t
Definition kokkos.hh:58
auto make_kokkos_nd_view_restrict(const std::string &label, const device::array< size_t, dim > &extents)
Definition kokkos.hh:188
ExecutionSpaces::GPU_exec_space GPU_exec
Definition kokkos.hh:47
Kokkos::DefaultHostExecutionSpace::memory_space CPU_memory
Definition kokkos.hh:45
KOKKOS_FORCEINLINE_FUNCTION auto make_kokkos_nd_thread_range(const TeamType &team, const device::array< size_t, dim > end)
Definition kokkos.hh:263
auto make_kokkos_nd_view(const std::string &label, const device::array< size_t, dim > &extents)
Definition kokkos.hh:181
KokkosNDRangeHelper< dim, ExecutionSpace >::type KokkosNDRange
Definition kokkos.hh:203
ExecutionSpaces::Threads_exec_space Threads_exec
Definition kokkos.hh:48
ExecutionSpaces::GPU_memory_space GPU_memory
Definition kokkos.hh:41
ExecutionSpaces::TBB_memory_space TBB_memory
Definition kokkos.hh:43
Definition kokkos.hh:340
Definition complex_math.hh:19
T * type
Definition kokkos.hh:152
Definition kokkos.hh:148
typename GetKokkosNDStarType< dim - 1, T >::type * type
Definition kokkos.hh:149
This is a functor which wraps a lambda for reduction. Basically, this is necessary when one wants to ...
Definition kokkos.hh:314
KOKKOS_FORCEINLINE_FUNCTION void impl(device::integer_sequence< size_t, Is... >, Args &&...args) const
Definition kokkos.hh:329
FUN fun
Definition kokkos.hh:325
KOKKOS_FUNCTION KokkosNDLambdaWrapperReduction(const FUN &_fun)
Definition kokkos.hh:316
KOKKOS_FORCEINLINE_FUNCTION void operator()(Args &&...args) const
Definition kokkos.hh:320
This is a functor which wraps a lambda. Basically, this is necessary when one wants to call a variadi...
Definition kokkos.hh:288
FUN fun
Definition kokkos.hh:299
KOKKOS_FORCEINLINE_FUNCTION void operator()(Args &&...args) const
Definition kokkos.hh:294
KOKKOS_FUNCTION KokkosNDLambdaWrapper(const FUN &_fun)
Definition kokkos.hh:290
Kokkos::RangePolicy< ExecutionSpace > type
Definition kokkos.hh:201
Definition kokkos.hh:197
Kokkos::MDRangePolicy< Kokkos::Rank< dim >, ExecutionSpace > type
Definition kokkos.hh:198
An extension of the Kokkos::Sum reducer that adds a constant value to the result.
Definition kokkos.hh:66
std::remove_cv_t< SavedScalar > saved_type
Definition kokkos.hh:71
bool references_scalar_v
Definition kokkos.hh:78
KOKKOS_INLINE_FUNCTION SumPlus(value_type &value_, const saved_type &plus_value_)
Definition kokkos.hh:84
KOKKOS_INLINE_FUNCTION bool references_scalar() const
Definition kokkos.hh:109
KOKKOS_INLINE_FUNCTION SumPlus(const result_view_type &value_, const saved_type &plus_value_)
Definition kokkos.hh:90
KOKKOS_INLINE_FUNCTION value_type & reference() const
Definition kokkos.hh:103
Kokkos::View< value_type, Space > result_view_type
Definition kokkos.hh:74
KOKKOS_INLINE_FUNCTION void join(value_type &dest, const value_type &src) const
Definition kokkos.hh:97
KOKKOS_INLINE_FUNCTION void init(value_type &val) const
Definition kokkos.hh:100
result_view_type value
Definition kokkos.hh:77
std::remove_cv_t< Scalar > value_type
Definition kokkos.hh:70
const saved_type plus_value
Definition kokkos.hh:80
KOKKOS_INLINE_FUNCTION result_view_type view() const
Definition kokkos.hh:106
This execution space is optimal when used in conjunction with the FE discretizations.
Definition kokkos.hh:23
void fence() const
Definition kokkos.hh:25
Kokkos::DefaultHostExecutionSpace::memory_space memory_space
Definition kokkos.hh:24
static KOKKOS_FORCEINLINE_FUNCTION autodiff::Real< N, T > sum()
Definition kokkos.hh:342