/home/runner/work/DiFfRG_current/DiFfRG_current/DiFfRG/include/DiFfRG/common/kokkos.hh File Reference#

DiFfRG: /home/runner/work/DiFfRG_current/DiFfRG_current/DiFfRG/include/DiFfRG/common/kokkos.hh File Reference
DiFfRG
kokkos.hh File Reference
#include <DiFfRG/common/tuples.hh>
#include <Kokkos_Core.hpp>
#include <type_traits>
#include <array>
#include <tuple>
#include <utility>
#include <autodiff/forward/real.hpp>

Go to the source code of this file.

Classes

struct  DiFfRG::TBB_ExecutionSpace
 This execution space is optimal when used in conjunction with the FE discretizations. More...
 
class  DiFfRG::ExecutionSpaces
 
struct  DiFfRG::SumPlus< Scalar, SavedScalar, Space >
 An extension of the Kokkos::Sum reducer that adds a constant value to the result. More...
 
struct  DiFfRG::GetKokkosNDStarType< dim, T >
 
struct  DiFfRG::GetKokkosNDStarType< 1, T >
 
struct  DiFfRG::KokkosNDRangeHelper< dim, ExecutionSpace >
 
struct  DiFfRG::KokkosNDRangeHelper< 1, ExecutionSpace >
 
struct  DiFfRG::KokkosNDLambdaWrapper< dim, FUN >
 This is a functor which wraps a lambda. Basically, this is necessary when one wants to call a variadic lambda on an NVIDIA GPU. CUDA seems to be unable to expand the variadic arguments - in contrast, a direct approach does work for OpenMP or serial compilation. To get around this limitation, the KokkosNDLambdaWrapper packs the indices into an array. If you wonder whether there is a difference when using tie and tuples (see https://godbolt.org/z/M3bG39rsM): no, there is not. Therefore, we spare ourselves the hassle and simply use an array. More...
 
struct  DiFfRG::KokkosNDLambdaWrapperReduction< dim, FUN >
 This is a functor which wraps a lambda for reduction. Basically, this is necessary when one wants to call a variadic lambda on an NVIDIA GPU. CUDA seems to be unable to expand the variadic arguments - in contrast, a direct approach does work for OpenMP or serial compilation. To get around this limitation, the KokkosNDLambdaWrapperReduction packs the indices into an array. Uses compile-time index sequences to extract the first dim args as indices and the last arg as the reduction value, avoiding recursive tuple_first/tuple_cat overhead per GPU thread. More...
 
struct  Kokkos::reduction_identity< autodiff::Real< N, T > >
 

Namespaces

namespace  DiFfRG
 
namespace  DiFfRG::device
 
namespace  Kokkos
 

Typedefs

using DiFfRG::GPU_memory = ExecutionSpaces::GPU_memory_space
 
using DiFfRG::Threads_memory = ExecutionSpaces::Threads_memory_space
 
using DiFfRG::TBB_memory = ExecutionSpaces::TBB_memory_space
 
using DiFfRG::CPU_memory = Kokkos::DefaultHostExecutionSpace::memory_space
 
using DiFfRG::GPU_exec = ExecutionSpaces::GPU_exec_space
 
using DiFfRG::Threads_exec = ExecutionSpaces::Threads_exec_space
 
using DiFfRG::TBB_exec = ExecutionSpaces::TBB_exec_space
 
template<typename MemorySpace >
using DiFfRG::other_memory_space_t = std::conditional_t<std::is_same_v<MemorySpace, GPU_memory>, CPU_memory, GPU_memory>
 
template<typename... T>
using DiFfRG::device::tuple = std::tuple<T...>
 
template<typename T , std::size_t N>
using DiFfRG::device::array = std::array<T, N>
 
template<int dim, typename T , typename ExecutionSpace >
using DiFfRG::KokkosNDView
 
template<int dim, typename T , typename ExecutionSpace >
using DiFfRG::KokkosNDViewRestrict
 
template<int dim, typename T , typename ExecutionSpace >
using DiFfRG::KokkosNDViewUnmanaged
 
template<int dim, typename ExecutionSpace >
using DiFfRG::KokkosNDRange = KokkosNDRangeHelper<dim, ExecutionSpace>::type
 

Functions

template<int dim, typename T , typename ExecutionSpace >
auto DiFfRG::make_kokkos_nd_view (const std::string &label, const device::array< size_t, dim > &extents)
 
template<int dim, typename T , typename ExecutionSpace >
auto DiFfRG::make_kokkos_nd_view_restrict (const std::string &label, const device::array< size_t, dim > &extents)
 
template<int dim>
device::array< size_t, dim > DiFfRG::compute_tile_hints (const device::array< size_t, dim > &extents, size_t max_threads=256)
 Compute clamped tile sizes for MDRangePolicy so that the product of tile dimensions does not exceed max_threads. Fills from the innermost (last) dimension outward.
 
template<int dim, typename ExecutionSpace >
auto DiFfRG::make_kokkos_nd_range (ExecutionSpace &space, const device::array< size_t, dim > start, const device::array< size_t, dim > end)
 
template<int dim, typename ExecutionSpace >
auto DiFfRG::make_kokkos_nd_range (ExecutionSpace &space, const device::array< size_t, dim > start, const device::array< size_t, dim > end, const device::array< size_t, dim > tile)
 
template<int dim, typename TeamType >
KOKKOS_FORCEINLINE_FUNCTION auto DiFfRG::make_kokkos_nd_thread_range (const TeamType &team, const device::array< size_t, dim > end)