42 #ifndef STOKHOS_CUDA_WARP_SHUFFLE_HPP 43 #define STOKHOS_CUDA_WARP_SHUFFLE_HPP 45 #include "Kokkos_Core.hpp" 48 # if (__CUDA_ARCH__ >= 300) 49 # define HAVE_CUDA_SHUFFLE 1 51 # define HAVE_CUDA_SHUFFLE 0 54 # define HAVE_CUDA_SHUFFLE 0 60 template<
typename Scalar>
61 KOKKOS_INLINE_FUNCTION
66 template<
typename Scalar>
67 KOKKOS_INLINE_FUNCTION
// Warp shuffle-down for an unsigned int value, routed through the int form of
// the CUDA __shfl_down intrinsic.
// NOTE(review): the declarator line (numbered 76 in this listing) is not
// visible here; presumably it reads `unsigned int shfl_down(` -- confirm
// against the original header before relying on this.
75 KOKKOS_INLINE_FUNCTION
// val is the caller's value; delta and width are passed unchanged to
// __shfl_down below.
77 const unsigned int &
val,
const int& delta,
const int& width) {
// Copy the const reference into a local so its address can be taken.
78 unsigned int tmp1 =
val;
// Read the local's storage as an int (bit pattern is reinterpreted, not
// value-converted).
79 int tmp = *
reinterpret_cast<int*
>(&tmp1);
// Perform the shuffle on the int view of the value.
80 tmp = __shfl_down(tmp,delta,width);
// Reinterpret the shuffled int's storage as unsigned int for the return value.
81 return *
reinterpret_cast<unsigned int*
>(&tmp);
84 KOKKOS_INLINE_FUNCTION
85 int shfl_down(
const int &
val,
const int& delta,
const int& width) {
86 return __shfl_down(
val,delta,width);
89 KOKKOS_INLINE_FUNCTION
90 float shfl_down(
const float &
val,
const int& delta,
const int& width) {
91 return __shfl_down(
val,delta,width);
94 KOKKOS_INLINE_FUNCTION
95 double shfl_down(
const double &
val,
const int& delta,
const int& width) {
96 int lo = __double2loint(
val);
97 int hi = __double2hiint(
val);
98 lo = __shfl_down(lo,delta,width);
99 hi = __shfl_down(hi,delta,width);
100 return __hiloint2double(hi,lo);
// Warp shuffle-up for an unsigned int value, routed through the int form of
// the CUDA __shfl_up intrinsic.
// NOTE(review): the declarator line (numbered 104 in this listing) is not
// visible here; presumably it reads `unsigned int shfl_up(` -- confirm
// against the original header before relying on this.
103 KOKKOS_INLINE_FUNCTION
// val is the caller's value; delta and width are passed unchanged to
// __shfl_up below.
105 const unsigned int &
val,
const int& delta,
const int& width) {
// Copy the const reference into a local so its address can be taken.
106 unsigned int tmp1 =
val;
// Read the local's storage as an int (bit pattern is reinterpreted, not
// value-converted).
107 int tmp = *
reinterpret_cast<int*
>(&tmp1);
// Perform the shuffle on the int view of the value.
108 tmp = __shfl_up(tmp,delta,width);
// Reinterpret the shuffled int's storage as unsigned int for the return value.
109 return *
reinterpret_cast<unsigned int*
>(&tmp);
112 KOKKOS_INLINE_FUNCTION
113 int shfl_up(
const int &
val,
const int& delta,
const int& width) {
114 return __shfl_up(
val,delta,width);
117 KOKKOS_INLINE_FUNCTION
118 float shfl_up(
const float &
val,
const int& delta,
const int& width) {
119 return __shfl_up(
val,delta,width);
122 KOKKOS_INLINE_FUNCTION
123 double shfl_up(
const double &
val,
const int& delta,
const int& width) {
124 int lo = __double2loint(
val);
125 int hi = __double2hiint(
val);
126 lo = __shfl_up(lo,delta,width);
127 hi = __shfl_up(hi,delta,width);
128 return __hiloint2double(hi,lo);
131 #endif // #if HAVE_CUDA_SHUFFLE KOKKOS_INLINE_FUNCTION Scalar shfl_down(const Scalar &val, const int &delta, const int &width)
KOKKOS_INLINE_FUNCTION Scalar shfl_up(const Scalar &val, const int &delta, const int &width)
Top-level namespace for Stokhos classes and functions.