42 #ifndef STOKHOS_MULTIPLY_HPP 43 #define STOKHOS_MULTIPLY_HPP 45 #include "Kokkos_Core.hpp" 58 template <
typename T>
struct ViewRank< std::vector<T> > {
62 template <
typename MatrixType,
63 typename InputVectorType,
64 typename OutputVectorType,
65 typename ColumnIndicesType = void,
70 template <
typename MatrixType,
71 typename InputVectorType,
72 typename OutputVectorType>
74 const InputVectorType&
x,
75 OutputVectorType&
y) {
77 multiply_type::apply( A,
x,
y );
80 template <
typename MatrixType,
81 typename InputVectorType,
82 typename OutputVectorType>
84 const InputVectorType&
x,
88 multiply_type::apply( A,
x,
y );
91 template <
typename MatrixType,
92 typename InputVectorType,
93 typename OutputVectorType,
94 typename ColumnIndicesType>
96 const InputVectorType&
x,
98 const ColumnIndicesType& col) {
100 multiply_type::apply( A,
x,
y, col );
103 template <
typename MatrixType,
104 typename InputVectorType,
105 typename OutputVectorType,
106 typename ColumnIndicesType>
108 const InputVectorType&
x,
110 const ColumnIndicesType& col,
113 multiply_type::apply( A,
x,
y, col );
124 template <
typename scalar_type,
typename execution_space,
typename size_type>
125 KOKKOS_INLINE_FUNCTION
126 Kokkos::pair<size_type, size_type>
128 const size_type work_count,
129 const size_type thread_count,
130 const size_type thread_rank)
132 #if defined( KOKKOS_HAVE_CUDA ) 134 Kokkos::Impl::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
136 enum { cache_line = 64 };
139 enum { work_align = cache_line /
sizeof(
scalar_type) };
140 enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
141 enum { work_mask = work_align - 1 };
143 const size_type work_per_thread =
144 ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
145 thread_count ) << work_shift ;
147 size_type work_begin = thread_rank * work_per_thread;
148 size_type work_end = work_begin + work_per_thread;
149 if (work_begin > work_count)
150 work_begin = work_count;
151 if (work_end > work_count)
152 work_end = work_count;
154 return Kokkos::make_pair(work_begin, work_end);
159 template <
typename Scalar>
160 KOKKOS_INLINE_FUNCTION
166 template <
typename Scalar>
167 KOKKOS_INLINE_FUNCTION
172 template <
typename Value>
176 template <
typename Scalar>
177 KOKKOS_INLINE_FUNCTION
182 template <
typename Value>
186 template <
typename Scalar>
187 KOKKOS_INLINE_FUNCTION
192 template <
typename Value>
197 template <
typename Scalar>
198 KOKKOS_INLINE_FUNCTION
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
MultiplyScaledAssign(const Value &a_)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
Top-level namespace for Stokhos classes and functions.
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
MultiplyScaledUpdate(const Value &a_)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
IntegralRank< T::Rank > type
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y