Stokhos Package Browser (Single Doxygen Collection)  Version of the Day
Stokhos_Multiply.hpp
Go to the documentation of this file.
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Stokhos Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Eric T. Phipps (etphipp@sandia.gov).
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #ifndef STOKHOS_MULTIPLY_HPP
43 #define STOKHOS_MULTIPLY_HPP
44 
45 #include "Kokkos_Core.hpp"
46 #include <vector> // for std::vector (needed below)
47 
48 namespace Stokhos {
49 
50 class DefaultMultiply {};
51 
52 template <unsigned> class IntegralRank {};
53 
54 template <typename T> struct ViewRank {
56 };
57 
58 template <typename T> struct ViewRank< std::vector<T> > {
60 };
61 
62 template <typename MatrixType,
63  typename InputVectorType,
64  typename OutputVectorType,
65  typename ColumnIndicesType = void,
66  typename VectorRank = typename ViewRank<InputVectorType>::type,
67  typename ImplTag = DefaultMultiply
68  > class Multiply;
69 
70 template <typename MatrixType,
71  typename InputVectorType,
72  typename OutputVectorType>
73 void multiply(const MatrixType& A,
74  const InputVectorType& x,
75  OutputVectorType& y) {
77  multiply_type::apply( A, x, y );
78 }
79 
80 template <typename MatrixType,
81  typename InputVectorType,
82  typename OutputVectorType>
83 void multiply(const MatrixType& A,
84  const InputVectorType& x,
85  OutputVectorType& y,
86  DefaultMultiply tag) {
88  multiply_type::apply( A, x, y );
89 }
90 
91 template <typename MatrixType,
92  typename InputVectorType,
93  typename OutputVectorType,
94  typename ColumnIndicesType>
95 void multiply(const MatrixType& A,
96  const InputVectorType& x,
97  OutputVectorType& y,
98  const ColumnIndicesType& col) {
100  multiply_type::apply( A, x, y, col );
101 }
102 
103 template <typename MatrixType,
104  typename InputVectorType,
105  typename OutputVectorType,
106  typename ColumnIndicesType>
107 void multiply(const MatrixType& A,
108  const InputVectorType& x,
109  OutputVectorType& y,
110  const ColumnIndicesType& col,
111  DefaultMultiply tag) {
113  multiply_type::apply( A, x, y, col );
114 }
115 
116 template <typename BlockSpec> class BlockMultiply;
117 
118 namespace details {
119 
120 /*
121  * Compute work range = (begin, end) such that adjacent threads/blocks write to
122  * separate cache lines
123  */
124 template <typename scalar_type, typename execution_space, typename size_type>
125 KOKKOS_INLINE_FUNCTION
126 Kokkos::pair<size_type, size_type>
127 compute_work_range(const execution_space device,
128  const size_type work_count,
129  const size_type thread_count,
130  const size_type thread_rank)
131 {
132 #if defined( KOKKOS_HAVE_CUDA )
133  enum { cache_line =
134  Kokkos::Impl::is_same<execution_space,Kokkos::Cuda>::value ? 128 : 64 };
135 #else
136  enum { cache_line = 64 };
137 #endif
138 
139  enum { work_align = cache_line / sizeof(scalar_type) };
140  enum { work_shift = Kokkos::Impl::power_of_two< work_align >::value };
141  enum { work_mask = work_align - 1 };
142 
143  const size_type work_per_thread =
144  ( ( ( ( work_count + work_mask ) >> work_shift ) + thread_count - 1 ) /
145  thread_count ) << work_shift ;
146 
147  size_type work_begin = thread_rank * work_per_thread;
148  size_type work_end = work_begin + work_per_thread;
149  if (work_begin > work_count)
150  work_begin = work_count;
151  if (work_end > work_count)
152  work_end = work_count;
153 
154  return Kokkos::make_pair(work_begin, work_end);
155 }
156 
157 // Functor implementing assignment update for multiply kernels
159  template <typename Scalar>
160  KOKKOS_INLINE_FUNCTION
161  void operator()(Scalar& y, const Scalar& x) const { y = x; }
162 };
163 
164 // Functor implementing += update for multiply kernels
166  template <typename Scalar>
167  KOKKOS_INLINE_FUNCTION
168  void operator()(Scalar& y, const Scalar& x) const { y += x; }
169 };
170 
171 // Functor implementing scaled assignment update for multiply kernels
172 template <typename Value>
174  const Value a;
175  MultiplyScaledAssign(const Value& a_) : a(a_) {}
176  template <typename Scalar>
177  KOKKOS_INLINE_FUNCTION
178  void operator()(Scalar& y, const Scalar& x) const { y = a*x; }
179 };
180 
181 // Functor implementing scaled += update (y += a*x) for multiply kernels
182 template <typename Value>
184  const Value a;
185  MultiplyScaledUpdate(const Value& a_) : a(a_) {}
186  template <typename Scalar>
187  KOKKOS_INLINE_FUNCTION
188  void operator()(Scalar& y, const Scalar& x) const { y += a*x; }
189 };
190 
191 // Functor implementing axpby update (y = a*x + b*y) for multiply kernels
192 template <typename Value>
194  const Value a;
195  const Value b;
196  MultiplyScaledUpdate2(const Value& a_, const Value& b_) : a(a_), b(b_) {}
197  template <typename Scalar>
198  KOKKOS_INLINE_FUNCTION
199  void operator()(Scalar& y, const Scalar& x) const { y = a*x + b*y; }
200 };
201 
202 } // namespace details
203 
204 } // namespace Stokhos
205 
206 #endif
IntegralRank< T::Rank > type
Kokkos::DefaultExecutionSpace execution_space
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
Definition: csr_vector.h:260
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
Top-level namespace for Stokhos classes and functions.
MultiplyScaledUpdate2(const Value &a_, const Value &b_)
KOKKOS_INLINE_FUNCTION Kokkos::pair< size_type, size_type > compute_work_range(const execution_space device, const size_type work_count, const size_type thread_count, const size_type thread_rank)
Kokkos::DefaultExecutionSpace device
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
KOKKOS_INLINE_FUNCTION void operator()(Scalar &y, const Scalar &x) const
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
Definition: csr_vector.h:267