48 #include "Kokkos_Sparse_CrsMatrix.hpp" 58 #include "impl/Kokkos_Timer.hpp" 60 template<
typename IntType >
67 return k + N * (
j + N * i );
72 std::vector< std::vector<size_t> > & graph )
74 graph.resize( N * N * N , std::vector<size_t>() );
78 for (
int i = 0 ; i < (
int) N ; ++i ) {
79 for (
int j = 0 ;
j < (
int) N ; ++
j ) {
80 for (
int k = 0 ; k < (
int) N ; ++k ) {
84 graph[row].reserve(27);
86 for (
int ii = -1 ; ii < 2 ; ++ii ) {
87 for (
int jj = -1 ; jj < 2 ; ++jj ) {
88 for (
int kk = -1 ; kk < 2 ; ++kk ) {
89 if ( 0 <= i + ii && i + ii < (
int) N &&
90 0 <=
j + jj &&
j + jj < (
int) N &&
91 0 <= k + kk && k + kk < (
int) N ) {
94 graph[row].push_back(col);
97 total += graph[row].size();
103 template <
typename ScalarType,
typename OrdinalType,
typename Device>
106 const OrdinalType dim,
107 const OrdinalType nGrid,
108 const OrdinalType iterCount,
109 std::vector<double>& scalar_perf,
110 std::vector<double>& block_left_perf,
111 std::vector<double>& block_right_perf,
112 std::vector<double>& pce_perf,
113 std::vector<double>& block_pce_perf)
122 typedef Kokkos::View< value_type*, Kokkos::LayoutLeft, execution_space > scalar_vector_type;
123 typedef Kokkos::View< value_type**, Kokkos::LayoutLeft, execution_space > scalar_left_multi_vector_type;
124 typedef Kokkos::View< value_type**, Kokkos::LayoutRight, execution_space > scalar_right_multi_vector_type;
125 typedef Kokkos::View< pce_type*, Kokkos::LayoutLeft, execution_space > pce_vector_type;
126 typedef Kokkos::View< pce_type**, Kokkos::LayoutLeft, execution_space > pce_multi_vector_type;
128 typedef KokkosSparse::CrsMatrix< value_type, ordinal_type, execution_space > scalar_matrix_type;
129 typedef KokkosSparse::CrsMatrix< pce_type, ordinal_type, execution_space > pce_matrix_type;
130 typedef typename scalar_matrix_type::StaticCrsGraphType matrix_graph_type;
131 typedef typename scalar_matrix_type::values_type scalar_matrix_values_type;
132 typedef typename pce_matrix_type::values_type pce_matrix_values_type;
139 typedef typename pce_type::cijk_type kokkos_cijk_type;
143 using Teuchos::Array;
149 Array< RCP<const abstract_basis_type> > bases(dim);
151 bases[i] = Teuchos::rcp(
new basis_type(order,
true));
153 RCP<const product_basis_type> basis = rcp(
new product_basis_type(bases));
154 RCP<cijk_type>
cijk = basis->computeTripleProductTensor();
155 kokkos_cijk_type kokkos_cijk =
156 Stokhos::create_product_tensor<execution_space>(*basis, *
cijk);
162 std::vector< std::vector<size_t> > fem_graph;
163 const size_t fem_length = nGrid * nGrid * nGrid;
170 scalar_left_multi_vector_type xl(Kokkos::ViewAllocateWithoutInitializing(
"scalar left x"), fem_length, pce_size);
171 scalar_left_multi_vector_type yl(Kokkos::ViewAllocateWithoutInitializing(
"scalar right y"), fem_length, pce_size);
172 scalar_right_multi_vector_type xr(Kokkos::ViewAllocateWithoutInitializing(
"scalar right x"), fem_length, pce_size);
173 scalar_right_multi_vector_type yr(Kokkos::ViewAllocateWithoutInitializing(
"scalar right y"), fem_length, pce_size);
174 std::vector<scalar_vector_type> x_col(pce_size), y_col(pce_size);
176 x_col[i] = scalar_vector_type (Kokkos::ViewAllocateWithoutInitializing(
"scalar x col"), fem_length);
177 y_col[i] = scalar_vector_type(Kokkos::ViewAllocateWithoutInitializing(
"scalar y col"), fem_length);
181 pce_vector_type x_pce =
182 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce x"),
183 kokkos_cijk, fem_length, pce_size);
184 pce_vector_type y_pce =
185 Kokkos::make_view<pce_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce y"),
186 kokkos_cijk, fem_length, pce_size);
187 pce_multi_vector_type x_multi_pce =
188 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce multi x"),
189 kokkos_cijk, fem_length,
190 num_pce_col, pce_size);
191 pce_multi_vector_type y_multi_pce =
192 Kokkos::make_view<pce_multi_vector_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce multi y"),
193 kokkos_cijk, fem_length,
194 num_pce_col, pce_size);
208 matrix_graph_type matrix_graph =
209 Kokkos::create_staticcrsgraph<matrix_graph_type>(
210 std::string(
"test crs graph"), fem_graph);
211 scalar_matrix_values_type scalar_matrix_values =
212 scalar_matrix_values_type(Kokkos::ViewAllocateWithoutInitializing(
"scalar matrix"), graph_length);
213 pce_matrix_values_type pce_matrix_values =
214 Kokkos::make_view<pce_matrix_values_type>(Kokkos::ViewAllocateWithoutInitializing(
"pce matrix"), kokkos_cijk, graph_length, 1);
215 scalar_matrix_type scalar_matrix(
"scalar matrix", fem_length,
216 scalar_matrix_values, matrix_graph);
217 pce_matrix_type pce_matrix(
"pce matrix", fem_length,
218 pce_matrix_values, matrix_graph);
239 execution_space::fence();
240 Kokkos::Impl::Timer clock ;
251 execution_space::fence();
253 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
254 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
256 scalar_perf.resize(5);
257 scalar_perf[0] = fem_length;
258 scalar_perf[1] = pce_size;
259 scalar_perf[2] = graph_length;
260 scalar_perf[3] = seconds_per_iter;
261 scalar_perf[4] = flops / seconds_per_iter;
273 execution_space::fence();
274 Kokkos::Impl::Timer clock ;
278 execution_space::fence();
280 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
281 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
283 block_left_perf.resize(5);
284 block_left_perf[0] = fem_length;
285 block_left_perf[1] = pce_size;
286 block_left_perf[2] = graph_length;
287 block_left_perf[3] = seconds_per_iter;
288 block_left_perf[4] = flops / seconds_per_iter;
300 execution_space::fence();
301 Kokkos::Impl::Timer clock ;
305 execution_space::fence();
307 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
308 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
310 block_right_perf.resize(5);
311 block_right_perf[0] = fem_length;
312 block_right_perf[1] = pce_size;
313 block_right_perf[2] = graph_length;
314 block_right_perf[3] = seconds_per_iter;
315 block_right_perf[4] = flops / seconds_per_iter;
327 execution_space::fence();
328 Kokkos::Impl::Timer clock ;
332 execution_space::fence();
334 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
335 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size;
338 pce_perf[0] = fem_length;
339 pce_perf[1] = pce_size;
340 pce_perf[2] = graph_length;
341 pce_perf[3] = seconds_per_iter;
342 pce_perf[4] = flops / seconds_per_iter;
354 execution_space::fence();
355 Kokkos::Impl::Timer clock ;
359 execution_space::fence();
361 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
362 const double flops = 1.0e-9 * 2.0 * graph_length * pce_size * num_pce_col;
364 block_pce_perf.resize(5);
365 block_pce_perf[0] = fem_length;
366 block_pce_perf[1] = pce_size;
367 block_pce_perf[2] = graph_length;
368 block_pce_perf[3] = seconds_per_iter;
369 block_pce_perf[4] = flops / seconds_per_iter;
374 template <
typename Scalar,
typename Ordinal,
typename Device>
381 std::cout.precision(8);
382 std::cout << std::endl
383 <<
"\"Grid Size\" , " 385 <<
"\"FEM Graph Size\" , " 386 <<
"\"Dimension\" , " 389 <<
"\"Scalar SpMM Time\" , " 390 <<
"\"Scalar SpMM Speedup\" , " 391 <<
"\"Scalar SpMM GFLOPS\" , " 392 <<
"\"Block-Left SpMM Speedup\" , " 393 <<
"\"Block-Left SpMM GFLOPS\" , " 394 <<
"\"Block-Right SpMM Speedup\" , " 395 <<
"\"Block-Right SpMM GFLOPS\" , " 396 <<
"\"PCE SpMM Speedup\" , " 397 <<
"\"PCE SpMM GFLOPS\" , " 398 <<
"\"Block PCE SpMM Speedup\" , " 399 <<
"\"Block PCE SpMM GFLOPS\" , " 402 std::vector<double> perf_scalar, perf_block_left, perf_block_right,
403 perf_pce, perf_block_pce;
404 for (
Ordinal dim=min_var; dim<=max_var; ++dim) {
406 test_mean_multiply<Scalar,Ordinal,Device>(
407 order, dim, nGrid, nIter, perf_scalar, perf_block_left, perf_block_right,
408 perf_pce, perf_block_pce );
410 std::cout << nGrid <<
" , " 411 << perf_scalar[0] <<
" , " 412 << perf_scalar[2] <<
" , " 415 << perf_scalar[1] <<
" , " 416 << perf_scalar[3] <<
" , " 417 << perf_scalar[4] / perf_scalar[4] <<
" , " 418 << perf_scalar[4] <<
" , " 419 << perf_block_left[4]/ perf_scalar[4] <<
" , " 420 << perf_block_left[4] <<
" , " 421 << perf_block_right[4]/ perf_scalar[4] <<
" , " 422 << perf_block_right[4] <<
" , " 423 << perf_pce[4]/ perf_scalar[4] <<
" , " 424 << perf_pce[4] <<
" , " 425 << perf_block_pce[4]/ perf_scalar[4] <<
" , " 426 << perf_block_pce[4] <<
" , " 432 #define INST_PERF_DRIVER(SCALAR, ORDINAL, DEVICE) \ 433 template void performance_test_driver< SCALAR, ORDINAL, DEVICE >( \ 434 const ORDINAL nGrid, const ORDINAL nIter, const ORDINAL order, \ 435 const ORDINAL min_var, const ORDINAL max_var);
Stokhos::StandardStorage< int, double > storage_type
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate polynomials.
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format.
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
void performance_test_driver(const Ordinal nGrid, const Ordinal nIter, const Ordinal order, const Ordinal min_var, const Ordinal max_var)
Kokkos::DefaultExecutionSpace execution_space
Sacado::PCE::OrthogPoly< double, Storage > pce_type
void test_mean_multiply(const OrdinalType order, const OrdinalType dim, const OrdinalType nGrid, const OrdinalType iterCount, std::vector< double > &scalar_perf, std::vector< double > &block_left_perf, std::vector< double > &block_right_perf, std::vector< double > &pce_perf, std::vector< double > &block_pce_perf)
Stokhos::LegendreBasis< int, double > basis_type
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
void setGlobalCijkTensor(const cijk_type &cijk)
KOKKOS_INLINE_FUNCTION constexpr std::enable_if< is_view_uq_pce< view_type >::value, typename CijkType< view_type >::type >::type cijk(const view_type &view)
Legendre polynomial basis.
Abstract base class for 1-D orthogonal polynomials.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
A comparison functor implementing a strict weak ordering based on lexicographic ordering.
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)