// NOTE(review): this text looks like a lossy extraction of the original file.
// Each statement is prefixed by what appears to be its original line number,
// and the gaps between those numbers mean some lines (function names, braces,
// loop bodies) are not visible in this view.
//
// Headers: Kokkos core and its timer. The Kokkos sparse/BLAS headers follow an
// #ifdef on HAVE_STOKHOS_KOKKOSLINALG. The `defined(HAVE_MPI) && 0` condition
// can never be true.
45 #include "Kokkos_Core.hpp" 46 #include "impl/Kokkos_Timer.hpp" 62 #if defined(HAVE_MPI) && 0 75 #ifdef HAVE_STOKHOS_KOKKOSLINALG 76 #include "Kokkos_Sparse.hpp" 77 #include "Kokkos_Blas1_MV.hpp" 82 template<
typename IntType >
// Flattens the 3-D coordinate (i, j, k) of a grid with N points per side into
// one linear index: k varies fastest, then j, then i. The function's name and
// parameter list are not visible here.
89 return k + N * (
j + N * i );
// Fills `graph` with one adjacency row per point of an N x N x N grid.
// `graph` is an output parameter (taken by non-const reference).
// NOTE(review): the function name, the leading parameter(s), the computation of
// `row`/`col` and the return statement are not visible in this view.
94 std::vector< std::vector<size_t> > & graph )
// One (initially empty) row per grid point.
96 graph.resize( N * N * N , std::vector<size_t>() );
// Visit every grid point (i, j, k).
100 for (
int i = 0 ; i < (
int) N ; ++i ) {
101 for (
int j = 0 ;
j < (
int) N ; ++
j ) {
102 for (
int k = 0 ; k < (
int) N ; ++k ) {
// A point has at most 3*3*3 = 27 neighbours (itself included), so reserve that.
106 graph[row].reserve(27);
// Offsets -1, 0, +1 along each axis; the (0,0,0) offset is not excluded.
108 for (
int ii = -1 ; ii < 2 ; ++ii ) {
109 for (
int jj = -1 ; jj < 2 ; ++jj ) {
110 for (
int kk = -1 ; kk < 2 ; ++kk ) {
// Keep only neighbours that stay inside the grid on all three axes.
111 if ( 0 <= i + ii && i + ii < (
int) N &&
112 0 <=
j + jj &&
j + jj < (
int) N &&
113 0 <= k + kk && k + kk < (
int) N ) {
116 graph[row].push_back(col);
// Running count of all entries stored so far across rows.
119 total += graph[row].size();
// Benchmark of a block matrix whose block is a stochastic product tensor of
// type TensorType. Presumably this is `test_product_tensor_matrix`, which the
// driver further down calls with three template arguments — TODO confirm, the
// name line is not visible in this view.
//
// Parameters visible here:
//   var_degree - one polynomial degree per random variable; its size sets num_KL.
//   iterCount  - number of timed iterations.
//   symmetric  - use not visible in this view.
// `nGrid` is also read (grid points per side) but its declaration is not shown.
//
// Result: a 6-entry vector `perf` (see the assignments at the end).
137 template<
typename ScalarType ,
typename TensorType,
class Device >
140 const std::vector<int> & var_degree ,
142 const int iterCount ,
143 const bool symmetric )
148 Device > block_vector_type ;
153 typedef typename matrix_type::graph_type graph_type ;
164 using Teuchos::Array;
// Build one 1-D basis per random variable.
167 const size_t num_KL = var_degree.size();
168 Array< RCP<const abstract_basis_type> > bases(num_KL);
169 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions are visible (third argument 1.0 vs 2.0); the
// condition selecting between them is not shown — presumably `symmetric`.
171 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
173 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
// Combine the 1-D bases into a product basis and compute its triple-product tensor.
175 RCP<const product_basis_type> basis =
176 rcp(
new product_basis_type(
178 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
183 std::vector< std::vector<size_t> > graph ;
// Number of block rows: one per point of the nGrid^3 grid.
185 const size_t outer_length = nGrid * nGrid * nGrid ;
194 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
196 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
// Block size as reported by the tensor block, plus its aligned counterpart,
// which is the one used as the leading extent of the allocations below.
198 const size_t inner_length = matrix.block.dimension();
199 const size_t inner_length_aligned = matrix.block.aligned_dimension();
// The views below are allocated without initialisation; the lines that fill
// them are not visible in this view.
202 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
204 block_vector_type
x =
205 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
206 block_vector_type
y =
207 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
// Note: x0 reuses the allocation label "x".
215 block_vector_type x0 =
216 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"),
217 inner_length_aligned , outer_length );
// Timed section; the loop body is not visible — presumably the multiply.
223 Kokkos::Impl::Timer clock ;
224 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
// Average wall time per iteration; `flops` is pre-scaled by 1e-9, so dividing
// it by seconds gives a rate in units of 1e9 operations per second.
230 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
231 const double flops_per_block = matrix.block.tensor().num_flops();
232 const double flops = 1.0e-9*graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] tensor entry count, [4] block size, [5] flops per block.
234 std::vector<double> perf(6) ;
236 perf[0] = outer_length * inner_length ;
237 perf[1] = seconds_per_iter ;
238 perf[2] = flops / seconds_per_iter;
239 perf[3] = matrix.block.tensor().entry_count();
240 perf[4] = inner_length ;
241 perf[5] = flops_per_block;
// Benchmark of a block matrix with one block per FEM graph entry, each of
// `matrix.block.matrix_size()` values. Presumably this is
// `test_product_tensor_diagonal_matrix` from the driver — TODO confirm, the name
// line is not visible in this view.
//
// Parameters visible here: var_degree (degree per random variable),
// iterCount (timed iterations), symmetric (use not visible). `nGrid` is read
// but declared elsewhere.
// Result: a 6-entry vector `perf` (see the assignments at the end).
246 template<
typename ScalarType ,
class Device >
249 const std::vector<int> & var_degree ,
251 const int iterCount ,
252 const bool symmetric )
257 Device > block_vector_type ;
264 typedef typename matrix_type::graph_type graph_type ;
275 using Teuchos::Array;
// Build one 1-D basis per random variable.
278 const size_t num_KL = var_degree.size();
279 Array< RCP<const abstract_basis_type> > bases(num_KL);
280 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions are visible (third argument 1.0 vs 2.0); the
// selecting condition is not shown — presumably `symmetric`.
282 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
284 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
286 RCP<const product_basis_type> basis =
287 rcp(
new product_basis_type(
289 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
294 std::vector< std::vector<size_t> > fem_graph ;
// FEM size is nGrid^3; stochastic size is the number of product-basis terms.
296 const size_t fem_length = nGrid * nGrid * nGrid ;
299 const size_t stoch_length = basis->size();
// Vectors hold stoch_length values per FEM point; allocated uninitialised.
303 block_vector_type
x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), stoch_length , fem_length );
304 block_vector_type
y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), stoch_length , fem_length );
314 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>(
315 std::string(
"test product tensor graph") , fem_graph );
316 matrix.values = block_vector_type(
317 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), matrix.block.matrix_size() , fem_graph_length );
// Timed section; the loop body is not visible — presumably the multiply.
324 Kokkos::Impl::Timer clock ;
325 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
330 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// 2 * stoch_length^2 operations per block, i.e. the cost model of a dense
// stoch_length x stoch_length multiply-add — NOTE(review): confirm intent.
331 const double flops_per_block = 2.0*stoch_length*stoch_length;
332 const double flops = 1e-9*fem_graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] number of Cijk entries, [4] stochastic size, [5] flops per block.
334 std::vector<double> perf(6);
335 perf[0] = fem_length * stoch_length ;
336 perf[1] = seconds_per_iter;
337 perf[2] = flops / seconds_per_iter;
338 perf[3] = Cijk->num_entries();
339 perf[4] = stoch_length;
340 perf[5] = flops_per_block;
// Benchmark of a single flat sparse matrix built as the product of the FEM
// graph and the stochastic graph, with the FEM index as the OUTER index
// (flat row = stochastic row + FEM row * stoch_length). Presumably this is
// `test_product_flat_original_matrix` from the driver — TODO confirm, the name
// line is not visible in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric (use not visible).
// Result: a 4-entry vector `perf`.
350 template<
typename ScalarType ,
class Device >
353 const std::vector<int> & var_degree ,
355 const int iterCount ,
356 const bool symmetric )
359 typedef Kokkos::View< value_type* , Device > vector_type ;
364 typedef typename matrix_type::values_type matrix_values_type;
365 typedef typename matrix_type::graph_type matrix_graph_type;
376 using Teuchos::Array;
// Build one 1-D basis per random variable.
379 const size_t num_KL = var_degree.size();
380 Array< RCP<const abstract_basis_type> > bases(num_KL);
381 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
383 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
385 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
387 RCP<const product_basis_type> basis =
388 rcp(
new product_basis_type(
390 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
395 std::vector< std::vector<size_t> > fem_graph ;
397 const size_t fem_length = nGrid * nGrid * nGrid ;
// Stochastic sparsity pattern: one row of column indices per basis term.
404 const size_t stoch_length = basis->size();
405 std::vector< std::vector< int > > stoch_graph( stoch_length );
// NOTE(review): the `HAVE_MPI && 0` condition is always false; which of the
// following lines it encloses cannot be determined from this view.
406 #if defined(HAVE_MPI) && 0 412 *basis, *Cijk, comm);
// Copy each row of cijk_graph into stoch_graph.
413 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
414 int len = cijk_graph->NumGlobalIndices(i);
415 stoch_graph[i].resize(len);
417 cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
// Flat system has one row per (FEM point, stochastic term) pair.
423 const size_t flat_length = fem_length * stoch_length ;
425 std::vector< std::vector<size_t> > flat_graph( flat_length );
// Outer loop over FEM rows, inner loop over stochastic rows.
427 for (
size_t iOuterRow = 0 ; iOuterRow < fem_length ; ++iOuterRow ) {
429 const size_t iOuterRowNZ = fem_graph[iOuterRow].size();
431 for (
size_t iInnerRow = 0 ; iInnerRow < stoch_length ; ++iInnerRow ) {
// A flat row has (FEM row nonzeros) * (stochastic row nonzeros) entries.
433 const size_t iInnerRowNZ = stoch_graph[ iInnerRow ].size(); ;
434 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
435 const size_t iFlatRow = iInnerRow + iOuterRow * stoch_length ;
437 flat_graph[iFlatRow].resize( iFlatRowNZ );
// Write cursor into the flat row; its increment is not visible in this view.
439 size_t iFlatEntry = 0 ;
441 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
443 const size_t iOuterCol = fem_graph[iOuterRow][iOuterEntry];
445 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
// Flat column uses the same ordering as the flat row.
447 const size_t iInnerCol = stoch_graph[iInnerRow][iInnerEntry] ;
448 const size_t iFlatColumn = iInnerCol + iOuterCol * stoch_length ;
450 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
// Vectors and matrix values are allocated uninitialised.
460 vector_type
x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
461 vector_type
y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
469 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
470 std::string(
"testing") , flat_graph );
// Total number of stored entries in the flat graph.
472 const size_t flat_graph_length = matrix.graph.entries.dimension_0();
474 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
// Timed section; the loop body is not visible — presumably the multiply.
483 Kokkos::Impl::Timer clock ;
484 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
489 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Unlike other variants in this file, `flops` here is already a rate: two
// operations per stored entry, scaled by 1e-9 and divided by the time.
490 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
// perf[0] flat vector length, [1] seconds/iteration, [3] stored entries.
// The assignment to perf[2] is not visible in this view.
492 std::vector<double> perf(4);
493 perf[0] = flat_length ;
494 perf[1] = seconds_per_iter;
496 perf[3] = flat_graph_length ;
// Benchmark of a single flat sparse matrix built as the product of the
// stochastic graph and the FEM graph, with the stochastic index as the OUTER
// index (flat row = FEM row + stochastic row * fem_length). Presumably this is
// `test_product_flat_commuted_matrix` from the driver — TODO confirm, the name
// line is not visible in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric (use not visible).
// Result: a 4-entry vector `perf`.
507 template<
typename ScalarType ,
class Device >
510 const std::vector<int> & var_degree ,
512 const int iterCount ,
513 const bool symmetric )
516 typedef Kokkos::View< value_type* , Device > vector_type ;
521 typedef typename matrix_type::values_type matrix_values_type;
522 typedef typename matrix_type::graph_type matrix_graph_type;
533 using Teuchos::Array;
// Build one 1-D basis per random variable.
536 const size_t num_KL = var_degree.size();
537 Array< RCP<const abstract_basis_type> > bases(num_KL);
538 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
540 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
542 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
544 RCP<const product_basis_type> basis =
545 rcp(
new product_basis_type(
547 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
552 std::vector< std::vector<size_t> > fem_graph ;
554 const size_t fem_length = nGrid * nGrid * nGrid ;
// Stochastic sparsity pattern: one row of column indices per basis term.
561 const size_t stoch_length = basis->size();
562 std::vector< std::vector< int > > stoch_graph( stoch_length );
// NOTE(review): the `HAVE_MPI && 0` condition is always false; which of the
// following lines it encloses cannot be determined from this view.
563 #if defined(HAVE_MPI) && 0 569 *basis, *Cijk, comm);
// Copy each row of cijk_graph into stoch_graph.
570 for (
size_t i = 0 ; i < stoch_length ; ++i ) {
571 int len = cijk_graph->NumGlobalIndices(i);
572 stoch_graph[i].resize(len);
574 cijk_graph->ExtractGlobalRowCopy(i, len, len2, &stoch_graph[i][0]);
580 const size_t flat_length = fem_length * stoch_length ;
582 std::vector< std::vector<size_t> > flat_graph( flat_length );
// Outer loop over stochastic rows, inner loop over FEM rows.
584 for (
size_t iOuterRow = 0 ; iOuterRow < stoch_length ; ++iOuterRow ) {
586 const size_t iOuterRowNZ = stoch_graph[iOuterRow].size();
588 for (
size_t iInnerRow = 0 ; iInnerRow < fem_length ; ++iInnerRow ) {
// A flat row has (stochastic row nonzeros) * (FEM row nonzeros) entries.
590 const size_t iInnerRowNZ = fem_graph[iInnerRow].size();
591 const size_t iFlatRowNZ = iOuterRowNZ * iInnerRowNZ ;
592 const size_t iFlatRow = iInnerRow + iOuterRow * fem_length ;
594 flat_graph[iFlatRow].resize( iFlatRowNZ );
// Write cursor into the flat row; its increment is not visible in this view.
596 size_t iFlatEntry = 0 ;
598 for (
size_t iOuterEntry = 0 ; iOuterEntry < iOuterRowNZ ; ++iOuterEntry ) {
600 const size_t iOuterCol = stoch_graph[ iOuterRow ][ iOuterEntry ];
602 for (
size_t iInnerEntry = 0 ; iInnerEntry < iInnerRowNZ ; ++iInnerEntry ) {
// Flat column uses the same ordering as the flat row.
604 const size_t iInnerCol = fem_graph[ iInnerRow][iInnerEntry];
605 const size_t iFlatColumn = iInnerCol + iOuterCol * fem_length ;
607 flat_graph[iFlatRow][iFlatEntry] = iFlatColumn ;
// Vectors and matrix values are allocated uninitialised.
616 vector_type
x = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), flat_length );
617 vector_type
y = vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), flat_length );
625 matrix.graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , flat_graph );
// Total number of stored entries in the flat graph.
627 const size_t flat_graph_length = matrix.graph.entries.dimension_0();
629 matrix.values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), flat_graph_length );
// Timed section; the loop body is not visible — presumably the multiply.
638 Kokkos::Impl::Timer clock ;
639 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
644 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// `flops` here is already a rate: two operations per stored entry, scaled by
// 1e-9 and divided by the time per iteration.
645 const double flops = 2.0*1e-9*flat_graph_length / seconds_per_iter;
// perf[0] flat vector length, [1] seconds/iteration, [3] stored entries.
// The assignment to perf[2] is not visible in this view.
647 std::vector<double> perf(4);
648 perf[0] = flat_length ;
649 perf[1] = seconds_per_iter;
651 perf[3] = flat_graph_length ;
// Benchmark of a block matrix whose tensor block is created with a parameter
// list carrying "Tile Size" = 128 and "Max Tiles" = 10000.
// NOTE(review): the function name and the TensorType typedef are not visible
// in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric (use not visible).
// Result: a 6-entry vector `perf`.
655 template<
typename ScalarType ,
class Device >
658 const std::vector<int> & var_degree ,
660 const int iterCount ,
661 const bool symmetric )
666 Device > block_vector_type ;
672 typedef typename matrix_type::graph_type graph_type ;
683 using Teuchos::Array;
// Build one 1-D basis per random variable.
686 const size_t num_KL = var_degree.size();
687 Array< RCP<const abstract_basis_type> > bases(num_KL);
688 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
690 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
692 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
694 RCP<const product_basis_type> basis =
695 rcp(
new product_basis_type(
697 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
702 std::vector< std::vector<size_t> > graph ;
704 const size_t outer_length = nGrid * nGrid * nGrid ;
// Tiling options handed to the tensor factory.
712 Teuchos::ParameterList params;
713 params.set(
"Tile Size", 128);
714 params.set(
"Max Tiles", 10000);
716 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
718 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
// Block size and its aligned counterpart; the aligned one sizes the views.
720 const size_t inner_length = matrix.block.dimension();
721 const size_t inner_length_aligned = matrix.block.aligned_dimension();
724 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
726 block_vector_type
x =
727 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
728 block_vector_type
y =
729 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
// Timed section; the loop body is not visible — presumably the multiply.
741 Kokkos::Impl::Timer clock ;
742 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
747 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
748 const double flops_per_block = matrix.block.tensor().num_flops();
749 const double flops = 1.0e-9*graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] tensor entry count, [4] block size, [5] flops per block.
754 std::vector<double> perf(6) ;
756 perf[0] = outer_length * inner_length ;
757 perf[1] = seconds_per_iter ;
758 perf[2] = flops / seconds_per_iter;
759 perf[3] = matrix.block.tensor().entry_count();
760 perf[4] = inner_length ;
761 perf[5] = flops_per_block;
// Benchmark of a block matrix whose tensor block is created with a parameter
// list carrying only "Tile Size" = 128.
// NOTE(review): the function name and the TensorType typedef are not visible
// in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric (use not visible).
// Result: a 6-entry vector `perf`.
766 template<
typename ScalarType ,
class Device >
769 const std::vector<int> & var_degree ,
771 const int iterCount ,
772 const bool symmetric )
777 Device > block_vector_type ;
783 typedef typename matrix_type::graph_type graph_type ;
794 using Teuchos::Array;
// Build one 1-D basis per random variable.
797 const size_t num_KL = var_degree.size();
798 Array< RCP<const abstract_basis_type> > bases(num_KL);
799 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
801 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
803 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
805 RCP<const product_basis_type> basis =
806 rcp(
new product_basis_type(
808 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
813 std::vector< std::vector<size_t> > graph ;
815 const size_t outer_length = nGrid * nGrid * nGrid ;
// Tiling option handed to the tensor factory.
823 Teuchos::ParameterList params;
824 params.set(
"Tile Size", 128);
826 Stokhos::create_stochastic_product_tensor< TensorType >( *basis, *Cijk,
828 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
// Block size and its aligned counterpart; the aligned one sizes the views.
830 const size_t inner_length = matrix.block.dimension();
831 const size_t inner_length_aligned = matrix.block.aligned_dimension();
834 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
836 block_vector_type
x =
837 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
838 block_vector_type
y =
839 block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
// Timed section; the loop body is not visible — presumably the multiply.
851 Kokkos::Impl::Timer clock ;
852 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
857 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
858 const double flops_per_block = matrix.block.tensor().num_flops();
859 const double flops = 1.0e-9*graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] tensor entry count, [4] block size, [5] flops per block.
864 std::vector<double> perf(6) ;
866 perf[0] = outer_length * inner_length ;
867 perf[1] = seconds_per_iter ;
868 perf[2] = flops / seconds_per_iter;
869 perf[3] = matrix.block.tensor().entry_count();
870 perf[4] = inner_length ;
871 perf[5] = flops_per_block;
// Benchmark of a block matrix whose views are sized by the plain block
// dimension (no aligned dimension is used here) and whose tensor reports
// `num_non_zeros()`.
// NOTE(review): the function name, the TensorType typedef and the expression
// that initialises Cijk are not visible in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric (use not visible).
// Result: a 6-entry vector `perf`.
876 template<
typename ScalarType ,
class Device >
879 const std::vector<int> & var_degree ,
881 const int iterCount ,
882 const bool symmetric )
887 Device > block_vector_type ;
893 typedef typename matrix_type::graph_type graph_type ;
904 using Teuchos::Array;
// Build one 1-D basis per random variable.
907 const size_t num_KL = var_degree.size();
908 Array< RCP<const abstract_basis_type> > bases(num_KL);
909 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
911 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
913 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
915 RCP<const product_basis_type> basis =
916 rcp(
new product_basis_type(
// The right-hand side of this initialisation is on a line not shown here.
918 RCP<Cijk_type> Cijk =
924 std::vector< std::vector<size_t> > graph ;
926 const size_t outer_length = nGrid * nGrid * nGrid ;
935 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
937 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
939 const size_t inner_length = matrix.block.dimension();
// Views are allocated uninitialised with inner_length as leading extent.
941 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length , graph_length );
943 block_vector_type
x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length , outer_length );
944 block_vector_type
y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length , outer_length );
// Timed section; the loop body is not visible — presumably the multiply.
956 Kokkos::Impl::Timer clock ;
957 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
962 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
963 const double flops_per_block = matrix.block.tensor().num_flops();
964 const double flops = 1.0e-9*graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] tensor non-zero count, [4] block size, [5] flops per block.
969 std::vector<double> perf(6) ;
971 perf[0] = outer_length * inner_length ;
972 perf[1] = seconds_per_iter ;
973 perf[2] = flops / seconds_per_iter;
974 perf[3] = matrix.block.tensor().num_non_zeros();
975 perf[4] = inner_length ;
976 perf[5] = flops_per_block;
// Benchmark of a block matrix whose tensor block is created with a parameter
// list carrying "Symmetric" = symmetric. Dimensions are read from
// `matrix.block.tensor()` rather than from `matrix.block` directly.
// NOTE(review): the function name and the TensorType typedef are not visible
// in this view.
//
// Parameters visible here: var_degree, iterCount, symmetric.
// Result: a 6-entry vector `perf`.
981 template<
typename ScalarType ,
class Device >
984 const std::vector<int> & var_degree ,
986 const int iterCount ,
987 const bool symmetric )
992 Device > block_vector_type ;
998 typedef typename matrix_type::graph_type graph_type ;
1009 using Teuchos::Array;
// Build one 1-D basis per random variable.
1012 const size_t num_KL = var_degree.size();
1013 Array< RCP<const abstract_basis_type> > bases(num_KL);
1014 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
1016 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
1018 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
1020 RCP<const product_basis_type> basis =
1021 rcp(
new product_basis_type(
1023 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1028 std::vector< std::vector<size_t> > graph ;
1030 const size_t outer_length = nGrid * nGrid * nGrid ;
1036 matrix_type matrix ;
// Forward the caller's `symmetric` flag to the tensor factory.
1038 Teuchos::ParameterList params;
1039 params.set(
"Symmetric", symmetric);
1041 Stokhos::create_stochastic_product_tensor< TensorType >( *basis,
1044 matrix.graph = Kokkos::create_staticcrsgraph<graph_type>( std::string(
"test crs graph") , graph );
// Block size and its aligned counterpart; the aligned one sizes the views.
1046 const size_t inner_length = matrix.block.tensor().dimension();
1047 const size_t inner_length_aligned = matrix.block.tensor().aligned_dimension();
1049 matrix.values = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), inner_length_aligned , graph_length );
1051 block_vector_type
x = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length_aligned , outer_length );
1052 block_vector_type
y = block_vector_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length_aligned , outer_length );
// Timed section; the loop body is not visible — presumably the multiply.
1064 Kokkos::Impl::Timer clock ;
1065 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1070 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
1071 const double flops_per_block = matrix.block.tensor().num_flops();
1072 const double flops = 1.0e-9*graph_length*flops_per_block;
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s),
// [3] tensor non-zero count, [4] block size, [5] flops per block.
1077 std::vector<double> perf(6) ;
1079 perf[0] = outer_length * inner_length ;
1080 perf[1] = seconds_per_iter ;
1081 perf[2] = flops / seconds_per_iter;
1082 perf[3] = matrix.block.tensor().num_non_zeros();
1083 perf[4] = inner_length ;
1084 perf[5] = flops_per_block;
// Matrix-free benchmark that keeps one FEM-sized sparse matrix and one set of
// 1-D vectors (x, y, tmp) per stochastic basis term, held in std::vector.
// NOTE(review): the function name is not visible in this view, and most of the
// loop bodies (including where n_apply / n_add are updated) are missing.
//
// Parameters visible here: var_degree, iterCount, test_block, symmetric
// (uses of the last two are not visible).
// Result: a 4-entry vector `perf`; only entries 0..2 are assigned in the
// visible lines.
1089 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1092 const std::vector<int> & var_degree ,
1094 const int iterCount ,
1095 const bool test_block ,
1096 const bool symmetric )
1107 using Teuchos::Array;
// Build one 1-D basis per random variable.
1110 const size_t num_KL = var_degree.size();
1111 Array< RCP<const abstract_basis_type> > bases(num_KL);
1112 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
1114 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
1116 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
1118 RCP<const product_basis_type> basis =
1119 rcp(
new product_basis_type(
// Here the OUTER length is the stochastic size and the inner length is nGrid^3.
1121 const size_t outer_length = basis->size();
1122 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1127 typedef typename matrix_type::values_type matrix_values_type;
1128 typedef typename matrix_type::graph_type matrix_graph_type;
1133 std::vector< std::vector<size_t> > fem_graph ;
1135 const size_t inner_length = nGrid * nGrid * nGrid ;
1136 const size_t graph_length =
1141 typedef Kokkos::View<value_type*,Device>
vec_type ;
// One matrix and one x / y / tmp vector per stochastic basis term.
1143 std::vector<matrix_type> matrix( outer_length ) ;
1144 std::vector<vec_type>
x( outer_length ) ;
1145 std::vector<vec_type>
y( outer_length ) ;
1146 std::vector<vec_type> tmp( outer_length ) ;
// Every block gets its own graph built from the same fem_graph, plus
// uninitialised value and vector storage.
1148 for (
size_t block=0; block<outer_length; ++block) {
1149 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>( std::string(
"testing") , fem_graph );
1151 matrix[block].values = matrix_values_type( Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
1153 x[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"x"), inner_length );
1154 y[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"y"), inner_length );
1155 tmp[block] =
vec_type( Kokkos::ViewAllocateWithoutInitializing(
"tmp"), inner_length );
1165 Kokkos::Impl::Timer clock ;
// Timed section: each iteration walks the sparse triple-product tensor,
// outermost over k, then j, then i.
1168 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1173 typename Cijk_type::k_iterator k_begin = Cijk->k_begin();
1174 typename Cijk_type::k_iterator k_end = Cijk->k_end();
1175 for (
typename Cijk_type::k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
// Number of j entries paired with this k; sizes the per-k scratch lists.
1176 int nj = Cijk->num_j(k_it);
1178 int k = index(k_it);
1179 typename Cijk_type::kj_iterator j_begin = Cijk->j_begin(k_it);
1180 typename Cijk_type::kj_iterator j_end = Cijk->j_end(k_it);
1181 std::vector<vec_type> xx(nj), yy(nj);
// First pass over j (body not visible beyond reading the index).
1183 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1185 int j = index(j_it);
// Second pass over j, with an inner loop over the i entries of each (k, j).
1193 for (
typename Cijk_type::kj_iterator j_it = j_begin; j_it != j_end;
1195 typename Cijk_type::kji_iterator i_begin = Cijk->i_begin(j_it);
1196 typename Cijk_type::kji_iterator i_end = Cijk->i_end(j_it);
1197 for (
typename Cijk_type::kji_iterator i_it = i_begin; i_it != i_end;
1199 int i = index(i_it);
1212 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Cost model: 2 operations per stored matrix entry for each apply, plus one
// operation per vector element for each add; scaled by 1e-9.
1213 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1214 static_cast<double>(n_add)*inner_length);
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s).
1219 std::vector<double> perf(4);
1220 perf[0] = outer_length * inner_length;
1221 perf[1] = seconds_per_iter ;
1222 perf[2] = flops/seconds_per_iter;
// Matrix-free benchmark that stores all stochastic components as columns of
// 2-D LayoutLeft views and addresses individual columns through unmanaged
// 1-D subviews. Presumably this is `test_original_matrix_free_view`, which the
// second driver calls with <Scalar,Device,SparseMatOps> — TODO confirm, the
// name line is not visible in this view.
//
// Parameters visible here: var_degree, iterCount, test_block, symmetric
// (uses of the last two are not visible).
// Result: a 4-entry vector `perf`; only entries 0..2 are assigned in the
// visible lines.
1228 template<
typename ScalarType ,
class Device ,
class SparseMatOps >
1231 const std::vector<int> & var_degree ,
1233 const int iterCount ,
1234 const bool test_block ,
1235 const bool symmetric )
1246 using Teuchos::Array;
// Build one 1-D basis per random variable.
1249 const size_t num_KL = var_degree.size();
1250 Array< RCP<const abstract_basis_type> > bases(num_KL);
1251 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
1253 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
1255 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
1257 RCP<const product_basis_type> basis =
1258 rcp(
new product_basis_type(
// Outer length is the stochastic size; inner length is nGrid^3.
1260 const size_t outer_length = basis->size();
1261 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1266 typedef typename matrix_type::values_type matrix_values_type;
1267 typedef typename matrix_type::graph_type matrix_graph_type;
1272 std::vector< std::vector<size_t> > fem_graph ;
1274 const size_t inner_length = nGrid * nGrid * nGrid ;
1275 const size_t graph_length =
// vec_type is an unmanaged 1-D view (used for column subviews below);
// multi_vec_type owns the 2-D storage.
1280 typedef Kokkos::View<value_type*, Kokkos::LayoutLeft, Device, Kokkos::MemoryUnmanaged>
vec_type ;
1281 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type ;
1283 std::vector<matrix_type> matrix( outer_length ) ;
// x is allocated without initialisation; y, tmp_x and tmp_y use the plain
// label constructor instead.
1284 multi_vec_type
x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1285 inner_length, outer_length ) ;
1286 multi_vec_type
y(
"y", inner_length, outer_length ) ;
1287 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1288 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
// Every block gets its own graph built from the same fem_graph.
1292 for (
size_t block=0; block<outer_length; ++block) {
1293 matrix[block].graph = Kokkos::create_staticcrsgraph<matrix_graph_type>(
1294 std::string(
"testing") , fem_graph );
1296 matrix[block].values = matrix_values_type(
"matrix" , graph_length );
1303 Kokkos::Impl::Timer clock ;
// Timed section: each iteration walks the sparse triple-product tensor,
// outermost over k, then j, then i.
1306 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1309 typedef typename Cijk_type::k_iterator k_iterator;
1310 typedef typename Cijk_type::kj_iterator kj_iterator;
1311 typedef typename Cijk_type::kji_iterator kji_iterator;
1314 k_iterator k_begin = Cijk->k_begin();
1315 k_iterator k_end = Cijk->k_end();
1316 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1317 unsigned nj = Cijk->num_j(k_it);
1319 int k = index(k_it);
1320 kj_iterator j_begin = Cijk->j_begin(k_it);
1321 kj_iterator j_end = Cijk->j_end(k_it);
1322 std::vector<int> j_indices(nj);
// First pass over j: take column j of x and the next free column of tmp_x
// (jdx is a running column counter declared on a line not shown). What is
// done with the pair is not visible — presumably a copy.
1324 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1325 int j = index(j_it);
1326 vec_type xx = Kokkos::subview(
x, Kokkos::ALL(),
j );
1327 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
// Restrict the scratch multi-vectors to their first nj columns.
1330 multi_vec_type tmp_x_view =
1331 Kokkos::subview( tmp_x, Kokkos::ALL(),
1332 std::make_pair(0u,nj));
1333 multi_vec_type tmp_y_view =
1334 Kokkos::subview( tmp_y, Kokkos::ALL(),
1335 std::make_pair(0u,nj));
// Second pass over j: for every i paired with (k, j), take column i of y.
1339 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1341 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1342 kji_iterator i_begin = Cijk->i_begin(j_it);
1343 kji_iterator i_end = Cijk->i_end(j_it);
1344 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1345 int i = index(i_it);
1347 vec_type y_view = Kokkos::subview(
y, Kokkos::ALL(), i );
1358 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Cost model: 2 operations per stored matrix entry for each apply, plus one
// operation per vector element for each add; scaled by 1e-9.
1359 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1360 static_cast<double>(n_add)*inner_length);
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s).
1365 std::vector<double> perf(4);
1366 perf[0] = outer_length * inner_length;
1367 perf[1] = seconds_per_iter ;
1368 perf[2] = flops/seconds_per_iter;
// Matrix-free benchmark using KokkosSparse::CrsMatrix for the per-term FEM
// matrices. It follows an #ifdef on HAVE_STOKHOS_KOKKOSLINALG. The vectors are
// columns of 2-D LayoutLeft views accessed through unmanaged 1-D subviews.
//
// Parameters visible here: var_degree, iterCount, test_block, symmetric
// (uses of the last two are not visible). `nGrid` is read but declared on a
// line not shown.
// Result: a 4-entry vector `perf`; only entries 0..2 are assigned in the
// visible lines.
1374 #ifdef HAVE_STOKHOS_KOKKOSLINALG 1375 template<
typename ScalarType ,
class Device >
1377 test_original_matrix_free_kokkos(
1378 const std::vector<int> & var_degree ,
1380 const int iterCount ,
1381 const bool test_block ,
1382 const bool symmetric )
1393 using Teuchos::Array;
// Build one 1-D basis per random variable.
1396 const size_t num_KL = var_degree.size();
1397 Array< RCP<const abstract_basis_type> > bases(num_KL);
1398 for (
size_t i=0; i<num_KL; i++) {
// Two alternative constructions (third argument 1.0 vs 2.0); the selecting
// condition is not shown — presumably `symmetric`.
1400 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,1.0,
true));
1402 bases[i] = Teuchos::rcp(
new basis_type(var_degree[i],1.0,2.0,
true));
// Product basis built with the sparse-Cijk tolerance for value_type.
1404 RCP<const product_basis_type> basis =
1405 rcp(
new product_basis_type(
1406 bases, ScalarTolerances<value_type>::sparse_cijk_tol()));
// Outer length is the stochastic size; inner length is nGrid^3.
1407 const size_t outer_length = basis->size();
1408 RCP<Cijk_type> Cijk = basis->computeTripleProductTensor();
1413 typedef KokkosSparse::CrsMatrix<value_type,ordinal_type,Device> matrix_type;
1414 typedef typename matrix_type::values_type matrix_values_type;
1415 typedef typename matrix_type::StaticCrsGraphType matrix_graph_type;
1420 std::vector< std::vector<size_t> > fem_graph ;
1422 const size_t inner_length = nGrid * nGrid * nGrid ;
1423 const size_t graph_length =
// vec_type is an unmanaged 1-D view (used for column subviews below);
// multi_vec_type owns the 2-D storage.
1428 typedef Kokkos::View<value_type*,Kokkos::LayoutLeft,Device, Kokkos::MemoryUnmanaged>
vec_type ;
1429 typedef Kokkos::View<value_type**, Kokkos::LayoutLeft, Device> multi_vec_type;
1431 std::vector<matrix_type> matrix( outer_length ) ;
// x is allocated without initialisation; y, tmp_x and tmp_y use the plain
// label constructor instead.
1432 multi_vec_type
x( Kokkos::ViewAllocateWithoutInitializing(
"x"),
1433 inner_length, outer_length ) ;
1434 multi_vec_type
y(
"y", inner_length, outer_length ) ;
1435 multi_vec_type tmp_x(
"tmp_x", inner_length, outer_length ) ;
1436 multi_vec_type tmp_y(
"tmp_y", inner_length, outer_length ) ;
// Build one CrsMatrix per stochastic term from the same fem_graph.
1440 for (
size_t block=0; block<outer_length; ++block) {
1441 matrix_graph_type matrix_graph =
1442 Kokkos::create_staticcrsgraph<matrix_graph_type>(
1443 std::string(
"test crs graph") , fem_graph );
1445 matrix_values_type matrix_values = matrix_values_type(
1446 Kokkos::ViewAllocateWithoutInitializing(
"matrix"), graph_length );
// NOTE(review): the second constructor argument is `outer_length` (stochastic
// size), while the graph and vectors are sized by `inner_length`. If that
// argument is the column count, `inner_length` looks like the intended value —
// confirm against the KokkosSparse::CrsMatrix constructor.
1448 matrix[block] = matrix_type(
"matrix", outer_length, matrix_values,
1453 Kokkos::Impl::Timer clock ;
// Timed section: each iteration walks the sparse triple-product tensor,
// outermost over k, then j, then i.
1456 for (
int iter = 0 ; iter < iterCount ; ++iter ) {
1459 typedef typename Cijk_type::k_iterator k_iterator;
1460 typedef typename Cijk_type::kj_iterator kj_iterator;
1461 typedef typename Cijk_type::kji_iterator kji_iterator;
1464 k_iterator k_begin = Cijk->k_begin();
1465 k_iterator k_end = Cijk->k_end();
1466 for (k_iterator k_it=k_begin; k_it!=k_end; ++k_it) {
1467 unsigned nj = Cijk->num_j(k_it);
1469 int k = index(k_it);
1470 kj_iterator j_begin = Cijk->j_begin(k_it);
1471 kj_iterator j_end = Cijk->j_end(k_it);
// First pass over j: take column j of x and the next free column of tmp_x
// (jdx is a running column counter declared on a line not shown).
1473 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1474 int j = index(j_it);
1475 vec_type xx = Kokkos::subview(
x, Kokkos::ALL(),
j );
1476 vec_type tt = Kokkos::subview( tmp_x, Kokkos::ALL(), jdx++ );
// Restrict the scratch multi-vectors to their first nj columns.
1479 multi_vec_type tmp_x_view =
1480 Kokkos::subview( tmp_x, Kokkos::ALL(),
1481 std::make_pair(0u,nj));
1482 multi_vec_type tmp_y_view =
1483 Kokkos::subview( tmp_y, Kokkos::ALL(),
1484 std::make_pair(0u,nj));
// Second pass over j: for every i paired with (k, j), take column i of y.
1488 for (kj_iterator j_it = j_begin; j_it != j_end; ++j_it) {
1490 Kokkos::subview( tmp_y, Kokkos::ALL(), jdx++ );
1491 kji_iterator i_begin = Cijk->i_begin(j_it);
1492 kji_iterator i_end = Cijk->i_end(j_it);
1493 for (kji_iterator i_it = i_begin; i_it != i_end; ++i_it) {
1494 int i = index(i_it);
1496 vec_type y_view = Kokkos::subview(
y, Kokkos::ALL(), i );
1508 const double seconds_per_iter = clock.seconds() / ((
double) iterCount );
// Cost model: 2 operations per stored matrix entry for each apply, plus one
// operation per vector element for each add; scaled by 1e-9.
1509 const double flops = 1.0e-9*(2.0*
static_cast<double>(n_apply)*graph_length+
1510 static_cast<double>(n_add)*inner_length);
// perf[0] total vector length, [1] seconds/iteration, [2] rate (1e9 flop/s).
1515 std::vector<double> perf(4);
1516 perf[0] = outer_length * inner_length;
1517 perf[1] = seconds_per_iter ;
1518 perf[2] = flops/seconds_per_iter;
// Driver that runs four matrix-vector-product variants (flat original, flat
// commuted, block-diagonal, CRS product tensor) for every variable count from
// minvar to maxvar and prints one comma-separated row per count to std::cout.
// NOTE(review): the function name and the leading parameters (pdeg, minvar,
// maxvar, nGrid, nIter are all read below) are not visible in this view.
//
// Parameters visible here:
//   test_block - use not visible in this view.
//   symmetric  - forwarded unchanged to every test function.
1525 template<
class Scalar,
class Device >
1531 const bool test_block ,
1532 const bool symmetric )
1537 std::vector< std::vector<size_t> > fem_graph ;
1539 const size_t fem_nonzeros =
// Numbers are printed with 8 significant digits.
1544 std::cout.precision(8);
1548 std::cout << std::endl <<
"\"FEM NNZ = " << fem_nonzeros <<
"\"" << std::endl;
// Column header row; each title is emitted wrapped in double quotes.
1550 std::cout << std::endl
1552 <<
"\"#Variable\" , " 1553 <<
"\"PolyDegree\" , " 1555 <<
"\"#TensorEntry\" , " 1556 <<
"\"VectorSize\" , " 1557 <<
"\"Original-Flat MXV-Time\" , " 1558 <<
"\"Original-Flat MXV-Speedup\" , " 1559 <<
"\"Original-Flat MXV-GFLOPS\" , " 1560 <<
"\"Commuted-Flat MXV-Speedup\" , " 1561 <<
"\"Commuted-Flat MXV-GFLOPS\" , " 1562 <<
"\"Block-Diagonal MXV-Speedup\" , " 1563 <<
"\"Block-Diagonal MXV-GFLOPS\" , " 1564 <<
"\"Block-Crs-Tensor MXV-Speedup\" , " 1565 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , " 1568 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
// Same polynomial degree pdeg for each of the nvar variables.
1570 std::vector<int> var_degree( nvar , pdeg );
// Run the four variants with identical inputs.
1574 const std::vector<double> perf_flat_original =
1575 test_product_flat_original_matrix<Scalar,Device>(
1576 var_degree , nGrid , nIter , symmetric );
1578 const std::vector<double> perf_flat_commuted =
1579 test_product_flat_commuted_matrix<Scalar,Device>(
1580 var_degree , nGrid , nIter , symmetric );
1582 const std::vector<double> perf_matrix =
1583 test_product_tensor_diagonal_matrix<Scalar,Device>(
1584 var_degree , nGrid , nIter , symmetric );
1586 const std::vector<double> perf_crs_tensor =
1587 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1588 var_degree , nGrid , nIter , symmetric );
// Sanity check: both flat orderings must report the same vector length
// (entry 0) and the same number of stored entries (entry 3).
1590 if ( perf_flat_commuted[0] != perf_flat_original[0] ||
1591 perf_flat_commuted[3] != perf_flat_original[3] ) {
1592 std::cout <<
"ERROR: Original and commuted matrix sizes do not match" 1594 <<
" original size = " << perf_flat_original[0]
1595 <<
" , nonzero = " << perf_flat_original[3]
1597 <<
" commuted size = " << perf_flat_commuted[0]
1598 <<
" , nonzero = " << perf_flat_commuted[3]
// Data row. Every speedup is the flat-original time divided by the variant's
// time, so the flat-original speedup is that time divided by itself.
1602 std::cout << nGrid <<
" , " 1605 << perf_crs_tensor[4] <<
" , " 1606 << perf_crs_tensor[3] <<
" , " 1607 << perf_flat_original[0] <<
" , " 1608 << perf_flat_original[1] <<
" , " 1609 << perf_flat_original[1] / perf_flat_original[1] <<
" , " 1610 << perf_flat_original[2] <<
" , " 1611 << perf_flat_original[1] / perf_flat_commuted[1] <<
" , " 1612 << perf_flat_commuted[2] <<
" , " 1613 << perf_flat_original[1] / perf_matrix[1] <<
" , " 1614 << perf_matrix[2] <<
" , " 1615 << perf_flat_original[1] / perf_crs_tensor[1] <<
" , " 1616 << perf_crs_tensor[2] <<
" , " 1623 template<
class Scalar,
class Device ,
class SparseMatOps >
1629 const bool test_block ,
1630 const bool symmetric )
1632 std::cout.precision(8);
1636 std::vector< std::vector<size_t> > fem_graph ;
1637 const size_t graph_length =
1639 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1641 std::cout << std::endl
1643 <<
"\"#Variable\" , " 1644 <<
"\"PolyDegree\" , " 1646 <<
"\"#TensorEntry\" , " 1647 <<
"\"VectorSize\" , " 1648 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , " 1649 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , " 1650 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , " 1651 <<
"\"Block-Crs-Tensor MXV-Speedup\" , " 1652 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , " 1659 for (
int nvar = minvar ; nvar <= maxvar ; ++nvar ) {
1660 std::vector<int> var_degree( nvar , pdeg );
1662 const std::vector<double> perf_crs_tensor =
1663 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1664 var_degree , nGrid , nIter , symmetric );
1675 std::vector<double> perf_original_mat_free_block;
1676 #if defined(HAVE_STOKHOS_KOKKOSLINALG) 1677 #if defined( KOKKOS_HAVE_CUDA ) 1678 enum { is_cuda = Kokkos::Impl::is_same<Device,Kokkos::Cuda>::value };
1680 enum { is_cuda =
false };
1683 perf_original_mat_free_block =
1684 test_original_matrix_free_kokkos<Scalar,Device>(
1685 var_degree , nGrid , nIter , test_block , symmetric );
1687 perf_original_mat_free_block =
1688 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1689 var_degree , nGrid , nIter , test_block , symmetric );
1691 perf_original_mat_free_block =
1692 test_original_matrix_free_view<Scalar,Device,SparseMatOps>(
1693 var_degree , nGrid , nIter , test_block , symmetric );
1696 std::cout << nGrid <<
" , " 1699 << perf_crs_tensor[4] <<
" , " 1700 << perf_crs_tensor[3] <<
" , " 1701 << perf_original_mat_free_block[0] <<
" , " 1702 << perf_original_mat_free_block[1] <<
" , " 1703 << perf_original_mat_free_block[1] /
1704 perf_original_mat_free_block[1] <<
" , " 1705 << perf_original_mat_free_block[2] <<
" , " 1706 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , " 1707 << perf_crs_tensor[2] <<
" , " 1718 template<
class Scalar,
class Device ,
class SparseMatOps >
1724 const bool test_block ,
1725 const bool symmetric )
1727 bool do_flat_sparse =
1728 Kokkos::Impl::is_same<typename Device::memory_space,Kokkos::HostSpace>::value ;
1730 std::cout.precision(8);
1734 std::vector< std::vector<size_t> > fem_graph ;
1735 const size_t graph_length =
1737 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1739 std::cout << std::endl
1741 <<
"\"#Variable\" , " 1742 <<
"\"PolyDegree\" , " 1744 <<
"\"#TensorEntry\" , " 1745 <<
"\"VectorSize\" , " 1746 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , " 1747 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , " 1748 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , " 1749 <<
"\"Block-Crs-Tensor MXV-Speedup\" , " 1750 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , ";
1752 std::cout <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-Speedup\" , " 1753 <<
"\"Block-Lexicographic-Sparse-3-Tensor MXV-GFLOPS\" , " 1754 <<
"\"Lexicographic FLOPS / Crs FLOPS\" , ";
1755 std::cout << std::endl ;
1757 for (
int p = minp ; p <= maxp ; ++p ) {
1758 std::vector<int> var_degree( nvar , p );
1760 const std::vector<double> perf_crs_tensor =
1761 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1762 var_degree , nGrid , nIter , symmetric );
1764 std::vector<double> perf_lexo_sparse_3_tensor;
1765 if (do_flat_sparse) {
1766 perf_lexo_sparse_3_tensor =
1767 test_lexo_block_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1770 const std::vector<double> perf_original_mat_free_block =
1771 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1772 var_degree , nGrid , nIter , test_block , symmetric );
1774 std::cout << nGrid <<
" , " 1777 << perf_crs_tensor[4] <<
" , " 1778 << perf_crs_tensor[3] <<
" , " 1779 << perf_original_mat_free_block[0] <<
" , " 1780 << perf_original_mat_free_block[1] <<
" , " 1781 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , " 1782 << perf_original_mat_free_block[2] <<
" , " 1783 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , " 1784 << perf_crs_tensor[2] <<
" , ";
1785 if (do_flat_sparse) {
1786 std::cout << perf_original_mat_free_block[1] / perf_lexo_sparse_3_tensor[1] <<
" , " 1787 << perf_lexo_sparse_3_tensor[2] <<
" , " 1788 << perf_lexo_sparse_3_tensor[5] / perf_crs_tensor[5];
1792 std::cout << std::endl ;
1798 template<
class Scalar,
class Device ,
class SparseMatOps >
1804 const bool test_block ,
1805 const bool symmetric )
1807 std::cout.precision(8);
1811 std::vector< std::vector<size_t> > fem_graph ;
1812 const size_t graph_length =
1814 std::cout << std::endl <<
"\"FEM NNZ = " << graph_length <<
"\"" << std::endl;
1816 std::cout << std::endl
1818 <<
"\"#Variable\" , " 1819 <<
"\"PolyDegree\" , " 1821 <<
"\"#TensorEntry\" , " 1822 <<
"\"VectorSize\" , " 1823 <<
"\"Original-Matrix-Free-Block-MXV-Time\" , " 1824 <<
"\"Original-Matrix-Free-Block-MXV-Speedup\" , " 1825 <<
"\"Original-Matrix-Free-Block-MXV-GFLOPS\" , " 1826 <<
"\"Block-Crs-Tensor MXV-Speedup\" , " 1827 <<
"\"Block-Crs-Tensor MXV-GFLOPS\" , " 1828 <<
"\"Linear-Sparse-3-Tensor MXV-Speedup\" , " 1829 <<
"\"Linear-Sparse-3-Tensor MXV-GFLOPS\" , " 1832 for (
int nvar = minvar ; nvar <= maxvar ; nvar+=varinc ) {
1833 std::vector<int> var_degree( nvar , 1 );
1835 const std::vector<double> perf_crs_tensor =
1836 test_product_tensor_matrix<Scalar,Stokhos::CrsProductTensor<Scalar,Device>,Device>(
1837 var_degree , nGrid , nIter , symmetric );
1839 const std::vector<double> perf_linear_sparse_3_tensor =
1840 test_linear_tensor<Scalar,Device>( var_degree , nGrid , nIter , symmetric );
1842 const std::vector<double> perf_original_mat_free_block =
1843 test_original_matrix_free_vec<Scalar,Device,SparseMatOps>(
1844 var_degree , nGrid , nIter , test_block , symmetric );
1846 std::cout << nGrid <<
" , " 1849 << perf_crs_tensor[4] <<
" , " 1850 << perf_crs_tensor[3] <<
" , " 1851 << perf_original_mat_free_block[0] <<
" , " 1852 << perf_original_mat_free_block[1] <<
" , " 1853 << perf_original_mat_free_block[1] / perf_original_mat_free_block[1] <<
" , " 1854 << perf_original_mat_free_block[2] <<
" , " 1855 << perf_original_mat_free_block[1] / perf_crs_tensor[1] <<
" , " 1856 << perf_crs_tensor[2] <<
" , " 1857 << perf_original_mat_free_block[1] / perf_linear_sparse_3_tensor[1] <<
" , " 1858 << perf_linear_sparse_3_tensor[2] <<
" , " 1865 template<
class Scalar,
class Device >
std::vector< double > test_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void performance_test_driver_linear(const int minvar, const int maxvar, const int varinc, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Bases defined by combinatorial product of polynomial bases.
std::vector< double > test_product_tensor_diagonal_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Multivariate orthogonal polynomial basis generated from a total order tensor product of univariate po...
Data structure storing a sparse 3-tensor C(i,j,k) in a compressed format.
size_t generate_fem_graph(size_t N, std::vector< std::vector< size_t > > &graph)
void performance_test_driver_poly_deg(const int nvar, const int minp, const int maxp, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Symmetric diagonal storage for a dense matrix.
static scalar_type sparse_cijk_tol()
std::vector< double > test_product_flat_original_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Teuchos::RCP< LTBSparse3Tensor< ordinal_type, value_type > > computeTripleProductTensorLTBBlockLeaf(const TotalOrderBasis< ordinal_type, value_type, LexographicLess< MultiIndex< ordinal_type > > > &product_basis, bool symmetric=false)
std::vector< double > test_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Stokhos::LegendreBasis< int, double > basis_type
Data structure storing a sparse 3-tensor C(i,j,k) in a tree-based format for lexicographically orde...
void multiply(const CrsMatrix< MatrixValue, Device, Layout > &A, const InputMultiVectorType &x, OutputMultiVectorType &y, const std::vector< OrdinalType > &col_indices, SingleColumnMultivectorMultiply)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType * x
std::vector< double > test_lexo_block_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void performance_test_driver_all(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
static scalar_type sparse_cijk_tol()
IntType map_fem_graph_coord(const IntType &N, const IntType &i, const IntType &j, const IntType &k)
std::vector< double > test_simple_tiled_product_tensor_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
void deep_copy(const Stokhos::CrsMatrix< ValueType, DstDevice, Layout > &dst, const Stokhos::CrsMatrix< ValueType, SrcDevice, Layout > &src)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< XD, XP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< YD, YP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< ZD, ZP... > >::value >::type update(const typename Kokkos::View< XD, XP... >::array_type::non_const_value_type &alpha, const Kokkos::View< XD, XP... > &x, const typename Kokkos::View< YD, YP... >::array_type::non_const_value_type &beta, const Kokkos::View< YD, YP... > &y, const typename Kokkos::View< ZD, ZP... >::array_type::non_const_value_type &gamma, const Kokkos::View< ZD, ZP... > &z)
Stokhos::Sparse3Tensor< int, double > Cijk_type
void performance_test_driver(const Teuchos::RCP< const Teuchos::Comm< int > > &comm, const int use_print, const int use_trials, const int use_nodes[], const bool check, Kokkos::Example::FENL::DeviceConfig dev_config)
Abstract base class for 1-D orthogonal polynomials.
Teuchos::RCP< Epetra_CrsGraph > sparse3Tensor2CrsGraph(const Stokhos::OrthogPolyBasis< ordinal_type, value_type > &basis, const Stokhos::Sparse3Tensor< ordinal_type, value_type > &Cijk, const Epetra_Comm &comm)
Build an Epetra_CrsGraph from a sparse 3-tensor.
CRS matrix of dense blocks.
A comparison functor implementing a strict weak ordering based on lexicographic ordering.
Sacado::MP::Vector< storage_type > vec_type
std::vector< double > test_linear_tensor(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
std::vector< double > test_original_matrix_free_vec(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)
void performance_test_driver_poly(const int pdeg, const int minvar, const int maxvar, const int nGrid, const int nIter, const bool test_block, const bool symmetric)
std::vector< double > test_product_flat_commuted_matrix(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool symmetric)
const IndexType const IndexType const IndexType const IndexType const ValueType const ValueType ValueType * y
void update(const ValueType &alpha, VectorType &x, const ValueType &beta, const VectorType &y)
std::enable_if< Kokkos::is_view_uq_pce< Kokkos::View< InputType, InputP... > >::value &&Kokkos::is_view_uq_pce< Kokkos::View< OutputType, OutputP... > >::value >::type spmv(const char mode[], const AlphaType &a, const MatrixType &A, const Kokkos::View< InputType, InputP... > &x, const BetaType &b, const Kokkos::View< OutputType, OutputP... > &y, const RANK_ONE)
std::vector< double > test_original_matrix_free_view(const std::vector< int > &var_degree, const int nGrid, const int iterCount, const bool test_block, const bool symmetric)