43 #include "Teuchos_UnitTestHarness.hpp" 44 #include "Teuchos_UnitTestRepository.hpp" 45 #include "Teuchos_GlobalMPISession.hpp" 49 #include "Kokkos_Core.hpp" 52 #ifdef KOKKOS_HAVE_PTHREAD 57 #if defined(KOKKOS_HAVE_OPENMP) && defined(HAVE_STOKHOS_MKL) 62 #include "Stokhos_KokkosArrayKernelsUnitTestNew.hpp" 69 #include "Stokhos_KokkosArrayKernelsUnitTestNewDecl.hpp" 77 typedef int size_type;
85 const int iEntryEnd = tensor.entry_end(i);
86 for (
int iEntry = iEntryBeg ; iEntry < iEntryEnd ; ++iEntry ) {
87 const int kj = tensor.coord( iEntry );
88 const int j = kj & 0x0ffff;
89 const int k = kj >> 16;
93 if (
j == k) c2 *= 2.0;
101 out <<
"(" << ii <<
"," << jj <<
"," << kk <<
"): " << c
102 <<
" == " << c2 <<
" failed!" << std::endl;
115 Teuchos::ParameterList params;
116 params.set(
"Tile Size",10);
117 params.set(
"Max Tiles",10000);
127 const size_t n_tile = tensor.
num_tiles();
128 for (
size_t tile = 0 ; tile < n_tile ; ++tile ) {
129 const size_t i_offset = tensor.offset(tile, 0);
130 const size_t j_offset = tensor.offset(tile, 1);
131 const size_t k_offset = tensor.offset(tile, 2);
132 const size_t n_row = tensor.num_rows(tile);
134 for (
size_t i=0; i<n_row; ++i) {
135 const size_t iEntryBeg = tensor.entry_begin(tile,i);
136 const size_t iEntryEnd = tensor.entry_end(tile,i);
137 for (
size_t iEntry = iEntryBeg ; iEntry < iEntryEnd ; ++iEntry ) {
138 const size_t j = tensor.coord(iEntry,0);
139 const size_t k = tensor.coord(iEntry,1);
141 int ii = i + i_offset;
142 int jj =
j + j_offset;
143 int kk = k + k_offset;
149 out <<
"(" << ii <<
"," << jj <<
"," << kk <<
"): " << c
150 <<
" == " << c2 <<
" failed!" << std::endl;
164 Teuchos::ParameterList params;
165 params.set(
"Tile Size",10);
168 Stokhos::create_simple_tiled_product_tensor<Device>(
173 for (
size_t i_tile = 0; i_tile<n_i_tile; ++i_tile) {
174 const size_t i_begin = tensor.i_begin(i_tile);
175 const size_t i_size = tensor.i_size(i_tile);
177 const size_t n_j_tile = tensor.num_j_tiles(i_tile);
178 for (
size_t j_tile = 0; j_tile<n_j_tile; ++j_tile) {
179 const size_t j_begin = tensor.j_begin(i_tile, j_tile);
182 const size_t n_k_tile = tensor.num_k_tiles(i_tile, j_tile);
183 for (
size_t k_tile = 0; k_tile<n_k_tile; ++k_tile) {
184 const size_t k_begin = tensor.k_begin(i_tile, j_tile, k_tile);
187 for (
size_t i=0; i<i_size; ++i) {
188 const size_t iEntryBeg = tensor.entry_begin(i_tile,j_tile,k_tile,i);
189 const size_t iEntryEnd = tensor.entry_end(i_tile,j_tile,k_tile,i);
190 for (
size_t iEntry = iEntryBeg ; iEntry < iEntryEnd ; ++iEntry ) {
191 const size_t j = tensor.coord(iEntry,0);
192 const size_t k = tensor.coord(iEntry,1);
194 int ii = i + i_begin;
195 int jj =
j + j_begin;
196 int kk = k + k_begin;
205 out <<
"(" << ii <<
"," << jj <<
"," << kk <<
"): " << c
206 <<
" == " << c2 <<
" failed!" << std::endl;
214 TEUCHOS_TEST_EQUALITY( num_entry,
setup.
Cijk->num_entries(), out, success );
217 template <
typename Scalar,
typename Device,
bool Pack>
219 Teuchos::FancyOStream& out) {
226 Stokhos::create_coo_product_tensor<Device, Pack>(
229 const size_t nEntry = tensor.entry_count();
231 for (
size_t entry = 0 ; entry < nEntry ; ++entry ) {
232 tensor.coord(entry, i,
j, k);
234 if (
j == k) c2 *= 2.0;
238 out <<
"(" << i <<
"," <<
j <<
"," << k <<
"): " << c
239 <<
" == " << c2 <<
" failed!" << std::endl;
248 success = test_coo_product_tensor_cijk<Scalar,Device,true>(
setup, out);
252 success = test_coo_product_tensor_cijk<Scalar,Device,false>(
setup, out);
260 typedef size_t size_type;
266 const size_type nk = tensor.
num_k(i);
267 const size_type kBeg = tensor.k_begin(i);
268 const size_type kEnd = kBeg + nk;
269 for (size_type kEntry = kBeg; kEntry < kEnd; ++kEntry) {
270 const size_type k = tensor.k_coord(kEntry);
271 const size_type nj = tensor.num_j(kEntry);
272 const size_type jBeg = tensor.j_begin(kEntry);
273 const size_type jEnd = jBeg + nj;
274 for (size_type jEntry = jBeg; jEntry < jEnd; ++jEntry) {
275 const size_type
j = tensor.j_coord(jEntry);
277 if (
j == k) c2 *= 2.0;
280 out <<
"(" << i <<
"," <<
j <<
"," << k <<
"): " << c
281 <<
" == " << c2 <<
" failed!" << std::endl;
294 typedef size_t size_type;
298 const size_type nk = tensor.
num_k();
300 for ( size_type k = 0; k < nk; ++k) {
301 const size_type nj = tensor.num_j(k);
302 const size_type jBeg = tensor.j_begin(k);
303 const size_type jEnd = jBeg + nj;
304 for (size_type jEntry = jBeg; jEntry < jEnd; ++jEntry) {
305 const size_type
j = tensor.j_coord(jEntry);
306 const size_type ni = tensor.num_i(jEntry);
307 const size_type iBeg = tensor.i_begin(jEntry);
308 const size_type iEnd = iBeg + ni;
309 for (size_type iEntry = iBeg; iEntry < iEnd; ++iEntry) {
310 const size_type i = tensor.i_coord(iEntry);
312 if (
j == k) c2 *= 2.0;
315 out <<
"(" << i <<
"," <<
j <<
"," << k <<
"): " << c
316 <<
" == " << c2 <<
" failed!" << std::endl;
324 #define UNIT_TEST_GROUP_SCALAR_HOST_DEVICE( SCALAR, DEVICE ) \ 325 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, CrsProductTensorCijk, SCALAR, DEVICE ) \ 326 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, TiledCrsProductTensorCijk, SCALAR, DEVICE ) \ 327 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, SimpleTiledCrsProductTensorCijk, SCALAR, DEVICE ) \ 328 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, CooProductTensorCijk_Packed, SCALAR, DEVICE ) \ 329 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, CooProductTensorCijk_Unpacked, SCALAR, DEVICE ) \ 330 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, FlatSparseCijk, SCALAR, DEVICE ) \ 331 TEUCHOS_UNIT_TEST_TEMPLATE_2_INSTANT( Kokkos_SG_SpMv, FlatSparseCijk_kji, SCALAR, DEVICE ) 333 #ifdef KOKKOS_HAVE_PTHREAD 334 using Kokkos::Threads;
339 #ifdef KOKKOS_HAVE_OPENMP 340 using Kokkos::OpenMP;
344 #ifdef HAVE_STOKHOS_MKL 347 typedef Kokkos::OpenMP Device;
348 typedef Stokhos::MKLMultiply SparseMatOps;
349 success = test_crs_matrix_free<Scalar,Device,SparseMatOps>(
356 using Kokkos::Serial;
361 Teuchos::GlobalMPISession mpiSession(&argc, &
argv);
363 const size_t team_count =
364 Kokkos::hwloc::get_available_numa_count() *
365 Kokkos::hwloc::get_available_cores_per_numa();
366 const size_t threads_per_team =
367 Kokkos::hwloc::get_available_threads_per_core();
377 #ifdef KOKKOS_HAVE_OPENMP 379 Kokkos::OpenMP::initialize( team_count * threads_per_team );
383 #ifdef KOKKOS_HAVE_CUDA 385 Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
386 Kokkos::Cuda::print_configuration( std::cout );
393 int ret = Teuchos::UnitTestRepository::runUnitTestsFromMain(argc,
argv);
396 #ifdef KOKKOS_HAVE_PTHREAD 397 Kokkos::Threads::finalize();
399 #ifdef KOKKOS_HAVE_OPENMP 400 Kokkos::OpenMP::finalize();
402 #ifdef KOKKOS_HAVE_CUDA 403 Kokkos::Cuda::finalize();
KOKKOS_INLINE_FUNCTION size_type num_k(size_type i) const
Number of k entries with a coordinate 'i'.
KOKKOS_INLINE_FUNCTION size_type num_i_tiles() const
Number of i-tiles.
KOKKOS_INLINE_FUNCTION size_type num_tiles() const
Number of tiles.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
void setup(int p_=5, int d_=2)
#define UNIT_TEST_GROUP_SCALAR_HOST_DEVICE(SCALAR, DEVICE)
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
int main(int argc, char *argv[])
KOKKOS_INLINE_FUNCTION PCE< Storage > abs(const PCE< Storage > &a)
RCP< product_basis_type > basis
Teuchos::Array< int > inv_perm
#define UNIT_TEST_GROUP_SCALAR_DEVICE(SCALAR, DEVICE)
bool test_coo_product_tensor_cijk(const UnitTestSetup &setup, Teuchos::FancyOStream &out)
KOKKOS_INLINE_FUNCTION size_type entry_begin(size_type i) const
Beginning of the entries with coordinate 'i'.
Sparse product tensor with replicated entries to provide subsets with a given coordinate.
Sparse product tensor using 'COO'-like storage format.
TEUCHOS_UNIT_TEST_TEMPLATE_2_DECL(Kokkos_SG_SpMv, CrsProductTensorCijk, Scalar, Device)
KOKKOS_INLINE_FUNCTION size_type num_k() const
Number of k entries.