Amesos2 - Direct Sparse Solver Interfaces  Version of the Day
Amesos2_Superludist_def.hpp
Go to the documentation of this file.
1 // @HEADER
2 //
3 // ***********************************************************************
4 //
5 // Amesos2: Templated Direct Sparse Solver Package
6 // Copyright 2011 Sandia Corporation
7 //
8 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
9 // the U.S. Government retains certain rights in this software.
10 //
11 // Redistribution and use in source and binary forms, with or without
12 // modification, are permitted provided that the following conditions are
13 // met:
14 //
15 // 1. Redistributions of source code must retain the above copyright
16 // notice, this list of conditions and the following disclaimer.
17 //
18 // 2. Redistributions in binary form must reproduce the above copyright
19 // notice, this list of conditions and the following disclaimer in the
20 // documentation and/or other materials provided with the distribution.
21 //
22 // 3. Neither the name of the Corporation nor the names of the
23 // contributors may be used to endorse or promote products derived from
24 // this software without specific prior written permission.
25 //
26 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
27 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
30 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
31 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
32 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
33 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
34 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
35 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
36 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37 //
38 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
39 //
40 // ***********************************************************************
41 //
42 // @HEADER
43 
52 #ifndef AMESOS2_SUPERLUDIST_DEF_HPP
53 #define AMESOS2_SUPERLUDIST_DEF_HPP
54 
55 #include <Teuchos_Tuple.hpp>
56 #include <Teuchos_StandardParameterEntryValidators.hpp>
57 #include <Teuchos_DefaultMpiComm.hpp>
58 
61 #include "Amesos2_Util.hpp"
62 
63 
64 namespace Amesos2 {
65 
66 
67  template <class Matrix, class Vector>
68  Superludist<Matrix,Vector>::Superludist(Teuchos::RCP<const Matrix> A,
69  Teuchos::RCP<Vector> X,
70  Teuchos::RCP<const Vector> B)
71  : SolverCore<Amesos2::Superludist,Matrix,Vector>(A, X, B)
72  , nzvals_() // initialization to empty arrays
73  , colind_()
74  , rowptr_()
75  , bvals_()
76  , xvals_()
77  , in_grid_(false)
78  {
79  using Teuchos::Comm;
80  // It's OK to depend on MpiComm explicitly here, because
81  // SuperLU_DIST requires MPI anyway.
82  using Teuchos::MpiComm;
83  using Teuchos::outArg;
84  using Teuchos::ParameterList;
85  using Teuchos::parameterList;
86  using Teuchos::RCP;
87  using Teuchos::rcp;
88  using Teuchos::rcp_dynamic_cast;
89  using Teuchos::REDUCE_SUM;
90  using Teuchos::reduceAll;
91  typedef global_ordinal_type GO;
92  typedef Tpetra::Map<local_ordinal_type, GO, node_type> map_type;
93 
95  // Set up the SuperLU_DIST processor grid //
97 
98  RCP<const Comm<int> > comm = this->getComm ();
99  const int myRank = comm->getRank ();
100  const int numProcs = comm->getSize ();
101 
102  SLUD::int_t nprow, npcol;
103  get_default_grid_size (numProcs, nprow, npcol);
104 
105  {
106  // FIXME (mfh 16 Dec 2014) getComm() just returns
107  // matrixA_->getComm(), so it's not clear why we need to ask for
108  // the matrix's communicator separately here.
109  RCP<const Comm<int> > matComm = this->matrixA_->getComm ();
110  TEUCHOS_TEST_FOR_EXCEPTION(
111  matComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
112  "constructor: The matrix's communicator is null!");
113  RCP<const MpiComm<int> > matMpiComm =
114  rcp_dynamic_cast<const MpiComm<int> > (matComm);
115  // FIXME (mfh 16 Dec 2014) If the matrix's communicator is a
116  // SerialComm, we probably could just use MPI_COMM_SELF here.
117  // I'm not sure if SuperLU_DIST is smart enough to handle that
118  // case, though.
119  TEUCHOS_TEST_FOR_EXCEPTION(
120  matMpiComm.is_null (), std::logic_error, "Amesos2::Superlustdist "
121  "constructor: The matrix's communicator is not an MpiComm!");
122  TEUCHOS_TEST_FOR_EXCEPTION(
123  matMpiComm->getRawMpiComm ().is_null (), std::logic_error, "Amesos2::"
124  "Superlustdist constructor: The matrix's communicator claims to be a "
125  "Teuchos::MpiComm<int>, but its getRawPtrComm() method returns "
126  "Teuchos::null! This means that the underlying MPI_Comm doesn't even "
127  "exist, which likely implies that the Teuchos::MpiComm was constructed "
128  "incorrectly. It means something different than if the MPI_Comm were "
129  "MPI_COMM_NULL.");
130  MPI_Comm rawMpiComm = (* (matMpiComm->getRawMpiComm ())) ();
131  data_.mat_comm = rawMpiComm;
132  // This looks a bit like ScaLAPACK's grid initialization (which
133  // technically takes place in the BLACS, not in ScaLAPACK
134  // proper). See http://netlib.org/scalapack/slug/node34.html.
135  // The main difference is that SuperLU_DIST depends explicitly
136  // on MPI, while the BLACS hides its communication protocol.
137  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
138  }
139 
141  // Set some default parameters. //
142  // //
143  // Must do this after grid has been created in //
144  // case user specifies the nprow and npcol parameters //
146  SLUD::set_default_options_dist(&data_.options);
147 
148  RCP<ParameterList> default_params =
149  parameterList (* (this->getValidParameters ()));
150  this->setParameters (default_params);
151 
152  // Set some internal options
153  data_.options.Fact = SLUD::DOFACT;
154  data_.equed = SLUD::NOEQUIL; // No equilibration has yet been performed
155  data_.options.SolveInitialized = SLUD::NO;
156  data_.options.RefineInitialized = SLUD::NO;
157  data_.rowequ = false;
158  data_.colequ = false;
159  data_.perm_r.resize(this->globalNumRows_);
160  data_.perm_c.resize(this->globalNumCols_);
161 
163  // Set up a communicator for the parallel column ordering and //
164  // parallel symbolic factorization. //
166  data_.symb_comm = MPI_COMM_NULL;
167 
168  // domains is the next power of 2 less than nprow*npcol. This
169  // value will be used for creating an MPI communicator for the
170  // pre-ordering and symbolic factorization methods.
171  data_.domains = (int) ( pow(2.0, floor(log10((double)nprow*npcol)/log10(2.0))) );
172 
173  const int color = (myRank < data_.domains) ? 0 : MPI_UNDEFINED;
174  MPI_Comm_split (data_.mat_comm, color, myRank, &(data_.symb_comm));
175 
177  // Set up a row Map that only includes processes that are in the
178  // SuperLU process grid. This will be used for redistributing A.
180 
181  // mfh 16 Dec 2014: We could use createWeightedContigMapWithNode
182  // with myProcParticipates as the weight, but that costs an extra
183  // all-reduce.
184 
185  // Set to 1 if I am in the grid, and I get some of the matrix rows.
186  int myProcParticipates = 0;
187  if (myRank < nprow * npcol) {
188  in_grid_ = true;
189  myProcParticipates = 1;
190  }
191 
192  // Compute how many processes in the communicator belong to the
193  // process grid.
194  int numParticipatingProcs = 0;
195  reduceAll<int, int> (*comm, REDUCE_SUM, myProcParticipates,
196  outArg (numParticipatingProcs));
197  TEUCHOS_TEST_FOR_EXCEPTION(
198  this->globalNumRows_ != 0 && numParticipatingProcs == 0,
199  std::logic_error, "Amesos2::Superludist constructor: The matrix has "
200  << this->globalNumRows_ << " > 0 global row(s), but no processes in the "
201  "communicator want to participate in its factorization! nprow = "
202  << nprow << " and npcol = " << npcol << ".");
203 
204  // Divide up the rows among the participating processes.
205  size_t myNumRows = 0;
206  {
207  const GO GNR = static_cast<GO> (this->globalNumRows_);
208  const GO quotient = (numParticipatingProcs == 0) ? static_cast<GO> (0) :
209  GNR / static_cast<GO> (numParticipatingProcs);
210  const GO remainder =
211  GNR - quotient * static_cast<GO> (numParticipatingProcs);
212  const GO lclNumRows = (static_cast<GO> (myRank) < remainder) ?
213  (quotient + static_cast<GO> (1)) : quotient;
214  myNumRows = static_cast<size_t> (lclNumRows);
215  }
216 
217  // TODO: might only need to initialize if parallel symbolic factorization is requested.
218  const GO indexBase = this->matrixA_->getRowMap ()->getIndexBase ();
220  rcp (new map_type (this->globalNumRows_, myNumRows, indexBase, comm));
221 
223  // Do some other initialization //
225 
226  data_.A.Store = NULL;
227  function_map::LUstructInit(this->globalNumRows_, this->globalNumCols_, &(data_.lu));
228  SLUD::PStatInit(&(data_.stat));
229  // We do not use ScalePermstructInit because we will use our own
230  // arrays for storing perm_r and perm_c
231  data_.scale_perm.perm_r = data_.perm_r.getRawPtr();
232  data_.scale_perm.perm_c = data_.perm_c.getRawPtr();
233  }
234 
235 
236  template <class Matrix, class Vector>
238  {
239  /* Free SuperLU_DIST data_types
240  * - Matrices
241  * - Vectors
242  * - Stat object
243  * - ScalePerm, LUstruct, grid, and solve objects
244  *
245  * Note: the function definitions are the same regardless whether
246  * complex or real, so we arbitrarily use the D namespace
247  */
248  if ( this->status_.getNumPreOrder() > 0 ){
249  free( data_.sizes );
250  free( data_.fstVtxSep );
251  }
252 
253  // Cleanup old matrix store memory if it's non-NULL. Our
254  // Teuchos::Array's will destroy rowind, colptr, and nzval for us
255  if( data_.A.Store != NULL ){
256  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
257  }
258 
259  // LU data is initialized in numericFactorization_impl()
260  if ( this->status_.getNumNumericFact() > 0 ){
261  function_map::Destroy_LU(this->globalNumRows_, &(data_.grid), &(data_.lu));
262  }
263  function_map::LUstructFree(&(data_.lu));
264 
265  // If a symbolic factorization is ever performed without a
266  // follow-up numericfactorization, there are some arrays in the
267  // Pslu_freeable struct which will never be free'd by
268  // SuperLU_DIST.
269  if ( this->status_.symbolicFactorizationDone() &&
270  !this->status_.numericFactorizationDone() ){
271  if ( data_.pslu_freeable.xlsub != NULL ){
272  free( data_.pslu_freeable.xlsub );
273  free( data_.pslu_freeable.lsub );
274  }
275  if ( data_.pslu_freeable.xusub != NULL ){
276  free( data_.pslu_freeable.xusub );
277  free( data_.pslu_freeable.usub );
278  }
279  if ( data_.pslu_freeable.supno_loc != NULL ){
280  free( data_.pslu_freeable.supno_loc );
281  free( data_.pslu_freeable.xsup_beg_loc );
282  free( data_.pslu_freeable.xsup_end_loc );
283  }
284  free( data_.pslu_freeable.globToLoc );
285  }
286 
287  SLUD::PStatFree( &(data_.stat) ) ;
288 
289  // Teuchos::Arrays will free R, C, perm_r, and perm_c
290  // SLUD::D::ScalePermstructFree(&(data_.scale_perm));
291 
292  if ( data_.options.SolveInitialized == SLUD::YES )
293  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
294 
295  SLUD::superlu_gridexit(&(data_.grid)); // TODO: are there any
296  // cases where grid
297  // wouldn't be initialized?
298 
299  if ( data_.symb_comm != MPI_COMM_NULL ) MPI_Comm_free(&(data_.symb_comm));
300  }
301 
302  template<class Matrix, class Vector>
303  int
305  {
306  // We will always use the NATURAL row ordering to avoid the
307  // sequential bottleneck present when doing any other row
308  // ordering scheme from SuperLU_DIST
309  //
310  // Set perm_r to be the natural ordering
311  SLUD::int_t slu_rows_ub = Teuchos::as<SLUD::int_t>(this->globalNumRows_);
312  for( SLUD::int_t i = 0; i < slu_rows_ub; ++i ) data_.perm_r[i] = i;
313 
314  // loadA_impl(); // Refresh matrix values
315 
316  if( in_grid_ ){
317  // If this function has been called at least once, then the
318  // sizes, and fstVtxSep arrays were allocated in
319  // get_perm_c_parmetis. Delete them before calling that
320  // function again. These arrays will also be dealloc'd in the
321  // deconstructor.
322  if( this->status_.getNumPreOrder() > 0 ){
323  free( data_.sizes );
324  free( data_.fstVtxSep );
325  }
326 #ifdef HAVE_AMESOS2_TIMERS
327  Teuchos::TimeMonitor preOrderTime( this->timers_.preOrderTime_ );
328 #endif
329 
330  float info = 0.0;
331  info = SLUD::get_perm_c_parmetis( &(data_.A),
332  data_.perm_r.getRawPtr(), data_.perm_c.getRawPtr(),
333  data_.grid.nprow * data_.grid.npcol, data_.domains,
334  &(data_.sizes), &(data_.fstVtxSep),
335  &(data_.grid), &(data_.symb_comm) );
336 
337  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
338  std::runtime_error,
339  "SuperLU_DIST pre-ordering ran out of memory after allocating "
340  << info << " bytes of memory" );
341  }
342 
343  // Ordering will be applied directly before numeric factorization,
344  // after we have a chance to get updated coefficients from the
345  // matrix
346 
347  return EXIT_SUCCESS;
348  }
349 
350 
351 
352  template <class Matrix, class Vector>
353  int
355  {
356  // loadA_impl(); // Refresh matrix values
357 
358  if( in_grid_ ){
359 
360 #ifdef HAVE_AMESOS2_TIMERS
361  Teuchos::TimeMonitor symFactTime( this->timers_.symFactTime_ );
362 #endif
363 
364  float info = 0.0;
365  info = SLUD::symbfact_dist((data_.grid.nprow) * (data_.grid.npcol),
366  data_.domains, &(data_.A), data_.perm_c.getRawPtr(),
367  data_.perm_r.getRawPtr(), data_.sizes,
368  data_.fstVtxSep, &(data_.pslu_freeable),
369  &(data_.grid.comm), &(data_.symb_comm),
370  &(data_.mem_usage));
371 
372  TEUCHOS_TEST_FOR_EXCEPTION( info > 0.0,
373  std::runtime_error,
374  "SuperLU_DIST symbolic factorization ran out of memory after"
375  " allocating " << info << " bytes of memory" );
376  }
377  same_symbolic_ = false;
378  same_solve_struct_ = false;
379 
380  return EXIT_SUCCESS;
381  }
382 
383 
384  template <class Matrix, class Vector>
385  int
387  using Teuchos::as;
388 
389  // loadA_impl(); // Refresh the matrix values
390 
391  // if( data_.options.Equil == SLUD::YES ){
392  // // Apply the scalings computed in preOrdering
393  // function_map::laqgs(&(data_.A), data_.R.getRawPtr(),
394  // data_.C.getRawPtr(), data_.rowcnd, data_.colcnd,
395  // data_.amax, &(data_.equed));
396 
397  // data_.rowequ = (data_.equed == SLUD::ROW) || (data_.equed == SLUD::BOTH);
398  // data_.colequ = (data_.equed == SLUD::COL) || (data_.equed == SLUD::BOTH);
399  // }
400 
401  if( in_grid_ ){
402  // Apply the column ordering, so that AC is the column-permuted A, and compute etree
403  size_t nnz_loc = ((SLUD::NRformat_loc*)data_.A.Store)->nnz_loc;
404  for( size_t i = 0; i < nnz_loc; ++i ) colind_[i] = data_.perm_c[colind_[i]];
405 
406  // Distribute data from the symbolic factorization
407  if( same_symbolic_ ){
408  // Note: with the SamePattern_SameRowPerm options, it does not
409  // matter that the glu_freeable member has never been
410  // initialized, because it is never accessed. It is a
411  // placeholder arg. The real work is done in data_.lu
412  function_map::pdistribute(SLUD::SamePattern_SameRowPerm,
413  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
414  &(data_.A), &(data_.scale_perm),
415  &(data_.glu_freeable), &(data_.lu),
416  &(data_.grid));
417  } else {
418  function_map::dist_psymbtonum(SLUD::DOFACT,
419  as<SLUD::int_t>(this->globalNumRows_), // aka "n"
420  &(data_.A), &(data_.scale_perm),
421  &(data_.pslu_freeable), &(data_.lu),
422  &(data_.grid));
423  }
424 
425  // Retrieve the normI of A (required by gstrf).
426  double anorm = function_map::plangs((char *)"I", &(data_.A), &(data_.grid));
427 
428  int info = 0;
429  {
430 #ifdef HAVE_AMESOS2_TIMERS
431  Teuchos::TimeMonitor numFactTimer(this->timers_.numFactTime_);
432 #endif
433 
434  function_map::gstrf(&(data_.options), this->globalNumRows_,
435  this->globalNumCols_, anorm, &(data_.lu),
436  &(data_.grid), &(data_.stat), &info);
437  }
438 
439  // Check output
440  TEUCHOS_TEST_FOR_EXCEPTION( info > 0,
441  std::runtime_error,
442  "L and U factors have been computed but U("
443  << info << "," << info << ") is exactly zero "
444  "(i.e. U is singular)");
445  }
446 
447  // The other option, that info_st < 0, denotes invalid parameters
448  // to the function, but we'll assume for now that that won't
449  // happen.
450 
451  data_.options.Fact = SLUD::FACTORED;
452  same_symbolic_ = true;
453 
454  return EXIT_SUCCESS;
455  }
456 
457 
458  template <class Matrix, class Vector>
459  int
461  const Teuchos::Ptr<const MultiVecAdapter<Vector> > B) const
462  {
463  using Teuchos::as;
464 
465  // local_len_rhs is how many of the multivector rows belong to
466  // this processor in the SuperLU_DIST processor grid.
467  const size_t local_len_rhs = superlu_rowmap_->getNodeNumElements();
468  const global_size_type nrhs = X->getGlobalNumVectors();
469  const global_ordinal_type first_global_row_b = superlu_rowmap_->getMinGlobalIndex();
470 
471  // make sure our multivector storage is sized appropriately
472  bvals_.resize(nrhs * local_len_rhs);
473  xvals_.resize(nrhs * local_len_rhs);
474 
475  // We assume the global length of the two vectors have already been
476  // checked for compatibility
477 
478  { // get the values from B
479 #ifdef HAVE_AMESOS2_TIMERS
480  Teuchos::TimeMonitor convTimer(this->timers_.vecConvTime_);
481 #endif
482 
483  {
484  // The input dense matrix for B should be distributed in the
485  // same manner as the superlu_dist matrix. That is, if a
486  // processor has m_loc rows of A, then it should also have
487  // m_loc rows of B (and the same rows). We accomplish this by
488  // distributing the multivector rows with the same Map that
489  // the matrix A's rows are distributed.
490 #ifdef HAVE_AMESOS2_TIMERS
491  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
492 #endif
493 
494  // get grid-distributed mv data. The multivector data will be
495  // distributed across the processes in the SuperLU_DIST grid.
496  typedef Util::get_1d_copy_helper<MultiVecAdapter<Vector>,slu_type> copy_helper;
497  copy_helper::do_get(B,
498  bvals_(),
499  local_len_rhs,
500  Teuchos::ptrInArg(*superlu_rowmap_));
501  }
502  } // end block for conversion time
503 
504  if( in_grid_ ){
505  // if( data_.options.trans == SLUD::NOTRANS ){
506  // if( data_.rowequ ){ // row equilibration has been done on AC
507  // // scale bxvals_ by diag(R)
508  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
509  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
510  // }
511  // } else if( data_.colequ ){ // column equilibration has been done on AC
512  // // scale bxvals_ by diag(C)
513  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
514  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
515  // }
516 
517  // Initialize the SOLVEstruct_t.
518  //
519  // We are able to reuse the solve struct if we have not changed
520  // the sparsity pattern of L and U since the last solve
521  if( !same_solve_struct_ ){
522  if( data_.options.SolveInitialized == SLUD::YES ){
523  function_map::SolveFinalize(&(data_.options), &(data_.solve_struct));
524  }
525  function_map::SolveInit(&(data_.options), &(data_.A), data_.perm_r.getRawPtr(),
526  data_.perm_c.getRawPtr(), as<SLUD::int_t>(nrhs), &(data_.lu),
527  &(data_.grid), &(data_.solve_struct));
528  // Flag that we can reuse this solve_struct unless another
529  // symbolicFactorization is called between here and the next
530  // solve.
531  same_solve_struct_ = true;
532  }
533 
534  int ierr = 0; // returned error code
535  {
536 #ifdef HAVE_AMESOS2_TIMERS
537  Teuchos::TimeMonitor solveTimer(this->timers_.solveTime_);
538 #endif
539 
540  function_map::gstrs(as<SLUD::int_t>(this->globalNumRows_), &(data_.lu),
541  &(data_.scale_perm), &(data_.grid), bvals_.getRawPtr(),
542  as<SLUD::int_t>(local_len_rhs), as<SLUD::int_t>(first_global_row_b),
543  as<SLUD::int_t>(local_len_rhs), as<int>(nrhs),
544  &(data_.solve_struct), &(data_.stat), &ierr);
545  } // end block for solve time
546 
547  TEUCHOS_TEST_FOR_EXCEPTION( ierr < 0,
548  std::runtime_error,
549  "Argument " << -ierr << " to gstrs had an illegal value" );
550 
551  // "Un-scale" the solution so that it is a solution of the original system
552  // if( data_.options.trans == SLUD::NOTRANS ){
553  // if( data_.colequ ){ // column equilibration has been done on AC
554  // // scale bxvals_ by diag(C)
555  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.C(),
556  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
557  // }
558  // } else if( data_.rowequ ){ // row equilibration has been done on AC
559  // // scale bxvals_ by diag(R)
560  // Util::scale(bxvals_(), as<size_t>(len_rhs), ldbx_, data_.R(),
561  // SLUD::slu_mt_mult<slu_type,magnitude_type>());
562  // }
563  { // permute B to a solution of the original system
564 #ifdef HAVE_AMESOS2_TIMERS
565  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
566 #endif
567  SLUD::int_t ld = as<SLUD::int_t>(local_len_rhs);
568  function_map::permute_Dense_Matrix(as<SLUD::int_t>(first_global_row_b),
569  as<SLUD::int_t>(local_len_rhs),
570  data_.solve_struct.row_to_proc,
571  data_.solve_struct.inv_perm_c,
572  bvals_.getRawPtr(), ld,
573  xvals_.getRawPtr(), ld,
574  as<int>(nrhs),
575  &(data_.grid));
576  }
577  }
578 
579  /* Update X's global values */
580  {
581 #ifdef HAVE_AMESOS2_TIMERS
582  Teuchos::TimeMonitor redistTimer(this->timers_.vecRedistTime_);
583 #endif
584 
585  typedef Util::put_1d_data_helper<MultiVecAdapter<Vector>,slu_type> put_helper;
586  put_helper::do_put(X,
587  xvals_(),
588  local_len_rhs,
589  Teuchos::ptrInArg(*superlu_rowmap_));
590  }
591 
592  return EXIT_SUCCESS;
593  }
594 
595 
596  template <class Matrix, class Vector>
597  bool
599  {
600  // SuperLU_DIST requires square matrices
601  return( this->globalNumRows_ == this->globalNumCols_ );
602  }
603 
604 
605  template <class Matrix, class Vector>
606  void
607  Superludist<Matrix,Vector>::setParameters_impl(const Teuchos::RCP<Teuchos::ParameterList> & parameterList )
608  {
609  using Teuchos::as;
610  using Teuchos::RCP;
611  using Teuchos::getIntegralValue;
612  using Teuchos::ParameterEntryValidator;
613 
614  RCP<const Teuchos::ParameterList> valid_params = getValidParameters_impl();
615 
616  if( parameterList->isParameter("npcol") || parameterList->isParameter("nprow") ){
617  TEUCHOS_TEST_FOR_EXCEPTION( !(parameterList->isParameter("nprow") &&
618  parameterList->isParameter("npcol")),
619  std::invalid_argument,
620  "nprow and npcol must be set together" );
621 
622  SLUD::int_t nprow = parameterList->template get<SLUD::int_t>("nprow");
623  SLUD::int_t npcol = parameterList->template get<SLUD::int_t>("npcol");
624 
625  TEUCHOS_TEST_FOR_EXCEPTION( nprow * npcol > this->getComm()->getSize(),
626  std::invalid_argument,
627  "nprow and npcol combination invalid" );
628 
629  if( (npcol != data_.grid.npcol) || (nprow != data_.grid.nprow) ){
630  // De-allocate the default grid that was initialized in the constructor
631  SLUD::superlu_gridexit(&(data_.grid));
632  // Create a new grid
633  SLUD::superlu_gridinit(data_.mat_comm, nprow, npcol, &(data_.grid));
634  } // else our grid has not changed size since the last initialization
635  }
636 
637  TEUCHOS_TEST_FOR_EXCEPTION( this->control_.useTranspose_,
638  std::invalid_argument,
639  "SuperLU_DIST does not support solving the tranpose system" );
640 
641  data_.options.Trans = SLUD::NOTRANS; // should always be set this way;
642 
643  // TODO: Uncomment when supported
644  // bool equil = parameterList->get<bool>("Equil", true);
645  // data_.options.Equil = equil ? SLUD::YES : SLUD::NO;
646  data_.options.Equil = SLUD::NO;
647 
648  if( parameterList->isParameter("ColPerm") ){
649  RCP<const ParameterEntryValidator> colperm_validator = valid_params->getEntry("ColPerm").validator();
650  parameterList->getEntry("ColPerm").setValidator(colperm_validator);
651 
652  data_.options.ColPerm = getIntegralValue<SLUD::colperm_t>(*parameterList, "ColPerm");
653  }
654 
655  // Always use the "NOROWPERM" option to avoid a serial bottleneck
656  // with the weighted bipartite matching algorithm used for the
657  // "LargeDiag" RowPerm. Note the inconsistency with the SuperLU
658  // User guide (which states that the value should be "NATURAL").
659  data_.options.RowPerm = SLUD::NOROWPERM;
660 
661  // TODO: Uncomment when supported
662  // if( parameterList->isParameter("IterRefine") ){
663  // RCP<const ParameterEntryValidator> iter_refine_validator = valid_params->getEntry("IterRefine").validator();
664  // parameterList->getEntry("IterRefine").setValidator(iter_refine_validator);
665 
666  // data_.options.IterRefine = getIntegralValue<SLUD::IterRefine_t>(*parameterList, "IterRefine");
667  // }
668  data_.options.IterRefine = SLUD::NOREFINE;
669 
670  bool replace_tiny = parameterList->get<bool>("ReplaceTinyPivot", true);
671  data_.options.ReplaceTinyPivot = replace_tiny ? SLUD::YES : SLUD::NO;
672  }
673 
674 
675  template <class Matrix, class Vector>
676  Teuchos::RCP<const Teuchos::ParameterList>
678  {
679  using std::string;
680  using Teuchos::tuple;
681  using Teuchos::ParameterList;
682  using Teuchos::EnhancedNumberValidator;
683  using Teuchos::setStringToIntegralParameter;
684  using Teuchos::stringToIntegralParameterEntryValidator;
685 
686  static Teuchos::RCP<const Teuchos::ParameterList> valid_params;
687 
688  if( is_null(valid_params) ){
689  Teuchos::RCP<Teuchos::ParameterList> pl = Teuchos::parameterList();
690 
691  Teuchos::RCP<EnhancedNumberValidator<SLUD::int_t> > col_row_validator
692  = Teuchos::rcp( new EnhancedNumberValidator<SLUD::int_t>() );
693  col_row_validator->setMin(1);
694 
695  pl->set("npcol", data_.grid.npcol,
696  "Number of columns in the processor grid. "
697  "Must be set with nprow", col_row_validator);
698  pl->set("nprow", data_.grid.nprow,
699  "Number of rows in the SuperLU_DIST processor grid. "
700  "Must be set together with npcol", col_row_validator);
701 
702  // validator will catch any value besides NOTRANS
703  setStringToIntegralParameter<SLUD::trans_t>("Trans", "NOTRANS",
704  "Solve for the transpose system or not",
705  tuple<string>("NOTRANS"),
706  tuple<string>("Do not solve with transpose"),
707  tuple<SLUD::trans_t>(SLUD::NOTRANS),
708  pl.getRawPtr());
709 
710  // TODO: uncomment when supported
711  // pl->set("Equil", false, "Whether to equilibrate the system before solve");
712 
713  // TODO: uncomment when supported
714  // setStringToIntegralParameter<SLUD::IterRefine_t>("IterRefine", "NOREFINE",
715  // "Type of iterative refinement to use",
716  // tuple<string>("NOREFINE", "DOUBLE"),
717  // tuple<string>("Do not use iterative refinement",
718  // "Do double iterative refinement"),
719  // tuple<SLUD::IterRefine_t>(SLUD::NOREFINE,
720  // SLUD::DOUBLE),
721  // pl.getRawPtr());
722 
723  pl->set("ReplaceTinyPivot", true,
724  "Specifies whether to replace tiny diagonals during LU factorization");
725 
726  setStringToIntegralParameter<SLUD::colperm_t>("ColPerm", "PARMETIS",
727  "Specifies how to permute the columns of the "
728  "matrix for sparsity preservation",
729  tuple<string>("NATURAL", "PARMETIS"),
730  tuple<string>("Natural ordering",
731  "ParMETIS ordering on A^T + A"),
732  tuple<SLUD::colperm_t>(SLUD::NATURAL,
733  SLUD::PARMETIS),
734  pl.getRawPtr());
735 
736  valid_params = pl;
737  }
738 
739  return valid_params;
740  }
741 
742 
743  template <class Matrix, class Vector>
744  void
746  SLUD::int_t& nprow,
747  SLUD::int_t& npcol) const {
748  TEUCHOS_TEST_FOR_EXCEPTION( nprocs < 1,
749  std::invalid_argument,
750  "Number of MPI processes must be at least 1" );
751  SLUD::int_t c, r = 1;
752  while( r*r <= nprocs ) r++;
753  nprow = npcol = --r; // fall back to square grid
754  c = nprocs / r;
755  while( (r--)*c != nprocs ){
756  c = nprocs / r; // note integer division
757  }
758  ++r;
759  // prefer the square grid over a single row (which will only happen
760  // in the case of a prime nprocs
761  if( r > 1 || nprocs < 9){ // nprocs < 9 is a heuristic for the small cases
762  nprow = r;
763  npcol = c;
764  }
765  }
766 
767 
768  template <class Matrix, class Vector>
769  bool
771  // Extract the necessary information from mat and call SLU function
772  using Teuchos::Array;
773  using Teuchos::ArrayView;
774  using Teuchos::ptrInArg;
775  using Teuchos::as;
776 
777  using SLUD::int_t;
778 
779 #ifdef HAVE_AMESOS2_TIMERS
780  Teuchos::TimeMonitor convTimer(this->timers_.mtxConvTime_);
781 #endif
782 
783  // Cleanup old store memory if it's non-NULL
784  if( data_.A.Store != NULL ){
785  SLUD::Destroy_SuperMatrix_Store_dist( &(data_.A) );
786  data_.A.Store = NULL;
787  }
788 
789  Teuchos::RCP<const MatrixAdapter<Matrix> > redist_mat
790  = this->matrixA_->get(ptrInArg(*superlu_rowmap_));
791 
792  int_t l_nnz, l_rows, g_rows, g_cols, fst_global_row;
793  l_nnz = as<int_t>(redist_mat->getLocalNNZ());
794  l_rows = as<int_t>(redist_mat->getLocalNumRows());
795  g_rows = as<int_t>(redist_mat->getGlobalNumRows());
796  g_cols = g_rows; // we deal with square matrices
797  fst_global_row = as<int_t>(superlu_rowmap_->getMinGlobalIndex());
798 
799  nzvals_.resize(l_nnz);
800  colind_.resize(l_nnz);
801  rowptr_.resize(l_rows + 1);
802 
803  int_t nnz_ret = 0;
804  {
805 #ifdef HAVE_AMESOS2_TIMERS
806  Teuchos::TimeMonitor mtxRedistTimer( this->timers_.mtxRedistTime_ );
807 #endif
808 
811  slu_type, int_t, int_t >::do_get(redist_mat.ptr(),
812  nzvals_(), colind_(), rowptr_(),
813  nnz_ret,
814  ptrInArg(*superlu_rowmap_),
815  ARBITRARY);
816  }
817 
818  TEUCHOS_TEST_FOR_EXCEPTION( nnz_ret != l_nnz,
819  std::runtime_error,
820  "Did not get the expected number of non-zero vals");
821 
822  // Get the SLU data type for this type of matrix
823  SLUD::Dtype_t dtype = type_map::dtype;
824 
825  if( in_grid_ ){
826  function_map::create_CompRowLoc_Matrix(&(data_.A),
827  g_rows, g_cols,
828  l_nnz, l_rows, fst_global_row,
829  nzvals_.getRawPtr(),
830  colind_.getRawPtr(),
831  rowptr_.getRawPtr(),
832  SLUD::SLU_NR_loc,
833  dtype, SLUD::SLU_GE);
834  }
835 
836  return true;
837 }
838 
839 
840  template<class Matrix, class Vector>
841  const char* Superludist<Matrix,Vector>::name = "SuperLU_DIST";
842 
843 
844 } // end namespace Amesos2
845 
846 #endif // AMESOS2_SUPERLUDIST_DEF_HPP
Amesos2::SolverCore: A templated interface for interaction with third-party direct sparse solvers...
Definition: Amesos2_SolverCore_decl.hpp:105
Amesos2 interface to the distributed memory version of SuperLU.
Definition: Amesos2_Superludist_decl.hpp:90
EPhase
Used to indicate a phase in the direct solution.
Definition: Amesos2_TypeDecl.hpp:65
void get_default_grid_size(int nprocs, SLUD::int_t &nprow, SLUD::int_t &npcol) const
Definition: Amesos2_Superludist_def.hpp:745
Similar to get_ccs_helper , but used to get a CRS representation of the given matrix.
Definition: Amesos2_Util.hpp:591
global_size_type globalNumCols_
Number of global columns in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:479
Superludist(Teuchos::RCP< const Matrix > A, Teuchos::RCP< Vector > X, Teuchos::RCP< const Vector > B)
Initialize from Teuchos::RCP.
Definition: Amesos2_Superludist_def.hpp:68
void setParameters_impl(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Definition: Amesos2_Superludist_def.hpp:607
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters_impl() const
Definition: Amesos2_Superludist_def.hpp:677
global_size_type globalNumRows_
Number of global rows in matrixA_.
Definition: Amesos2_SolverCore_decl.hpp:476
Helper class for getting 1-D copies of multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:243
Definition: Amesos2_TypeDecl.hpp:142
Utility functions for Amesos2.
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a const parameter list of all of the valid parameters that this->setParameterList(...) will accept.
Definition: Amesos2_SolverCore_def.hpp:310
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
Returns a pointer to the Teuchos::Comm communicator with this operator.
Definition: Amesos2_SolverCore_decl.hpp:363
int solve_impl(const Teuchos::Ptr< MultiVecAdapter< Vector > > X, const Teuchos::Ptr< const MultiVecAdapter< Vector > > B) const
SuperLU_DIST specific solve.
Definition: Amesos2_Superludist_def.hpp:460
Provides definition of SuperLU_DIST types as well as conversions and type traits. ...
Definition: Amesos2_AbstractConcreteMatrixAdapter.hpp:48
int preOrdering_impl()
Performs pre-ordering on the matrix to increase efficiency.
Definition: Amesos2_Superludist_def.hpp:304
bool matrixShapeOK_impl() const
Determines whether the shape of the matrix is OK for this solver.
Definition: Amesos2_Superludist_def.hpp:598
A Matrix adapter interface for Amesos2.
Definition: Amesos2_MatrixAdapter_decl.hpp:76
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > superlu_rowmap_
Maps rows of the matrix to processors in the SuperLU_DIST processor grid.
Definition: Amesos2_Superludist_decl.hpp:327
~Superludist()
Destructor.
Definition: Amesos2_Superludist_def.hpp:237
super_type & setParameters(const Teuchos::RCP< Teuchos::ParameterList > &parameterList)
Set/update internal variables and solver options.
Definition: Amesos2_SolverCore_def.hpp:278
int symbolicFactorization_impl()
Perform symbolic factorization of the matrix using SuperLU_DIST.
Definition: Amesos2_Superludist_def.hpp:354
bool in_grid_
true if this processor is in SuperLU_DISTS&#39;s 2D process grid
Definition: Amesos2_Superludist_decl.hpp:320
Helper class for putting 1-D data arrays into multivectors.
Definition: Amesos2_MultiVecAdapter_decl.hpp:296
A templated MultiVector class adapter for Amesos2.
Definition: Amesos2_MultiVecAdapter_decl.hpp:175
bool loadA_impl(EPhase current_phase)
Reads matrix data into internal solver structures.
Definition: Amesos2_Superludist_def.hpp:770
Teuchos::RCP< const MatrixAdapter< Matrix > > matrixA_
The LHS operator.
Definition: Amesos2_SolverCore_decl.hpp:455
int numericFactorization_impl()
SuperLU_DIST specific numeric factorization.
Definition: Amesos2_Superludist_def.hpp:386