Ifpack2 Templated Preconditioning Package  Version 1.0
Ifpack2_Relaxation_def.hpp
1 /*@HEADER
2 // ***********************************************************************
3 //
4 // Ifpack2: Templated Object-Oriented Algebraic Preconditioner Package
5 // Copyright (2009) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // ***********************************************************************
38 //@HEADER
39 */
40 
41 #ifndef IFPACK2_RELAXATION_DEF_HPP
42 #define IFPACK2_RELAXATION_DEF_HPP
43 
44 #include "Teuchos_StandardParameterEntryValidators.hpp"
45 #include "Teuchos_TimeMonitor.hpp"
46 #include "Tpetra_CrsMatrix.hpp"
47 #include "Tpetra_BlockCrsMatrix.hpp"
48 #include "Tpetra_BlockView.hpp"
49 #include "Ifpack2_Utilities.hpp"
50 #include "MatrixMarket_Tpetra.hpp"
51 #include "Tpetra_Details_residual.hpp"
52 #include <cstdlib>
53 #include <memory>
54 #include <sstream>
55 #include "KokkosSparse_gauss_seidel.hpp"
56 
57 namespace {
58  // Validate that a given int is nonnegative.
59  class NonnegativeIntValidator : public Teuchos::ParameterEntryValidator {
60  public:
61  // Constructor (does nothing).
62  NonnegativeIntValidator () {}
63 
64  // ParameterEntryValidator wants this method.
65  Teuchos::ParameterEntryValidator::ValidStringsList validStringValues () const {
66  return Teuchos::null;
67  }
68 
69  // Actually validate the parameter's value.
70  void
71  validate (const Teuchos::ParameterEntry& entry,
72  const std::string& paramName,
73  const std::string& sublistName) const
74  {
75  using std::endl;
76  Teuchos::any anyVal = entry.getAny (true);
77  const std::string entryName = entry.getAny (false).typeName ();
78 
79  TEUCHOS_TEST_FOR_EXCEPTION(
80  anyVal.type () != typeid (int),
81  Teuchos::Exceptions::InvalidParameterType,
82  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
83  << "\" has the wrong type." << endl << "Parameter: " << paramName
84  << endl << "Type specified: " << entryName << endl
85  << "Type required: int" << endl);
86 
87  const int val = Teuchos::any_cast<int> (anyVal);
88  TEUCHOS_TEST_FOR_EXCEPTION(
89  val < 0, Teuchos::Exceptions::InvalidParameterValue,
90  "Parameter \"" << paramName << "\" in sublist \"" << sublistName
91  << "\" is negative." << endl << "Parameter: " << paramName
92  << endl << "Value specified: " << val << endl
93  << "Required range: [0, INT_MAX]" << endl);
94  }
95 
96  // ParameterEntryValidator wants this method.
97  const std::string getXMLTypeName () const {
98  return "NonnegativeIntValidator";
99  }
100 
101  // ParameterEntryValidator wants this method.
102  void
103  printDoc (const std::string& docString,
104  std::ostream &out) const
105  {
106  Teuchos::StrUtils::printLines (out, "# ", docString);
107  out << "#\tValidator Used: " << std::endl;
108  out << "#\t\tNonnegativeIntValidator" << std::endl;
109  }
110  };
111 
112  // A way to get a small positive number (eps() for floating-point
113  // types, or 1 for integer types) when Teuchos::ScalarTraits doesn't
114  // define it (for example, for integer values).
115  template<class Scalar, const bool isOrdinal=Teuchos::ScalarTraits<Scalar>::isOrdinal>
116  class SmallTraits {
117  public:
118  // Return eps if Scalar is a floating-point type, else return 1.
119  static const Scalar eps ();
120  };
121 
122  // Partial specialization for when Scalar is not a floating-point type.
123  template<class Scalar>
124  class SmallTraits<Scalar, true> {
125  public:
126  static const Scalar eps () {
127  return Teuchos::ScalarTraits<Scalar>::one ();
128  }
129  };
130 
131  // Partial specialization for when Scalar is a floating-point type.
132  template<class Scalar>
133  class SmallTraits<Scalar, false> {
134  public:
135  static const Scalar eps () {
136  return Teuchos::ScalarTraits<Scalar>::eps ();
137  }
138  };
139 
140  // Work-around for GitHub Issue #5269.
141  template<class Scalar,
142  const bool isComplex = Teuchos::ScalarTraits<Scalar>::isComplex>
143  struct RealTraits {};
144 
145  template<class Scalar>
146  struct RealTraits<Scalar, false> {
147  using val_type = Scalar;
148  using mag_type = Scalar;
149  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
150  return z;
151  }
152  };
153 
154  template<class Scalar>
155  struct RealTraits<Scalar, true> {
156  using val_type = Scalar;
157  using mag_type = typename Teuchos::ScalarTraits<Scalar>::magnitudeType;
158  static KOKKOS_INLINE_FUNCTION mag_type real (const val_type& z) {
159  return Kokkos::ArithTraits<val_type>::real (z);
160  }
161  };
162 
163  template<class Scalar>
164  KOKKOS_INLINE_FUNCTION typename RealTraits<Scalar>::mag_type
165  getRealValue (const Scalar& z) {
166  return RealTraits<Scalar>::real (z);
167  }
168 
169 } // namespace (anonymous)
170 
171 namespace Ifpack2 {
172 
173 template<class MatrixType>
174 void
175 Relaxation<MatrixType>::
176 updateCachedMultiVector (const Teuchos::RCP<const Tpetra::Map<local_ordinal_type, global_ordinal_type, node_type>>& map,
177  size_t numVecs) const
178 {
179  // Allocate a multivector if the cached one isn't perfect. Checking
180  // for map pointer equality is much cheaper than Map::isSameAs.
181  if (cachedMV_.is_null () ||
182  map.get () != cachedMV_->getMap ().get () ||
183  cachedMV_->getNumVectors () != numVecs) {
184  using MV = Tpetra::MultiVector<scalar_type, local_ordinal_type,
185  global_ordinal_type, node_type>;
186  cachedMV_ = Teuchos::rcp (new MV (map, numVecs, false));
187  }
188 }
189 
190 template<class MatrixType>
192 setMatrix(const Teuchos::RCP<const row_matrix_type>& A)
193 {
194  if (A.getRawPtr() != A_.getRawPtr()) { // it's a different matrix
195  Importer_ = Teuchos::null;
196  pointImporter_ = Teuchos::null;
197  Diagonal_ = Teuchos::null; // ??? what if this comes from the user???
198  isInitialized_ = false;
199  IsComputed_ = false;
200  diagOffsets_ = Kokkos::View<size_t*, device_type>();
201  savedDiagOffsets_ = false;
202  hasBlockCrsMatrix_ = false;
203  serialGaussSeidel_ = Teuchos::null;
204  if (! A.is_null ()) {
205  IsParallel_ = (A->getRowMap ()->getComm ()->getSize () > 1);
206  }
207  A_ = A;
208  }
209 }
210 
211 template<class MatrixType>
213 Relaxation (const Teuchos::RCP<const row_matrix_type>& A)
214 : A_ (A),
215  IsParallel_ ((A.is_null () || A->getRowMap ().is_null () || A->getRowMap ()->getComm ().is_null ()) ?
216  false : // a reasonable default if there's no communicator
217  A->getRowMap ()->getComm ()->getSize () > 1)
218 {
219  this->setObjectLabel ("Ifpack2::Relaxation");
220 }
221 
222 
223 template<class MatrixType>
224 Teuchos::RCP<const Teuchos::ParameterList>
226 {
227  using Teuchos::Array;
228  using Teuchos::ParameterList;
229  using Teuchos::parameterList;
230  using Teuchos::RCP;
231  using Teuchos::rcp;
232  using Teuchos::rcp_const_cast;
233  using Teuchos::rcp_implicit_cast;
234  using Teuchos::setStringToIntegralParameter;
235  typedef Teuchos::ParameterEntryValidator PEV;
236 
237  if (validParams_.is_null ()) {
238  RCP<ParameterList> pl = parameterList ("Ifpack2::Relaxation");
239 
240  // Set a validator that automatically converts from the valid
241  // string options to their enum values.
242  Array<std::string> precTypes (8);
243  precTypes[0] = "Jacobi";
244  precTypes[1] = "Gauss-Seidel";
245  precTypes[2] = "Symmetric Gauss-Seidel";
246  precTypes[3] = "MT Gauss-Seidel";
247  precTypes[4] = "MT Symmetric Gauss-Seidel";
248  precTypes[5] = "Richardson";
249  precTypes[6] = "Two-stage Gauss-Seidel";
250  precTypes[7] = "Two-stage Symmetric Gauss-Seidel";
251  Array<Details::RelaxationType> precTypeEnums (8);
252  precTypeEnums[0] = Details::JACOBI;
253  precTypeEnums[1] = Details::GS;
254  precTypeEnums[2] = Details::SGS;
255  precTypeEnums[3] = Details::MTGS;
256  precTypeEnums[4] = Details::MTSGS;
257  precTypeEnums[5] = Details::RICHARDSON;
258  precTypeEnums[6] = Details::GS2;
259  precTypeEnums[7] = Details::SGS2;
260  const std::string defaultPrecType ("Jacobi");
261  setStringToIntegralParameter<Details::RelaxationType> ("relaxation: type",
262  defaultPrecType, "Relaxation method", precTypes (), precTypeEnums (),
263  pl.getRawPtr ());
264 
265  const int numSweeps = 1;
266  RCP<PEV> numSweepsValidator =
267  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
268  pl->set ("relaxation: sweeps", numSweeps, "Number of relaxation sweeps",
269  rcp_const_cast<const PEV> (numSweepsValidator));
270 
271  // number of 'local' outer sweeps for two-stage GS
272  const int numOuterSweeps = 1;
273  RCP<PEV> numOuterSweepsValidator =
274  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
275  pl->set ("relaxation: outer sweeps", numOuterSweeps, "Number of outer local relaxation sweeps for two-stage GS",
276  rcp_const_cast<const PEV> (numOuterSweepsValidator));
277  // number of 'local' inner sweeps for two-stage GS
278  const int numInnerSweeps = 1;
279  RCP<PEV> numInnerSweepsValidator =
280  rcp_implicit_cast<PEV> (rcp (new NonnegativeIntValidator));
281  pl->set ("relaxation: inner sweeps", numInnerSweeps, "Number of inner local relaxation sweeps for two-stage GS",
282  rcp_const_cast<const PEV> (numInnerSweepsValidator));
283  // specify damping factor for the inner sweeps of two-stage GS
284  const scalar_type innerDampingFactor = STS::one ();
285  pl->set ("relaxation: inner damping factor", innerDampingFactor, "Damping factor for the inner sweep of two-stage GS");
286  // specify whether to use sptrsv instead of inner-iterations for two-stage GS
287  const bool innerSpTrsv = false;
288  pl->set ("relaxation: inner sparse-triangular solve", innerSpTrsv, "Specify whether to use sptrsv instead of JR iterations for two-stage GS");
289  // specify whether to use compact form of recurrence for two-stage GS
290  const bool compactForm = false;
291  pl->set ("relaxation: compact form", compactForm, "Specify whether to use compact form of recurrence for two-stage GS");
292 
293  const scalar_type dampingFactor = STS::one ();
294  pl->set ("relaxation: damping factor", dampingFactor);
295 
296  const bool zeroStartingSolution = true;
297  pl->set ("relaxation: zero starting solution", zeroStartingSolution);
298 
299  const bool doBackwardGS = false;
300  pl->set ("relaxation: backward mode", doBackwardGS);
301 
302  const bool doL1Method = false;
303  pl->set ("relaxation: use l1", doL1Method);
304 
305  const magnitude_type l1eta = (STM::one() + STM::one() + STM::one()) /
306  (STM::one() + STM::one()); // 1.5
307  pl->set ("relaxation: l1 eta", l1eta);
308 
309  const scalar_type minDiagonalValue = STS::zero ();
310  pl->set ("relaxation: min diagonal value", minDiagonalValue);
311 
312  const bool fixTinyDiagEntries = false;
313  pl->set ("relaxation: fix tiny diagonal entries", fixTinyDiagEntries);
314 
315  const bool checkDiagEntries = false;
316  pl->set ("relaxation: check diagonal entries", checkDiagEntries);
317 
318  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = Teuchos::null;
319  pl->set("relaxation: local smoothing indices", localSmoothingIndices);
320 
321  const bool is_matrix_structurally_symmetric = false;
322  pl->set("relaxation: symmetric matrix structure", is_matrix_structurally_symmetric);
323 
324  const bool ifpack2_dump_matrix = false;
325  pl->set("relaxation: ifpack2 dump matrix", ifpack2_dump_matrix);
326 
327  const int cluster_size = 1;
328  pl->set("relaxation: mtgs cluster size", cluster_size);
329 
330  const int long_row_threshold = 0;
331  pl->set("relaxation: long row threshold", long_row_threshold);
332 
333  validParams_ = rcp_const_cast<const ParameterList> (pl);
334  }
335  return validParams_;
336 }
337 
338 
339 template<class MatrixType>
340 void Relaxation<MatrixType>::setParametersImpl (Teuchos::ParameterList& pl)
341 {
342  using Teuchos::getIntegralValue;
343  using Teuchos::ParameterList;
344  using Teuchos::RCP;
345  typedef scalar_type ST; // just to make code below shorter
346 
347  if (pl.isType<double>("relaxation: damping factor")) {
348  // Make sure that ST=complex can run with a damping factor that is
349  // a double.
350  ST df = pl.get<double>("relaxation: damping factor");
351  pl.remove("relaxation: damping factor");
352  pl.set("relaxation: damping factor",df);
353  }
354 
355  pl.validateParametersAndSetDefaults (* getValidParameters ());
356 
357  const Details::RelaxationType precType =
358  getIntegralValue<Details::RelaxationType> (pl, "relaxation: type");
359  const int numSweeps = pl.get<int> ("relaxation: sweeps");
360  const ST dampingFactor = pl.get<ST> ("relaxation: damping factor");
361  const bool zeroStartSol = pl.get<bool> ("relaxation: zero starting solution");
362  const bool doBackwardGS = pl.get<bool> ("relaxation: backward mode");
363  const bool doL1Method = pl.get<bool> ("relaxation: use l1");
364  const magnitude_type l1Eta = pl.get<magnitude_type> ("relaxation: l1 eta");
365  const ST minDiagonalValue = pl.get<ST> ("relaxation: min diagonal value");
366  const bool fixTinyDiagEntries = pl.get<bool> ("relaxation: fix tiny diagonal entries");
367  const bool checkDiagEntries = pl.get<bool> ("relaxation: check diagonal entries");
368  const bool is_matrix_structurally_symmetric = pl.get<bool> ("relaxation: symmetric matrix structure");
369  const bool ifpack2_dump_matrix = pl.get<bool> ("relaxation: ifpack2 dump matrix");
370  int cluster_size = 1;
371  if(pl.isParameter ("relaxation: mtgs cluster size")) //optional parameter
372  cluster_size = pl.get<int> ("relaxation: mtgs cluster size");
373  int long_row_threshold = 0;
374  if(pl.isParameter ("relaxation: long row threshold")) //optional parameter
375  long_row_threshold = pl.get<int> ("relaxation: long row threshold");
376 
377  Teuchos::ArrayRCP<local_ordinal_type> localSmoothingIndices = pl.get<Teuchos::ArrayRCP<local_ordinal_type> >("relaxation: local smoothing indices");
378 
379  // for Two-stage Gauss-Seidel
380  if (!std::is_same<double, ST>::value && pl.isType<double>("relaxation: inner damping factor")) {
381  // Make sure that ST=complex can run with a damping factor that is
382  // a double.
383  ST df = pl.get<double>("relaxation: inner damping factor");
384  pl.remove("relaxation: inner damping factor");
385  pl.set("relaxation: inner damping factor",df);
386  }
387  //If long row algorithm was requested, make sure non-cluster (point) multicolor Gauss-Seidel (aka MTGS/MTSGS) will be used.
388  if (long_row_threshold > 0) {
389  TEUCHOS_TEST_FOR_EXCEPTION(
390  cluster_size != 1, std::invalid_argument, "Ifpack2::Relaxation: "
391  "Requested long row MTGS/MTSGS algorithm and cluster GS/SGS, but those are not compatible.");
392  TEUCHOS_TEST_FOR_EXCEPTION(
393  precType != Details::RelaxationType::MTGS && precType != Details::RelaxationType::MTSGS,
394  std::invalid_argument, "Ifpack2::Relaxation: "
395  "Requested long row MTGS/MTSGS algorithm, but this is only compatible with preconditioner types "
396  "'MT Gauss-Seidel' and 'MT Symmetric Gauss-Seidel'.");
397  }
398 
399  const ST innerDampingFactor = pl.get<ST> ("relaxation: inner damping factor");
400  const int numInnerSweeps = pl.get<int> ("relaxation: inner sweeps");
401  const int numOuterSweeps = pl.get<int> ("relaxation: outer sweeps");
402  const bool innerSpTrsv = pl.get<bool> ("relaxation: inner sparse-triangular solve");
403  const bool compactForm = pl.get<bool> ("relaxation: compact form");
404 
405  // "Commit" the changes, now that we've validated everything.
406  PrecType_ = precType;
407  NumSweeps_ = numSweeps;
408  DampingFactor_ = dampingFactor;
409  ZeroStartingSolution_ = zeroStartSol;
410  DoBackwardGS_ = doBackwardGS;
411  DoL1Method_ = doL1Method;
412  L1Eta_ = l1Eta;
413  MinDiagonalValue_ = minDiagonalValue;
414  fixTinyDiagEntries_ = fixTinyDiagEntries;
415  checkDiagEntries_ = checkDiagEntries;
416  clusterSize_ = cluster_size;
417  longRowThreshold_ = long_row_threshold;
418  is_matrix_structurally_symmetric_ = is_matrix_structurally_symmetric;
419  ifpack2_dump_matrix_ = ifpack2_dump_matrix;
420  localSmoothingIndices_ = localSmoothingIndices;
421  // for Two-stage GS
422  NumInnerSweeps_ = numInnerSweeps;
423  NumOuterSweeps_ = numOuterSweeps;
424  InnerSpTrsv_ = innerSpTrsv;
425  InnerDampingFactor_ = innerDampingFactor;
426  CompactForm_ = compactForm;
427 }
428 
429 
430 template<class MatrixType>
431 void Relaxation<MatrixType>::setParameters (const Teuchos::ParameterList& pl)
432 {
433  // FIXME (aprokop 18 Oct 2013) Casting away const is bad here.
434  // but otherwise, we will get [unused] in pl
435  this->setParametersImpl(const_cast<Teuchos::ParameterList&>(pl));
436 }
437 
438 
439 template<class MatrixType>
440 Teuchos::RCP<const Teuchos::Comm<int> >
442  TEUCHOS_TEST_FOR_EXCEPTION(
443  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getComm: "
444  "The input matrix A is null. Please call setMatrix() with a nonnull "
445  "input matrix before calling this method.");
446  return A_->getRowMap ()->getComm ();
447 }
448 
449 
450 template<class MatrixType>
451 Teuchos::RCP<const typename Relaxation<MatrixType>::row_matrix_type>
453  return A_;
454 }
455 
456 
457 template<class MatrixType>
458 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
459  typename MatrixType::global_ordinal_type,
460  typename MatrixType::node_type> >
462  TEUCHOS_TEST_FOR_EXCEPTION(
463  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getDomainMap: "
464  "The input matrix A is null. Please call setMatrix() with a nonnull "
465  "input matrix before calling this method.");
466  return A_->getDomainMap ();
467 }
468 
469 
470 template<class MatrixType>
471 Teuchos::RCP<const Tpetra::Map<typename MatrixType::local_ordinal_type,
472  typename MatrixType::global_ordinal_type,
473  typename MatrixType::node_type> >
475  TEUCHOS_TEST_FOR_EXCEPTION(
476  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getRangeMap: "
477  "The input matrix A is null. Please call setMatrix() with a nonnull "
478  "input matrix before calling this method.");
479  return A_->getRangeMap ();
480 }
481 
482 
483 template<class MatrixType>
485  return true;
486 }
487 
488 
489 template<class MatrixType>
491  return(NumInitialize_);
492 }
493 
494 
495 template<class MatrixType>
497  return(NumCompute_);
498 }
499 
500 
501 template<class MatrixType>
503  return(NumApply_);
504 }
505 
506 
507 template<class MatrixType>
509  return(InitializeTime_);
510 }
511 
512 
513 template<class MatrixType>
515  return(ComputeTime_);
516 }
517 
518 
519 template<class MatrixType>
521  return(ApplyTime_);
522 }
523 
524 
525 template<class MatrixType>
527  return(ComputeFlops_);
528 }
529 
530 
531 template<class MatrixType>
533  return(ApplyFlops_);
534 }
535 
536 
537 
538 template<class MatrixType>
540  TEUCHOS_TEST_FOR_EXCEPTION(
541  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::getNodeSmootherComplexity: "
542  "The input matrix A is null. Please call setMatrix() with a nonnull "
543  "input matrix, then call compute(), before calling this method.");
544  // Relaxation methods cost roughly one apply + one diagonal inverse per iteration
545  return A_->getNodeNumRows() + A_->getNodeNumEntries();
546 }
547 
548 
549 template<class MatrixType>
550 void
552 apply (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
553  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
554  Teuchos::ETransp /* mode */,
555  scalar_type alpha,
556  scalar_type beta) const
557 {
558  using Teuchos::as;
559  using Teuchos::RCP;
560  using Teuchos::rcp;
561  using Teuchos::rcpFromRef;
562  typedef Tpetra::MultiVector<scalar_type, local_ordinal_type,
564  TEUCHOS_TEST_FOR_EXCEPTION(
565  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::apply: "
566  "The input matrix A is null. Please call setMatrix() with a nonnull "
567  "input matrix, then call compute(), before calling this method.");
568  TEUCHOS_TEST_FOR_EXCEPTION(
569  ! isComputed (),
570  std::runtime_error,
571  "Ifpack2::Relaxation::apply: You must call compute() on this Ifpack2 "
572  "preconditioner instance before you may call apply(). You may call "
573  "isComputed() to find out if compute() has been called already.");
574  TEUCHOS_TEST_FOR_EXCEPTION(
575  X.getNumVectors() != Y.getNumVectors(),
576  std::runtime_error,
577  "Ifpack2::Relaxation::apply: X and Y have different numbers of columns. "
578  "X has " << X.getNumVectors() << " columns, but Y has "
579  << Y.getNumVectors() << " columns.");
580  TEUCHOS_TEST_FOR_EXCEPTION(
581  beta != STS::zero (), std::logic_error,
582  "Ifpack2::Relaxation::apply: beta = " << beta << " != 0 case not "
583  "implemented.");
584 
585  const std::string timerName ("Ifpack2::Relaxation::apply");
586  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
587  if (timer.is_null ()) {
588  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
589  }
590 
591  double startTime = timer->wallTime();
592  {
593  Teuchos::TimeMonitor timeMon (*timer);
594  // Special case: alpha == 0.
595  if (alpha == STS::zero ()) {
596  // No floating-point operations, so no need to update a count.
597  Y.putScalar (STS::zero ());
598  }
599  else {
600  // If X and Y alias one another, then we need to create an
601  // auxiliary vector, Xcopy (a deep copy of X).
602  RCP<const MV> Xcopy;
603  {
604  if (X.aliases(Y)) {
605  Xcopy = rcp (new MV (X, Teuchos::Copy));
606  } else {
607  Xcopy = rcpFromRef (X);
608  }
609  }
610 
611  // Each of the following methods updates the flop count itself.
612  // All implementations handle zeroing out the starting solution
613  // (if necessary) themselves.
614  switch (PrecType_) {
615  case Ifpack2::Details::JACOBI:
616  ApplyInverseJacobi(*Xcopy,Y);
617  break;
618  case Ifpack2::Details::GS:
619  ApplyInverseSerialGS(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
620  break;
621  case Ifpack2::Details::SGS:
622  ApplyInverseSerialGS(*Xcopy, Y, Tpetra::Symmetric);
623  break;
624  case Ifpack2::Details::MTGS:
625  case Ifpack2::Details::GS2:
626  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, DoBackwardGS_ ? Tpetra::Backward : Tpetra::Forward);
627  break;
628  case Ifpack2::Details::MTSGS:
629  case Ifpack2::Details::SGS2:
630  ApplyInverseMTGS_CrsMatrix(*Xcopy, Y, Tpetra::Symmetric);
631  break;
632  case Ifpack2::Details::RICHARDSON:
633  ApplyInverseRichardson(*Xcopy,Y);
634  break;
635 
636  default:
637  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error,
638  "Ifpack2::Relaxation::apply: Invalid preconditioner type enum value "
639  << PrecType_ << ". Please report this bug to the Ifpack2 developers.");
640  }
641  if (alpha != STS::one ()) {
642  Y.scale (alpha);
643  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
644  const double numVectors = as<double> (Y.getNumVectors ());
645  ApplyFlops_ += numGlobalRows * numVectors;
646  }
647  }
648  }
649  ApplyTime_ += (timer->wallTime() - startTime);
650  ++NumApply_;
651 }
652 
653 
654 template<class MatrixType>
655 void
657 applyMat (const Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& X,
658  Tpetra::MultiVector<scalar_type, local_ordinal_type, global_ordinal_type, node_type>& Y,
659  Teuchos::ETransp mode) const
660 {
661  TEUCHOS_TEST_FOR_EXCEPTION(
662  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
663  "The input matrix A is null. Please call setMatrix() with a nonnull "
664  "input matrix, then call compute(), before calling this method.");
665  TEUCHOS_TEST_FOR_EXCEPTION(
666  ! isComputed (), std::runtime_error, "Ifpack2::Relaxation::applyMat: "
667  "isComputed() must be true before you may call applyMat(). "
668  "Please call compute() before calling this method.");
669  TEUCHOS_TEST_FOR_EXCEPTION(
670  X.getNumVectors () != Y.getNumVectors (), std::invalid_argument,
671  "Ifpack2::Relaxation::applyMat: X.getNumVectors() = " << X.getNumVectors ()
672  << " != Y.getNumVectors() = " << Y.getNumVectors () << ".");
673  A_->apply (X, Y, mode);
674 }
675 
676 
677 template<class MatrixType>
679 {
680  const char methodName[] = "Ifpack2::Relaxation::initialize";
681 
682  TEUCHOS_TEST_FOR_EXCEPTION
683  (A_.is_null (), std::runtime_error, methodName << ": The "
684  "input matrix A is null. Please call setMatrix() with "
685  "a nonnull input matrix before calling this method.");
686 
687  Teuchos::RCP<Teuchos::Time> timer =
688  Teuchos::TimeMonitor::getNewCounter (methodName);
689 
690  double startTime = timer->wallTime();
691 
692  { // Timing of initialize starts here
693  Teuchos::TimeMonitor timeMon (*timer);
694  isInitialized_ = false;
695 
696  {
697  auto rowMap = A_->getRowMap ();
698  auto comm = rowMap.is_null () ? Teuchos::null : rowMap->getComm ();
699  IsParallel_ = ! comm.is_null () && comm->getSize () > 1;
700  }
701 
702  // mfh 21 Mar 2013, 07 May 2019: The Import object may be null,
703  // but in that case, the domain and column Maps are the same and
704  // we don't need to Import anyway.
705  //
706  // mfh 07 May 2019: A_->getGraph() might be an
707  // OverlappingRowGraph, which doesn't have an Import object.
708  // However, in that case, the comm should have size 1.
709  Importer_ = IsParallel_ ? A_->getGraph ()->getImporter () :
710  Teuchos::null;
711 
712  {
713  Teuchos::RCP<const block_crs_matrix_type> A_bcrs =
714  Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
715  hasBlockCrsMatrix_ = ! A_bcrs.is_null ();
716  }
717 
718  serialGaussSeidel_ = Teuchos::null;
719 
720  if (PrecType_ == Details::MTGS || PrecType_ == Details::MTSGS ||
721  PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
722  const crs_matrix_type* crsMat =
723  dynamic_cast<const crs_matrix_type*> (A_.get());
724  TEUCHOS_TEST_FOR_EXCEPTION
725  (crsMat == nullptr, std::logic_error, methodName << ": "
726  "Multithreaded Gauss-Seidel methods currently only work "
727  "when the input matrix is a Tpetra::CrsMatrix.");
728 
729  // FIXME (mfh 27 May 2019) Dumping the matrix belongs in
730  // compute, not initialize, since users may change the matrix's
731  // values at any time before calling compute.
732  if (ifpack2_dump_matrix_) {
733  static int sequence_number = 0;
734  const std::string file_name = "Ifpack2_MT_GS_" +
735  std::to_string (sequence_number++) + ".mtx";
736  Teuchos::RCP<const crs_matrix_type> rcp_crs_mat =
737  Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
738  if (! rcp_crs_mat.is_null ()) {
739  using writer_type = Tpetra::MatrixMarket::Writer<crs_matrix_type>;
740  writer_type::writeSparseFile (file_name, rcp_crs_mat);
741  }
742  }
743 
744  this->mtKernelHandle_ = Teuchos::rcp (new mt_kernel_handle_type ());
745  if (mtKernelHandle_->get_gs_handle () == nullptr) {
746  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2)
747  mtKernelHandle_->create_gs_handle (KokkosSparse::GS_TWOSTAGE);
748  else if(this->clusterSize_ == 1) {
749  mtKernelHandle_->create_gs_handle ();
750  mtKernelHandle_->get_point_gs_handle()->set_long_row_threshold(longRowThreshold_);
751  }
752  else
753  mtKernelHandle_->create_gs_handle (KokkosSparse::CLUSTER_DEFAULT, this->clusterSize_);
754  }
755  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
756  if (PrecType_ == Details::GS2 || PrecType_ == Details::SGS2) {
757  // set parameters for two-stage GS
758  mtKernelHandle_->set_gs_set_num_inner_sweeps (NumInnerSweeps_);
759  mtKernelHandle_->set_gs_set_num_outer_sweeps (NumOuterSweeps_);
760  mtKernelHandle_->set_gs_set_inner_damp_factor (InnerDampingFactor_);
761  mtKernelHandle_->set_gs_twostage (!InnerSpTrsv_, A_->getNodeNumRows ());
762  mtKernelHandle_->set_gs_twostage_compact_form (CompactForm_);
763  }
764 
765  using KokkosSparse::Experimental::gauss_seidel_symbolic;
766  gauss_seidel_symbolic<mt_kernel_handle_type,
767  lno_row_view_t,
768  lno_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
769  A_->getNodeNumRows (),
770  A_->getNodeNumCols (),
771  kcsr.graph.row_map,
772  kcsr.graph.entries,
773  is_matrix_structurally_symmetric_);
774  }
775  } // timing of initialize stops here
776 
777  InitializeTime_ += (timer->wallTime() - startTime);
778  ++NumInitialize_;
779  isInitialized_ = true;
780 }
781 
782 namespace Impl {
783 template <typename BlockDiagView>
784 struct InvertDiagBlocks {
785  typedef int value_type;
786  typedef typename BlockDiagView::size_type Size;
787 
788 private:
789  typedef Kokkos::MemoryTraits<Kokkos::Unmanaged> Unmanaged;
790  template <typename View>
791  using UnmanagedView = Kokkos::View<typename View::data_type, typename View::array_layout,
792  typename View::device_type, Unmanaged>;
793 
794  typedef typename BlockDiagView::non_const_value_type Scalar;
795  typedef typename BlockDiagView::device_type Device;
796  typedef Kokkos::View<Scalar**, Kokkos::LayoutRight, Device> RWrk;
797  typedef Kokkos::View<int**, Kokkos::LayoutRight, Device> IWrk;
798 
799  UnmanagedView<BlockDiagView> block_diag_;
800  // TODO Use thread team and scratch memory space. In this first
801  // pass, provide workspace for each block.
802  RWrk rwrk_buf_;
803  UnmanagedView<RWrk> rwrk_;
804  IWrk iwrk_buf_;
805  UnmanagedView<IWrk> iwrk_;
806 
807 public:
808  InvertDiagBlocks (BlockDiagView& block_diag)
809  : block_diag_(block_diag)
810  {
811  const auto blksz = block_diag.extent(1);
812  Kokkos::resize(rwrk_buf_, block_diag_.extent(0), blksz);
813  rwrk_ = rwrk_buf_;
814  Kokkos::resize(iwrk_buf_, block_diag_.extent(0), blksz);
815  iwrk_ = iwrk_buf_;
816  }
817 
818  KOKKOS_INLINE_FUNCTION
819  void operator() (const Size i, int& jinfo) const {
820  auto D_cur = Kokkos::subview(block_diag_, i, Kokkos::ALL(), Kokkos::ALL());
821  auto ipiv = Kokkos::subview(iwrk_, i, Kokkos::ALL());
822  auto work = Kokkos::subview(rwrk_, i, Kokkos::ALL());
823  int info = 0;
824  Tpetra::GETF2(D_cur, ipiv, info);
825  if (info) {
826  ++jinfo;
827  return;
828  }
829  Tpetra::GETRI(D_cur, ipiv, work, info);
830  if (info) ++jinfo;
831  }
832 
833  // Report the number of blocks with errors.
834  KOKKOS_INLINE_FUNCTION
835  void join (volatile value_type& dst, volatile value_type const& src) const { dst += src; }
836 };
837 }
838 
839 template<class MatrixType>
840 void Relaxation<MatrixType>::computeBlockCrs ()
841 {
842  using Kokkos::ALL;
843  using Teuchos::Array;
844  using Teuchos::ArrayRCP;
845  using Teuchos::ArrayView;
846  using Teuchos::as;
847  using Teuchos::Comm;
848  using Teuchos::RCP;
849  using Teuchos::rcp;
850  using Teuchos::REDUCE_MAX;
851  using Teuchos::REDUCE_MIN;
852  using Teuchos::REDUCE_SUM;
853  using Teuchos::rcp_dynamic_cast;
854  using Teuchos::reduceAll;
855  typedef local_ordinal_type LO;
856 
857  const std::string timerName ("Ifpack2::Relaxation::computeBlockCrs");
858  Teuchos::RCP<Teuchos::Time> timer = Teuchos::TimeMonitor::lookupCounter (timerName);
859  if (timer.is_null ()) {
860  timer = Teuchos::TimeMonitor::getNewCounter (timerName);
861  }
862  double startTime = timer->wallTime();
863  {
864  Teuchos::TimeMonitor timeMon (*timer);
865 
866  TEUCHOS_TEST_FOR_EXCEPTION(
867  A_.is_null (), std::runtime_error, "Ifpack2::Relaxation::"
868  "computeBlockCrs: The input matrix A is null. Please call setMatrix() "
869  "with a nonnull input matrix, then call initialize(), before calling "
870  "this method.");
871  auto blockCrsA = rcp_dynamic_cast<const block_crs_matrix_type> (A_);
872  TEUCHOS_TEST_FOR_EXCEPTION(
873  blockCrsA.is_null(), std::logic_error, "Ifpack2::Relaxation::"
874  "computeBlockCrs: A_ is not a BlockCrsMatrix, but it should be if we "
875  "got this far. Please report this bug to the Ifpack2 developers.");
876 
877  const scalar_type one = STS::one ();
878 
879  // Reset state.
880  IsComputed_ = false;
881 
882  const LO lclNumMeshRows =
883  blockCrsA->getCrsGraph ().getNodeNumRows ();
884  const LO blockSize = blockCrsA->getBlockSize ();
885 
886  if (! savedDiagOffsets_) {
887  blockDiag_ = block_diag_type (); // clear it before reallocating
888  blockDiag_ = block_diag_type ("Ifpack2::Relaxation::blockDiag_",
889  lclNumMeshRows, blockSize, blockSize);
890  if (Teuchos::as<LO>(diagOffsets_.extent (0) ) < lclNumMeshRows) {
891  // Clear diagOffsets_ first (by assigning an empty View to it)
892  // to save memory, before reallocating.
893  diagOffsets_ = Kokkos::View<size_t*, device_type> ();
894  diagOffsets_ = Kokkos::View<size_t*, device_type> ("offsets", lclNumMeshRows);
895  }
896  blockCrsA->getCrsGraph ().getLocalDiagOffsets (diagOffsets_);
897  TEUCHOS_TEST_FOR_EXCEPTION
898  (static_cast<size_t> (diagOffsets_.extent (0)) !=
899  static_cast<size_t> (blockDiag_.extent (0)),
900  std::logic_error, "diagOffsets_.extent(0) = " <<
901  diagOffsets_.extent (0) << " != blockDiag_.extent(0) = "
902  << blockDiag_.extent (0) <<
903  ". Please report this bug to the Ifpack2 developers.");
904  savedDiagOffsets_ = true;
905  }
906  blockCrsA->getLocalDiagCopy (blockDiag_, diagOffsets_);
907 
908  // Use an unmanaged View in this method, so that when we take
909  // subviews of it (to get each diagonal block), we don't have to
910  // touch the reference count. Reference count updates are a
911  // thread scalability bottleneck and have a performance cost even
912  // without using threads.
913  unmanaged_block_diag_type blockDiag = blockDiag_;
914 
915  if (DoL1Method_ && IsParallel_) {
916  const scalar_type two = one + one;
917  const size_t maxLength = A_->getNodeMaxNumRowEntries ();
918  nonconst_local_inds_host_view_type indices ("indices",maxLength);
919  nonconst_values_host_view_type values_ ("values",maxLength * blockSize * blockSize);
920  size_t numEntries = 0;
921 
922  for (LO i = 0; i < lclNumMeshRows; ++i) {
923  // FIXME (mfh 16 Dec 2015) Get views instead of copies.
924  blockCrsA->getLocalRowCopy (i, indices, values_, numEntries);
925  scalar_type * values = reinterpret_cast<scalar_type*>(values_.data());
926 
927  auto diagBlock = Kokkos::subview (blockDiag, i, ALL (), ALL ());
928  for (LO subRow = 0; subRow < blockSize; ++subRow) {
929  magnitude_type diagonal_boost = STM::zero ();
930  for (size_t k = 0 ; k < numEntries ; ++k) {
931  if (indices[k] >= lclNumMeshRows) {
932  const size_t offset = blockSize*blockSize*k + subRow*blockSize;
933  for (LO subCol = 0; subCol < blockSize; ++subCol) {
934  diagonal_boost += STS::magnitude (values[offset+subCol] / two);
935  }
936  }
937  }
938  if (STS::magnitude (diagBlock(subRow, subRow)) < L1Eta_ * diagonal_boost) {
939  diagBlock(subRow, subRow) += diagonal_boost;
940  }
941  }
942  }
943  }
944 
945  int info = 0;
946  {
947  Impl::InvertDiagBlocks<unmanaged_block_diag_type> idb(blockDiag);
948  typedef typename unmanaged_block_diag_type::execution_space exec_space;
949  typedef Kokkos::RangePolicy<exec_space, LO> range_type;
950 
951  Kokkos::parallel_reduce (range_type (0, lclNumMeshRows), idb, info);
952  // FIXME (mfh 19 May 2017) Different processes may not
953  // necessarily have this error all at the same time, so it would
954  // be better just to preserve a local error state and let users
955  // check.
956  TEUCHOS_TEST_FOR_EXCEPTION
957  (info > 0, std::runtime_error, "GETF2 or GETRI failed on = " << info
958  << " diagonal blocks.");
959  }
960 
961  // In a debug build, do an extra test to make sure that all the
962  // factorizations were computed correctly.
963 #ifdef HAVE_IFPACK2_DEBUG
964  const int numResults = 2;
965  // Use "max = -min" trick to get min and max in a single all-reduce.
966  int lclResults[2], gblResults[2];
967  lclResults[0] = info;
968  lclResults[1] = -info;
969  gblResults[0] = 0;
970  gblResults[1] = 0;
971  reduceAll<int, int> (* (A_->getGraph ()->getComm ()), REDUCE_MIN,
972  numResults, lclResults, gblResults);
973  TEUCHOS_TEST_FOR_EXCEPTION
974  (gblResults[0] != 0 || gblResults[1] != 0, std::runtime_error,
975  "Ifpack2::Relaxation::compute: When processing the input "
976  "Tpetra::BlockCrsMatrix, one or more diagonal block LU factorizations "
977  "failed on one or more (MPI) processes.");
978 #endif // HAVE_IFPACK2_DEBUG
979  serialGaussSeidel_ = rcp(new SerialGaussSeidel(blockCrsA, blockDiag_, localSmoothingIndices_, DampingFactor_));
980  } // end TimeMonitor scope
981 
982  ComputeTime_ += (timer->wallTime() - startTime);
983  ++NumCompute_;
984  IsComputed_ = true;
985 }
986 
987 template<class MatrixType>
989 {
990  using Teuchos::Array;
991  using Teuchos::ArrayRCP;
992  using Teuchos::ArrayView;
993  using Teuchos::as;
994  using Teuchos::Comm;
995  using Teuchos::RCP;
996  using Teuchos::rcp;
997  using Teuchos::REDUCE_MAX;
998  using Teuchos::REDUCE_MIN;
999  using Teuchos::REDUCE_SUM;
1000  using Teuchos::rcp_dynamic_cast;
1001  using Teuchos::reduceAll;
1002  using LO = local_ordinal_type;
1003  using vector_type = Tpetra::Vector<scalar_type, local_ordinal_type,
1005  using IST = typename vector_type::impl_scalar_type;
1006  using KAT = Kokkos::ArithTraits<IST>;
1007 
1008  const char methodName[] = "Ifpack2::Relaxation::compute";
1009  const scalar_type zero = STS::zero ();
1010  const scalar_type one = STS::one ();
1011 
1012  TEUCHOS_TEST_FOR_EXCEPTION
1013  (A_.is_null (), std::runtime_error, methodName << ": "
1014  "The input matrix A is null. Please call setMatrix() with a nonnull "
1015  "input matrix, then call initialize(), before calling this method.");
1016 
1017  // We don't count initialization in compute() time.
1018  if (! isInitialized ()) {
1019  initialize ();
1020  }
1021 
1022  if (hasBlockCrsMatrix_) {
1023  computeBlockCrs ();
1024  return;
1025  }
1026 
1027  Teuchos::RCP<Teuchos::Time> timer =
1028  Teuchos::TimeMonitor::getNewCounter (methodName);
1029  double startTime = timer->wallTime();
1030 
1031  { // Timing of compute starts here.
1032  Teuchos::TimeMonitor timeMon (*timer);
1033 
1034  // Precompute some quantities for "fixing" zero or tiny diagonal
1035  // entries. We'll only use them if this "fixing" is enabled.
1036  //
1037  // SmallTraits covers for the lack of eps() in
1038  // Teuchos::ScalarTraits when its template parameter is not a
1039  // floating-point type. (Ifpack2 sometimes gets instantiated for
1040  // integer Scalar types.)
1041  IST oneOverMinDiagVal = KAT::one () / static_cast<IST> (SmallTraits<scalar_type>::eps ());
1042  if ( MinDiagonalValue_ != zero)
1043  oneOverMinDiagVal = KAT::one () / static_cast<IST> (MinDiagonalValue_);
1044 
1045  // It's helpful not to have to recompute this magnitude each time.
1046  const magnitude_type minDiagValMag = STS::magnitude (MinDiagonalValue_);
1047 
1048  const LO numMyRows = static_cast<LO> (A_->getNodeNumRows ());
1049 
1050  TEUCHOS_TEST_FOR_EXCEPTION
1051  (NumSweeps_ < 0, std::logic_error, methodName
1052  << ": NumSweeps_ = " << NumSweeps_ << " < 0. "
1053  "Please report this bug to the Ifpack2 developers.");
1054  IsComputed_ = false;
1055 
1056  if (Diagonal_.is_null()) {
1057  // A_->getLocalDiagCopy fills in all Vector entries, even if the
1058  // matrix has no stored entries in the corresponding rows.
1059  Diagonal_ = rcp (new vector_type (A_->getRowMap (), false));
1060  }
1061 
1062  if (checkDiagEntries_) {
1063  //
1064  // Check diagonal elements, replace zero elements with the minimum
1065  // diagonal value, and store their inverses. Count the number of
1066  // "small" and zero diagonal entries while we're at it.
1067  //
1068  size_t numSmallDiagEntries = 0; // "small" includes zero
1069  size_t numZeroDiagEntries = 0; // # zero diagonal entries
1070  size_t numNegDiagEntries = 0; // # negative (real parts of) diagonal entries
1071  magnitude_type minMagDiagEntryMag = STM::zero ();
1072  magnitude_type maxMagDiagEntryMag = STM::zero ();
1073 
1074  // FIXME: We are extracting the diagonal more than once. That
1075  // isn't very efficient, but we shouldn't be checking
1076  // diagonal entries at scale anyway.
1077  A_->getLocalDiagCopy (*Diagonal_); // slow path for row matrix
1078  std::unique_ptr<vector_type> origDiag;
1079  origDiag = std::unique_ptr<vector_type> (new vector_type (*Diagonal_, Teuchos::Copy));
1080  auto diag2d = Diagonal_->getLocalViewHost(Tpetra::Access::ReadWrite);
1081  auto diag = Kokkos::subview(diag2d, Kokkos::ALL(), 0);
1082  // As we go, keep track of the diagonal entries with the
1083  // least and greatest magnitude. We could use the trick of
1084  // starting min with +Inf and max with -Inf, but that
1085  // doesn't work if scalar_type is a built-in integer type.
1086  // Thus, we have to start by reading the first diagonal
1087  // entry redundantly.
1088  if (numMyRows != 0) {
1089  const magnitude_type d_0_mag = KAT::abs (diag(0));
1090  minMagDiagEntryMag = d_0_mag;
1091  maxMagDiagEntryMag = d_0_mag;
1092  }
1093 
1094  // Go through all the diagonal entries. Compute counts of
1095  // small-magnitude, zero, and negative-real-part entries.
1096  // Invert the diagonal entries that aren't too small. For
1097  // those too small in magnitude, replace them with
1098  // 1/MinDiagonalValue_ (or 1/eps if MinDiagonalValue_
1099  // happens to be zero).
1100  for (LO i = 0; i < numMyRows; ++i) {
1101  const IST d_i = diag(i);
1102  const magnitude_type d_i_mag = KAT::abs (d_i);
1103  // Work-around for GitHub Issue #5269.
1104  //const magnitude_type d_i_real = KAT::real (d_i);
1105  const auto d_i_real = getRealValue (d_i);
1106 
1107  // We can't compare complex numbers, but we can compare their
1108  // real parts.
1109  if (d_i_real < STM::zero ()) {
1110  ++numNegDiagEntries;
1111  }
1112  if (d_i_mag < minMagDiagEntryMag) {
1113  minMagDiagEntryMag = d_i_mag;
1114  }
1115  if (d_i_mag > maxMagDiagEntryMag) {
1116  maxMagDiagEntryMag = d_i_mag;
1117  }
1118 
1119  if (fixTinyDiagEntries_) {
1120  // <= not <, in case minDiagValMag is zero.
1121  if (d_i_mag <= minDiagValMag) {
1122  ++numSmallDiagEntries;
1123  if (d_i_mag == STM::zero ()) {
1124  ++numZeroDiagEntries;
1125  }
1126  diag(i) = oneOverMinDiagVal;
1127  }
1128  else {
1129  diag(i) = KAT::one () / d_i;
1130  }
1131  }
1132  else { // Don't fix zero or tiny diagonal entries.
1133  // <= not <, in case minDiagValMag is zero.
1134  if (d_i_mag <= minDiagValMag) {
1135  ++numSmallDiagEntries;
1136  if (d_i_mag == STM::zero ()) {
1137  ++numZeroDiagEntries;
1138  }
1139  }
1140  diag(i) = KAT::one () / d_i;
1141  }
1142  }
1143 
1144  // Collect global data about the diagonal entries. Only do this
1145  // (which involves O(1) all-reduces) if the user asked us to do
1146  // the extra work.
1147  //
1148  // FIXME (mfh 28 Mar 2013) This is wrong if some processes have
1149  // zero rows. Fixing this requires one of two options:
1150  //
1151  // 1. Use a custom reduction operation that ignores processes
1152  // with zero diagonal entries.
1153  // 2. Split the communicator, compute all-reduces using the
1154  // subcommunicator over processes that have a nonzero number
1155  // of diagonal entries, and then broadcast from one of those
1156  // processes (if there is one) to the processes in the other
1157  // subcommunicator.
1158  const Comm<int>& comm = * (A_->getRowMap ()->getComm ());
1159 
1160  // Compute global min and max magnitude of entries.
1161  Array<magnitude_type> localVals (2);
1162  localVals[0] = minMagDiagEntryMag;
1163  // (- (min (- x))) is the same as (max x).
1164  localVals[1] = -maxMagDiagEntryMag;
1165  Array<magnitude_type> globalVals (2);
1166  reduceAll<int, magnitude_type> (comm, REDUCE_MIN, 2,
1167  localVals.getRawPtr (),
1168  globalVals.getRawPtr ());
1169  globalMinMagDiagEntryMag_ = globalVals[0];
1170  globalMaxMagDiagEntryMag_ = -globalVals[1];
1171 
1172  Array<size_t> localCounts (3);
1173  localCounts[0] = numSmallDiagEntries;
1174  localCounts[1] = numZeroDiagEntries;
1175  localCounts[2] = numNegDiagEntries;
1176  Array<size_t> globalCounts (3);
1177  reduceAll<int, size_t> (comm, REDUCE_SUM, 3,
1178  localCounts.getRawPtr (),
1179  globalCounts.getRawPtr ());
1180  globalNumSmallDiagEntries_ = globalCounts[0];
1181  globalNumZeroDiagEntries_ = globalCounts[1];
1182  globalNumNegDiagEntries_ = globalCounts[2];
1183 
1184  // Compute and save the difference between the computed inverse
1185  // diagonal, and the original diagonal's inverse.
1186  vector_type diff (A_->getRowMap ());
1187  diff.reciprocal (*origDiag);
1188  diff.update (-one, *Diagonal_, one);
1189  globalDiagNormDiff_ = diff.norm2 ();
1190  }
1191 
1192  // Extract the diagonal entries. The CrsMatrix graph version is
1193  // faster than the row matrix version for subsequent calls to
1194  // compute(), since it caches the diagonal offsets.
1195 
1196  bool debugAgainstSlowPath = false;
1197 
1198  auto crsMat = rcp_dynamic_cast<const crs_matrix_type> (A_);
1199 
1200  if (crsMat.get() && crsMat->isFillComplete ()) {
1201  // The invDiagKernel object computes diagonal offsets if
1202  // necessary. The "compute" call extracts diagonal enties,
1203  // optionally applies the L1 method and replacement of small
1204  // entries, and then inverts.
1205  if (invDiagKernel_.is_null())
1206  invDiagKernel_ = rcp(new Ifpack2::Details::InverseDiagonalKernel<op_type>(A_));
1207  else
1208  invDiagKernel_->setMatrix(A_);
1209  invDiagKernel_->compute(*Diagonal_,
1210  DoL1Method_ && IsParallel_, L1Eta_,
1211  fixTinyDiagEntries_, minDiagValMag);
1212  savedDiagOffsets_ = true;
1213 
1214  // mfh 27 May 2019: Later on, we should introduce an IFPACK2_DEBUG
1215  // environment variable to control this behavior at run time.
1216 #ifdef HAVE_IFPACK2_DEBUG
1217  debugAgainstSlowPath = true;
1218 #endif
1219  }
1220 
1221  if (crsMat.is_null() || ! crsMat->isFillComplete () || debugAgainstSlowPath) {
1222  // We could not call the CrsMatrix version above, or want to
1223  // debug by comparing against the slow path.
1224 
1225  // FIXME: The L1 method in this code path runs on host, since we
1226  // don't have device access for row matrices.
1227 
1228  RCP<vector_type> Diagonal;
1229  if (debugAgainstSlowPath)
1230  Diagonal = rcp(new vector_type(A_->getRowMap ()));
1231  else
1232  Diagonal = Diagonal_;
1233 
1234  A_->getLocalDiagCopy (*Diagonal); // slow path for row matrix
1235 
1236  // Setup for L1 Methods.
1237  // Here we add half the value of the off-processor entries in the row,
1238  // but only if diagonal isn't sufficiently large.
1239  //
1240  // This follows from Equation (6.5) in: Baker, Falgout, Kolev and
1241  // Yang. "Multigrid Smoothers for Ultraparallel Computing." SIAM
1242  // J. Sci. Comput., Vol. 33, No. 5. (2011), pp. 2864-2887.
1243  //
1244  if (DoL1Method_ && IsParallel_) {
1245  const row_matrix_type& A_row = *A_;
1246  auto diag = Diagonal->getLocalViewHost(Tpetra::Access::ReadWrite);
1247  const magnitude_type two = STM::one () + STM::one ();
1248  const size_t maxLength = A_row.getNodeMaxNumRowEntries ();
1249  nonconst_local_inds_host_view_type indices("indices",maxLength);
1250  nonconst_values_host_view_type values("values",maxLength);
1251  size_t numEntries;
1252 
1253  for (LO i = 0; i < numMyRows; ++i) {
1254  A_row.getLocalRowCopy (i, indices, values, numEntries);
1255  magnitude_type diagonal_boost = STM::zero ();
1256  for (size_t k = 0 ; k < numEntries; ++k) {
1257  if (indices[k] >= numMyRows) {
1258  diagonal_boost += STS::magnitude (values[k] / two);
1259  }
1260  }
1261  if (KAT::magnitude (diag(i, 0)) < L1Eta_ * diagonal_boost) {
1262  diag(i, 0) += diagonal_boost;
1263  }
1264  }
1265  }
1266 
1267  //
1268  // Invert the matrix's diagonal entries (in Diagonal).
1269  //
1270  if (fixTinyDiagEntries_) {
1271  // Go through all the diagonal entries. Invert those that
1272  // aren't too small in magnitude. For those that are too
1273  // small in magnitude, replace them with oneOverMinDiagVal.
1274  auto localDiag = Diagonal->getLocalViewDevice(Tpetra::Access::ReadWrite);
1275  Kokkos::parallel_for(Kokkos::RangePolicy<MyExecSpace>(0, localDiag.extent(0)),
1276  KOKKOS_LAMBDA (const IST& d_i) {
1277  const magnitude_type d_i_mag = KAT::magnitude (d_i);
1278  // <= not <, in case minDiagValMag is zero.
1279  if (d_i_mag <= minDiagValMag) {
1280  return oneOverMinDiagVal;
1281  }
1282  else {
1283  // For Stokhos types, operator/ returns an expression
1284  // type. Explicitly convert to IST before returning.
1285  return IST (KAT::one () / d_i);
1286  }
1287  });
1288  }
1289  else { // don't fix tiny or zero diagonal entries
1290  Diagonal->reciprocal (*Diagonal);
1291  }
1292 
1293  if (debugAgainstSlowPath) {
1294  // Validate the fast-path diagonal against the slow-path diagonal.
1295  Diagonal->update (STS::one (), *Diagonal_, -STS::one ());
1296  const magnitude_type err = Diagonal->normInf ();
1297  // The two diagonals should be exactly the same, so their
1298  // difference should be exactly zero.
1299  TEUCHOS_TEST_FOR_EXCEPTION
1300  (err != STM::zero(), std::logic_error, methodName << ": "
1301  << "\"fast-path\" diagonal computation failed. "
1302  "\\|D1 - D2\\|_inf = " << err << ".");
1303  }
1304  }
1305 
1306  // Count floating-point operations of computing the inverse diagonal.
1307  //
1308  // FIXME (mfh 30 Mar 2013) Shouldn't counts be global, not local?
1309  if (STS::isComplex) { // magnitude: at least 3 flops per diagonal entry
1310  ComputeFlops_ += 4.0 * numMyRows;
1311  }
1312  else {
1313  ComputeFlops_ += numMyRows;
1314  }
1315 
1316  if (PrecType_ == Ifpack2::Details::MTGS ||
1317  PrecType_ == Ifpack2::Details::MTSGS ||
1318  PrecType_ == Ifpack2::Details::GS2 ||
1319  PrecType_ == Ifpack2::Details::SGS2) {
1320 
1321  //KokkosKernels GaussSeidel Initialization.
1322 
1323  TEUCHOS_TEST_FOR_EXCEPTION
1324  (crsMat.is_null(), std::logic_error, methodName << ": "
1325  "Multithreaded Gauss-Seidel methods currently only work "
1326  "when the input matrix is a Tpetra::CrsMatrix.");
1327  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
1328 
1329  //TODO BMK: This should be ReadOnly, and KokkosKernels should accept a
1330  //const-valued view for user-provided D^-1. OK for now, Diagonal_ is nonconst.
1331  auto diagView_2d = Diagonal_->getLocalViewDevice (Tpetra::Access::ReadWrite);
1332  auto diagView_1d = Kokkos::subview (diagView_2d, Kokkos::ALL (), 0);
1333  using KokkosSparse::Experimental::gauss_seidel_numeric;
1334  gauss_seidel_numeric<mt_kernel_handle_type,
1335  lno_row_view_t,
1336  lno_nonzero_view_t,
1337  scalar_nonzero_view_t> (mtKernelHandle_.getRawPtr (),
1338  A_->getNodeNumRows (),
1339  A_->getNodeNumCols (),
1340  kcsr.graph.row_map,
1341  kcsr.graph.entries,
1342  kcsr.values,
1343  diagView_1d,
1344  is_matrix_structurally_symmetric_);
1345  }
1346  else if(PrecType_ == Ifpack2::Details::GS || PrecType_ == Ifpack2::Details::SGS) {
1347  if(crsMat)
1348  serialGaussSeidel_ = rcp(new SerialGaussSeidel(crsMat, Diagonal_, localSmoothingIndices_, DampingFactor_));
1349  else
1350  serialGaussSeidel_ = rcp(new SerialGaussSeidel(A_, Diagonal_, localSmoothingIndices_, DampingFactor_));
1351  }
1352  } // end TimeMonitor scope
1353 
1354  ComputeTime_ += (timer->wallTime() - startTime);
1355  ++NumCompute_;
1356  IsComputed_ = true;
1357 }
1358 
1359 
1360 template<class MatrixType>
1361 void
1363 ApplyInverseRichardson (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1364  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1365 {
1366  using Teuchos::as;
1367  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1368  const double numVectors = as<double> (X.getNumVectors ());
1369  if (ZeroStartingSolution_) {
1370  // For the first Richardson sweep, if we are allowed to assume that
1371  // the initial guess is zero, then Richardson is just alpha times the RHS
1372  // Compute it as Y(i,j) = DampingFactor_ * X(i,j).
1373  Y.scale(DampingFactor_,X);
1374 
1375  // Count (global) floating-point operations. Ifpack2 represents
1376  // this as a floating-point number rather than an integer, so that
1377  // overflow (for a very large number of calls, or a very large
1378  // problem) is approximate instead of catastrophic.
1379  double flopUpdate = 0.0;
1380  if (DampingFactor_ == STS::one ()) {
1381  // Y(i,j) = X(i,j): one multiply for each entry of Y.
1382  flopUpdate = numGlobalRows * numVectors;
1383  } else {
1384  // Y(i,j) = DampingFactor_ * X(i,j):
1385  // One multiplies per entry of Y.
1386  flopUpdate = numGlobalRows * numVectors;
1387  }
1388  ApplyFlops_ += flopUpdate;
1389  if (NumSweeps_ == 1) {
1390  return;
1391  }
1392  }
1393  // If we were allowed to assume that the starting guess was zero,
1394  // then we have already done the first sweep above.
1395  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1396 
1397  // Allocate a multivector if the cached one isn't perfect
1398  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1399 
1400  for (int j = startSweep; j < NumSweeps_; ++j) {
1401  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1402  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1403  Y.update(DampingFactor_,*cachedMV_,STS::one());
1404  }
1405 
1406  // For each column of output, for each pass over the matrix:
1407  //
1408  // - One + and one * for each matrix entry
1409  // - One / and one + for each row of the matrix
1410  // - If the damping factor is not one: one * for each row of the
1411  // matrix. (It's not fair to count this if the damping factor is
1412  // one, since the implementation could skip it. Whether it does
1413  // or not is the implementation's choice.)
1414 
1415  // Floating-point operations due to the damping factor, per matrix
1416  // row, per direction, per columm of output.
1417  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1418  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1419  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1420  (2.0 * numGlobalNonzeros + dampingFlops);
1421 }
1422 
1423 
1424 template<class MatrixType>
1425 void
1426 Relaxation<MatrixType>::
1427 ApplyInverseJacobi (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1428  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y) const
1429 {
1430  using Teuchos::as;
1431  if (hasBlockCrsMatrix_) {
1432  ApplyInverseJacobi_BlockCrsMatrix (X, Y);
1433  return;
1434  }
1435 
1436  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1437  const double numVectors = as<double> (X.getNumVectors ());
1438  if (ZeroStartingSolution_) {
1439  // For the first Jacobi sweep, if we are allowed to assume that
1440  // the initial guess is zero, then Jacobi is just diagonal
1441  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1442  // Compute it as Y(i,j) = DampingFactor_ * X(i,j) * D(i).
1443  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, X, STS::zero ());
1444 
1445  // Count (global) floating-point operations. Ifpack2 represents
1446  // this as a floating-point number rather than an integer, so that
1447  // overflow (for a very large number of calls, or a very large
1448  // problem) is approximate instead of catastrophic.
1449  double flopUpdate = 0.0;
1450  if (DampingFactor_ == STS::one ()) {
1451  // Y(i,j) = X(i,j) * D(i): one multiply for each entry of Y.
1452  flopUpdate = numGlobalRows * numVectors;
1453  } else {
1454  // Y(i,j) = DampingFactor_ * X(i,j) * D(i):
1455  // Two multiplies per entry of Y.
1456  flopUpdate = 2.0 * numGlobalRows * numVectors;
1457  }
1458  ApplyFlops_ += flopUpdate;
1459  if (NumSweeps_ == 1) {
1460  return;
1461  }
1462  }
1463  // If we were allowed to assume that the starting guess was zero,
1464  // then we have already done the first sweep above.
1465  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1466 
1467  // Allocate a multivector if the cached one isn't perfect
1468  updateCachedMultiVector(Y.getMap(),as<size_t>(numVectors));
1469 
1470  for (int j = startSweep; j < NumSweeps_; ++j) {
1471  // Each iteration: Y = Y + \omega D^{-1} (X - A*Y)
1472  Tpetra::Details::residual(*A_,Y,X,*cachedMV_);
1473  Y.elementWiseMultiply (DampingFactor_, *Diagonal_, *cachedMV_, STS::one ());
1474  }
1475 
1476  // For each column of output, for each pass over the matrix:
1477  //
1478  // - One + and one * for each matrix entry
1479  // - One / and one + for each row of the matrix
1480  // - If the damping factor is not one: one * for each row of the
1481  // matrix. (It's not fair to count this if the damping factor is
1482  // one, since the implementation could skip it. Whether it does
1483  // or not is the implementation's choice.)
1484 
1485  // Floating-point operations due to the damping factor, per matrix
1486  // row, per direction, per columm of output.
1487  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1488  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
1489  ApplyFlops_ += as<double> (NumSweeps_ - startSweep) * numVectors *
1490  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1491 }
1492 
1493 template<class MatrixType>
1494 void
1495 Relaxation<MatrixType>::
1496 ApplyInverseJacobi_BlockCrsMatrix (const Tpetra::MultiVector<scalar_type,
1497  local_ordinal_type,
1498  global_ordinal_type,
1499  node_type>& X,
1500  Tpetra::MultiVector<scalar_type,
1501  local_ordinal_type,
1502  global_ordinal_type,
1503  node_type>& Y) const
1504 {
1505  using Tpetra::BlockMultiVector;
1506  using BMV = BlockMultiVector<scalar_type, local_ordinal_type,
1507  global_ordinal_type, node_type>;
1508 
1509  const block_crs_matrix_type* blockMatConst =
1510  dynamic_cast<const block_crs_matrix_type*> (A_.getRawPtr ());
1511  TEUCHOS_TEST_FOR_EXCEPTION
1512  (blockMatConst == nullptr, std::logic_error, "This method should "
1513  "never be called if the matrix A_ is not a BlockCrsMatrix. "
1514  "Please report this bug to the Ifpack2 developers.");
1515  // mfh 23 Jan 2016: Unfortunately, the const cast is necessary.
1516  // This is because applyBlock() is nonconst (more accurate), while
1517  // apply() is const (required by Tpetra::Operator interface, but a
1518  // lie, because it possibly allocates temporary buffers).
1519  block_crs_matrix_type* blockMat =
1520  const_cast<block_crs_matrix_type*> (blockMatConst);
1521 
1522  auto meshRowMap = blockMat->getRowMap ();
1523  auto meshColMap = blockMat->getColMap ();
1524  const local_ordinal_type blockSize = blockMat->getBlockSize ();
1525 
1526  BMV X_blk (X, *meshColMap, blockSize);
1527  BMV Y_blk (Y, *meshRowMap, blockSize);
1528 
1529  if (ZeroStartingSolution_) {
1530  // For the first sweep, if we are allowed to assume that the
1531  // initial guess is zero, then block Jacobi is just block diagonal
1532  // scaling. (A_ij * x_j = 0 for i != j, since x_j = 0.)
1533  Y_blk.blockWiseMultiply (DampingFactor_, blockDiag_, X_blk);
1534  if (NumSweeps_ == 1) {
1535  return;
1536  }
1537  }
1538 
1539  auto pointRowMap = Y.getMap ();
1540  const size_t numVecs = X.getNumVectors ();
1541 
1542  // We don't need to initialize the result MV, since the sparse
1543  // mat-vec will clobber its contents anyway.
1544  BMV A_times_Y (*meshRowMap, *pointRowMap, blockSize, numVecs);
1545 
1546  // If we were allowed to assume that the starting guess was zero,
1547  // then we have already done the first sweep above.
1548  const int startSweep = ZeroStartingSolution_ ? 1 : 0;
1549 
1550  for (int j = startSweep; j < NumSweeps_; ++j) {
1551  blockMat->applyBlock (Y_blk, A_times_Y);
1552  // Y := Y + \omega D^{-1} (X - A*Y). Use A_times_Y as scratch.
1553  Y_blk.blockJacobiUpdate (DampingFactor_, blockDiag_,
1554  X_blk, A_times_Y, STS::one ());
1555  }
1556 }
1557 
1558 template<class MatrixType>
1559 void
1560 Relaxation<MatrixType>::
1561 ApplyInverseSerialGS (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1562  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1563  Tpetra::ESweepDirection direction) const
1564 {
1565  using this_type = Relaxation<MatrixType>;
1566  // The CrsMatrix version is faster, because it can access the sparse
1567  // matrix data directly, rather than by copying out each row's data
1568  // in turn. Thus, we check whether the RowMatrix is really a
1569  // CrsMatrix.
1570  //
1571  // FIXME (mfh 07 Jul 2013) See note on crs_matrix_type typedef
1572  // declaration in Ifpack2_Relaxation_decl.hpp header file. The code
1573  // will still be correct if the cast fails, but it will use an
1574  // unoptimized kernel.
1575  auto blockCrsMat = Teuchos::rcp_dynamic_cast<const block_crs_matrix_type> (A_);
1576  auto crsMat = Teuchos::rcp_dynamic_cast<const crs_matrix_type> (A_);
1577  if (blockCrsMat.get()) {
1578  const_cast<this_type&> (*this).ApplyInverseSerialGS_BlockCrsMatrix (*blockCrsMat, X, Y, direction);
1579  }
1580  else if (crsMat.get()) {
1581  ApplyInverseSerialGS_CrsMatrix (*crsMat, X, Y, direction);
1582  }
1583  else {
1584  ApplyInverseSerialGS_RowMatrix (X, Y, direction);
1585  }
1586 }
1587 
1588 
1589 template<class MatrixType>
1590 void
1591 Relaxation<MatrixType>::
1592 ApplyInverseSerialGS_RowMatrix (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1593  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1594  Tpetra::ESweepDirection direction) const {
1595  using Teuchos::Array;
1596  using Teuchos::ArrayRCP;
1597  using Teuchos::ArrayView;
1598  using Teuchos::as;
1599  using Teuchos::RCP;
1600  using Teuchos::rcp;
1601  using Teuchos::rcpFromRef;
1602  typedef Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type> MV;
1603 
1604  // Tpetra's GS implementation for CrsMatrix handles zeroing out the
1605  // starting multivector itself. The generic RowMatrix version here
1606  // does not, so we have to zero out Y here.
1607  if (ZeroStartingSolution_) {
1608  Y.putScalar (STS::zero ());
1609  }
1610 
1611  size_t NumVectors = X.getNumVectors();
1612 
1613  RCP<MV> Y2;
1614  if (IsParallel_) {
1615  if (Importer_.is_null ()) { // domain and column Maps are the same.
1616  updateCachedMultiVector (Y.getMap (), NumVectors);
1617  }
1618  else {
1619  updateCachedMultiVector (Importer_->getTargetMap (), NumVectors);
1620  }
1621  Y2 = cachedMV_;
1622  }
1623  else {
1624  Y2 = rcpFromRef (Y);
1625  }
1626 
1627  for (int j = 0; j < NumSweeps_; ++j) {
1628  // data exchange is here, once per sweep
1629  if (IsParallel_) {
1630  if (Importer_.is_null ()) {
1631  // FIXME (mfh 27 May 2019) This doesn't deep copy -- not
1632  // clear if this is correct. Reevaluate at some point.
1633 
1634  *Y2 = Y; // just copy, since domain and column Maps are the same
1635  } else {
1636  Y2->doImport (Y, *Importer_, Tpetra::INSERT);
1637  }
1638  }
1639  serialGaussSeidel_->apply(*Y2, X, direction);
1640 
1641  // FIXME (mfh 02 Jan 2013) This is only correct if row Map == range Map.
1642  if (IsParallel_) {
1643  Tpetra::deep_copy (Y, *Y2);
1644  }
1645  }
1646 
1647  // See flop count discussion in implementation of ApplyInverseGS_CrsMatrix().
1648  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1649  const double numVectors = as<double> (X.getNumVectors ());
1650  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
1651  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
1652  ApplyFlops_ += 2.0 * NumSweeps_ * numVectors *
1653  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1654 }
1655 
1656 
1657 template<class MatrixType>
1658 void
1659 Relaxation<MatrixType>::
1660 ApplyInverseSerialGS_CrsMatrix(const crs_matrix_type& A,
1661  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1662  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1663  Tpetra::ESweepDirection direction) const
1664 {
1665  using Teuchos::null;
1666  using Teuchos::RCP;
1667  using Teuchos::rcp;
1668  using Teuchos::rcpFromRef;
1669  using Teuchos::rcp_const_cast;
1670  typedef scalar_type Scalar;
1671  const char prefix[] = "Ifpack2::Relaxation::SerialGS: ";
1672  const scalar_type ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1673 
1674  TEUCHOS_TEST_FOR_EXCEPTION(
1675  ! A.isFillComplete (), std::runtime_error,
1676  prefix << "The matrix is not fill complete.");
1677  TEUCHOS_TEST_FOR_EXCEPTION(
1678  NumSweeps_ < 0, std::invalid_argument,
1679  prefix << "The number of sweeps must be nonnegative, "
1680  "but you provided numSweeps = " << NumSweeps_ << " < 0.");
1681 
1682  if (NumSweeps_ == 0) {
1683  return;
1684  }
1685 
1686  RCP<const import_type> importer = A.getGraph ()->getImporter ();
1687 
1688  RCP<const map_type> domainMap = A.getDomainMap ();
1689  RCP<const map_type> rangeMap = A.getRangeMap ();
1690  RCP<const map_type> rowMap = A.getGraph ()->getRowMap ();
1691  RCP<const map_type> colMap = A.getGraph ()->getColMap ();
1692 
1693 #ifdef HAVE_IFPACK2_DEBUG
1694  {
1695  // The relation 'isSameAs' is transitive. It's also a
1696  // collective, so we don't have to do a "shared" test for
1697  // exception (i.e., a global reduction on the test value).
1698  TEUCHOS_TEST_FOR_EXCEPTION(
1699  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1700  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1701  "multivector X be in the domain Map of the matrix.");
1702  TEUCHOS_TEST_FOR_EXCEPTION(
1703  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1704  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1705  "B be in the range Map of the matrix.");
1706  TEUCHOS_TEST_FOR_EXCEPTION(
1707  ! Diagonal_->getMap ()->isSameAs (*rowMap), std::runtime_error,
1708  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the input "
1709  "D be in the row Map of the matrix.");
1710  TEUCHOS_TEST_FOR_EXCEPTION(
1711  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1712  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the row Map and the "
1713  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1714  TEUCHOS_TEST_FOR_EXCEPTION(
1715  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1716  "Tpetra::CrsMatrix::gaussSeidelCopy requires that the domain Map and "
1717  "the range Map of the matrix be the same.");
1718  }
1719 #endif
1720 
1721  // Fetch a (possibly cached) temporary column Map multivector
1722  // X_colMap, and a domain Map view X_domainMap of it. Both have
1723  // constant stride by construction. We know that the domain Map
1724  // must include the column Map, because our Gauss-Seidel kernel
1725  // requires that the row Map, domain Map, and range Map are all
1726  // the same, and that each process owns all of its own diagonal
1727  // entries of the matrix.
1728 
1729  RCP<multivector_type> X_colMap;
1730  RCP<multivector_type> X_domainMap;
1731  bool copyBackOutput = false;
1732  if (importer.is_null ()) {
1733  X_colMap = Teuchos::rcpFromRef (X);
1734  X_domainMap = Teuchos::rcpFromRef (X);
1735  // Column Map and domain Map are the same, so there are no
1736  // remote entries. Thus, if we are not setting the initial
1737  // guess to zero, we don't have to worry about setting remote
1738  // entries to zero, even though we are not doing an Import in
1739  // this case.
1740  if (ZeroStartingSolution_) {
1741  X_colMap->putScalar (ZERO);
1742  }
1743  // No need to copy back to X at end.
1744  }
1745  else { // Column Map and domain Map are _not_ the same.
1746  updateCachedMultiVector(colMap, X.getNumVectors());
1747  X_colMap = cachedMV_;
1748  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
1749 
1750  if (ZeroStartingSolution_) {
1751  // No need for an Import, since we're filling with zeros.
1752  X_colMap->putScalar (ZERO);
1753  } else {
1754  // We could just copy X into X_domainMap. However, that
1755  // wastes a copy, because the Import also does a copy (plus
1756  // communication). Since the typical use case for
1757  // Gauss-Seidel is a small number of sweeps (2 is typical), we
1758  // don't want to waste that copy. Thus, we do the Import
1759  // here, and skip the first Import in the first sweep.
1760  // Importing directly from X effects the copy into X_domainMap
1761  // (which is a view of X_colMap).
1762  X_colMap->doImport (X, *importer, Tpetra::INSERT);
1763  }
1764  copyBackOutput = true; // Don't forget to copy back at end.
1765  } // if column and domain Maps are (not) the same
1766 
1767  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1768  if (! importer.is_null () && sweep > 0) {
1769  // We already did the first Import for the zeroth sweep above,
1770  // if it was necessary.
1771  X_colMap->doImport (*X_domainMap, *importer, Tpetra::INSERT);
1772  }
1773  // Do local Gauss-Seidel (forward, backward or symmetric)
1774  serialGaussSeidel_->apply(*X_colMap, B, direction);
1775  }
1776 
1777  if (copyBackOutput) {
1778  try {
1779  deep_copy (X , *X_domainMap); // Copy result back into X.
1780  } catch (std::exception& e) {
1781  TEUCHOS_TEST_FOR_EXCEPTION(
1782  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
1783  "threw an exception: " << e.what ());
1784  }
1785  }
1786 
1787  // For each column of output, for each sweep over the matrix:
1788  //
1789  // - One + and one * for each matrix entry
1790  // - One / and one + for each row of the matrix
1791  // - If the damping factor is not one: one * for each row of the
1792  // matrix. (It's not fair to count this if the damping factor is
1793  // one, since the implementation could skip it. Whether it does
1794  // or not is the implementation's choice.)
1795 
1796  // Floating-point operations due to the damping factor, per matrix
1797  // row, per direction, per columm of output.
1798  const double dampingFlops = (DampingFactor_ == STS::one()) ? 0.0 : 1.0;
1799  const double numVectors = X.getNumVectors ();
1800  const double numGlobalRows = A_->getGlobalNumRows ();
1801  const double numGlobalNonzeros = A_->getGlobalNumEntries ();
1802  ApplyFlops_ += NumSweeps_ * numVectors *
1803  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
1804 }
1805 
1806 template<class MatrixType>
1807 void
1808 Relaxation<MatrixType>::
1809 ApplyInverseSerialGS_BlockCrsMatrix (const block_crs_matrix_type& A,
1810  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1811  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& Y,
1812  Tpetra::ESweepDirection direction)
1813 {
1814  using Tpetra::INSERT;
1815  using Teuchos::RCP;
1816  using Teuchos::rcp;
1817  using Teuchos::rcpFromRef;
1818 
1819  //FIXME: (tcf) 8/21/2014 -- may be problematic for multiple right hand sides
1820  //
1821  // NOTE (mfh 12 Sep 2014) I don't think it should be a problem for
1822  // multiple right-hand sides, unless the input or output MultiVector
1823  // does not have constant stride. We should check for that case
1824  // here, in case it doesn't work in localGaussSeidel (which is
1825  // entirely possible).
1826  block_multivector_type yBlock(Y, *(A.getGraph ()->getDomainMap()), A.getBlockSize());
1827  const block_multivector_type xBlock(X, *(A.getColMap ()), A.getBlockSize());
1828 
1829  bool performImport = false;
1830  RCP<block_multivector_type> yBlockCol;
1831  if (Importer_.is_null()) {
1832  yBlockCol = rcpFromRef(yBlock);
1833  }
1834  else {
1835  if (yBlockColumnPointMap_.is_null() ||
1836  yBlockColumnPointMap_->getNumVectors() != yBlock.getNumVectors() ||
1837  yBlockColumnPointMap_->getBlockSize() != yBlock.getBlockSize()) {
1838  yBlockColumnPointMap_ =
1839  rcp(new block_multivector_type(*(A.getColMap()), A.getBlockSize(),
1840  static_cast<local_ordinal_type>(yBlock.getNumVectors())));
1841  }
1842  yBlockCol = yBlockColumnPointMap_;
1843  if (pointImporter_.is_null()) {
1844  auto srcMap = rcp(new map_type(yBlock.getPointMap()));
1845  auto tgtMap = rcp(new map_type(yBlockCol->getPointMap()));
1846  pointImporter_ = rcp(new import_type(srcMap, tgtMap));
1847  }
1848  performImport = true;
1849  }
1850 
1851  multivector_type yBlock_mv;
1852  multivector_type yBlockCol_mv;
1853  RCP<const multivector_type> yBlockColPointDomain;
1854  if (performImport) { // create views (shallow copies)
1855  yBlock_mv = yBlock.getMultiVectorView();
1856  yBlockCol_mv = yBlockCol->getMultiVectorView();
1857  yBlockColPointDomain =
1858  yBlockCol_mv.offsetView(A.getDomainMap(), 0);
1859  }
1860 
1861  if (ZeroStartingSolution_) {
1862  yBlockCol->putScalar(STS::zero ());
1863  }
1864  else if (performImport) {
1865  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1866  }
1867 
1868  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
1869  if (performImport && sweep > 0) {
1870  yBlockCol_mv.doImport(yBlock_mv, *pointImporter_, Tpetra::INSERT);
1871  }
1872  serialGaussSeidel_->applyBlock(*yBlockCol, xBlock, direction);
1873  if (performImport) {
1874  Tpetra::deep_copy(Y, *yBlockColPointDomain);
1875  }
1876  }
1877 }
1878 
1879 template<class MatrixType>
1880 void
1881 Relaxation<MatrixType>::
1882 ApplyInverseMTGS_CrsMatrix(
1883  const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& B,
1884  Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_type,node_type>& X,
1885  Tpetra::ESweepDirection direction) const
1886 {
1887  using Teuchos::null;
1888  using Teuchos::RCP;
1889  using Teuchos::rcp;
1890  using Teuchos::rcpFromRef;
1891  using Teuchos::rcp_const_cast;
1892  using Teuchos::as;
1893 
1894  typedef scalar_type Scalar;
1895 
1896  const char prefix[] = "Ifpack2::Relaxation::(reordered)MTGaussSeidel: ";
1897  const Scalar ZERO = Teuchos::ScalarTraits<Scalar>::zero ();
1898 
1899  const crs_matrix_type* crsMat = dynamic_cast<const crs_matrix_type*> (A_.get());
1900  TEUCHOS_TEST_FOR_EXCEPTION
1901  (crsMat == nullptr, std::logic_error, "Ifpack2::Relaxation::apply: "
1902  "Multithreaded Gauss-Seidel methods currently only work when the "
1903  "input matrix is a Tpetra::CrsMatrix.");
1904 
1905  //Teuchos::ArrayView<local_ordinal_type> rowIndices; // unused, as of 04 Jan 2017
1906  TEUCHOS_TEST_FOR_EXCEPTION
1907  (! localSmoothingIndices_.is_null (), std::logic_error,
1908  "Ifpack2's implementation of Multithreaded Gauss-Seidel does not "
1909  "implement the case where the user supplies an iteration order. "
1910  "This error used to appear as \"MT GaussSeidel ignores the given "
1911  "order\". "
1912  "I tried to add more explanation, but I didn't implement \"MT "
1913  "GaussSeidel\" [sic]. "
1914  "You'll have to ask the person who did.");
1915 
1916  TEUCHOS_TEST_FOR_EXCEPTION
1917  (crsMat == nullptr, std::logic_error, prefix << "The matrix is null."
1918  " This should never happen. Please report this bug to the Ifpack2 "
1919  "developers.");
1920  TEUCHOS_TEST_FOR_EXCEPTION
1921  (! crsMat->isFillComplete (), std::runtime_error, prefix << "The "
1922  "input CrsMatrix is not fill complete. Please call fillComplete "
1923  "on the matrix, then do setup again, before calling apply(). "
1924  "\"Do setup\" means that if only the matrix's values have changed "
1925  "since last setup, you need only call compute(). If the matrix's "
1926  "structure may also have changed, you must first call initialize(), "
1927  "then call compute(). If you have not set up this preconditioner "
1928  "for this matrix before, you must first call initialize(), then "
1929  "call compute().");
1930  TEUCHOS_TEST_FOR_EXCEPTION
1931  (NumSweeps_ < 0, std::logic_error, prefix << ": NumSweeps_ = "
1932  << NumSweeps_ << " < 0. Please report this bug to the Ifpack2 "
1933  "developers.");
1934  if (NumSweeps_ == 0) {
1935  return;
1936  }
1937 
1938  RCP<const import_type> importer = crsMat->getGraph ()->getImporter ();
1939  TEUCHOS_TEST_FOR_EXCEPTION(
1940  ! crsMat->getGraph ()->getExporter ().is_null (), std::runtime_error,
1941  "This method's implementation currently requires that the matrix's row, "
1942  "domain, and range Maps be the same. This cannot be the case, because "
1943  "the matrix has a nontrivial Export object.");
1944 
1945  RCP<const map_type> domainMap = crsMat->getDomainMap ();
1946  RCP<const map_type> rangeMap = crsMat->getRangeMap ();
1947  RCP<const map_type> rowMap = crsMat->getGraph ()->getRowMap ();
1948  RCP<const map_type> colMap = crsMat->getGraph ()->getColMap ();
1949 
1950 #ifdef HAVE_IFPACK2_DEBUG
1951  {
1952  // The relation 'isSameAs' is transitive. It's also a
1953  // collective, so we don't have to do a "shared" test for
1954  // exception (i.e., a global reduction on the test value).
1955  TEUCHOS_TEST_FOR_EXCEPTION(
1956  ! X.getMap ()->isSameAs (*domainMap), std::runtime_error,
1957  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1958  "multivector X be in the domain Map of the matrix.");
1959  TEUCHOS_TEST_FOR_EXCEPTION(
1960  ! B.getMap ()->isSameAs (*rangeMap), std::runtime_error,
1961  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1962  "B be in the range Map of the matrix.");
1963  TEUCHOS_TEST_FOR_EXCEPTION(
1964  ! D.getMap ()->isSameAs (*rowMap), std::runtime_error,
1965  "Ifpack2::Relaxation::MTGaussSeidel requires that the input "
1966  "D be in the row Map of the matrix.");
1967  TEUCHOS_TEST_FOR_EXCEPTION(
1968  ! rowMap->isSameAs (*rangeMap), std::runtime_error,
1969  "Ifpack2::Relaxation::MTGaussSeidel requires that the row Map and the "
1970  "range Map be the same (in the sense of Tpetra::Map::isSameAs).");
1971  TEUCHOS_TEST_FOR_EXCEPTION(
1972  ! domainMap->isSameAs (*rangeMap), std::runtime_error,
1973  "Ifpack2::Relaxation::MTGaussSeidel requires that the domain Map and "
1974  "the range Map of the matrix be the same.");
1975  }
1976 #else
1977  // Forestall any compiler warnings for unused variables.
1978  (void) rangeMap;
1979  (void) rowMap;
1980 #endif // HAVE_IFPACK2_DEBUG
1981 
1982  // Fetch a (possibly cached) temporary column Map multivector
1983  // X_colMap, and a domain Map view X_domainMap of it. Both have
1984  // constant stride by construction. We know that the domain Map
1985  // must include the column Map, because our Gauss-Seidel kernel
1986  // requires that the row Map, domain Map, and range Map are all
1987  // the same, and that each process owns all of its own diagonal
1988  // entries of the matrix.
1989 
1990  RCP<multivector_type> X_colMap;
1991  RCP<multivector_type> X_domainMap;
1992  bool copyBackOutput = false;
1993  if (importer.is_null ()) {
1994  if (X.isConstantStride ()) {
1995  X_colMap = rcpFromRef (X);
1996  X_domainMap = rcpFromRef (X);
1997 
1998  // Column Map and domain Map are the same, so there are no
1999  // remote entries. Thus, if we are not setting the initial
2000  // guess to zero, we don't have to worry about setting remote
2001  // entries to zero, even though we are not doing an Import in
2002  // this case.
2003  if (ZeroStartingSolution_) {
2004  X_colMap->putScalar (ZERO);
2005  }
2006  // No need to copy back to X at end.
2007  }
2008  else {
2009  // We must copy X into a constant stride multivector.
2010  // Just use the cached column Map multivector for that.
2011  // force=true means fill with zeros, so no need to fill
2012  // remote entries (not in domain Map) with zeros.
2013  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2014  X_colMap = cachedMV_;
2015  // X_domainMap is always a domain Map view of the column Map
2016  // multivector. In this case, the domain and column Maps are
2017  // the same, so X_domainMap _is_ X_colMap.
2018  X_domainMap = X_colMap;
2019  if (! ZeroStartingSolution_) { // Don't copy if zero initial guess
2020  try {
2021  deep_copy (*X_domainMap , X); // Copy X into constant stride MV
2022  } catch (std::exception& e) {
2023  std::ostringstream os;
2024  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2025  "deep_copy(*X_domainMap, X) threw an exception: "
2026  << e.what () << ".";
2027  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2028  }
2029  }
2030  copyBackOutput = true; // Don't forget to copy back at end.
2031  /*
2032  TPETRA_EFFICIENCY_WARNING(
2033  ! X.isConstantStride (),
2034  std::runtime_error,
2035  "MTGaussSeidel: The current implementation of the Gauss-Seidel "
2036  "kernel requires that X and B both have constant stride. Since X "
2037  "does not have constant stride, we had to make a copy. This is a "
2038  "limitation of the current implementation and not your fault, but we "
2039  "still report it as an efficiency warning for your information.");
2040  */
2041  }
2042  }
2043  else { // Column Map and domain Map are _not_ the same.
2044  updateCachedMultiVector(colMap,as<size_t>(X.getNumVectors()));
2045  X_colMap = cachedMV_;
2046 
2047  X_domainMap = X_colMap->offsetViewNonConst (domainMap, 0);
2048 
2049  if (ZeroStartingSolution_) {
2050  // No need for an Import, since we're filling with zeros.
2051  X_colMap->putScalar (ZERO);
2052  } else {
2053  // We could just copy X into X_domainMap. However, that
2054  // wastes a copy, because the Import also does a copy (plus
2055  // communication). Since the typical use case for
2056  // Gauss-Seidel is a small number of sweeps (2 is typical), we
2057  // don't want to waste that copy. Thus, we do the Import
2058  // here, and skip the first Import in the first sweep.
2059  // Importing directly from X effects the copy into X_domainMap
2060  // (which is a view of X_colMap).
2061  X_colMap->doImport (X, *importer, Tpetra::CombineMode::INSERT);
2062  }
2063  copyBackOutput = true; // Don't forget to copy back at end.
2064  } // if column and domain Maps are (not) the same
2065 
2066  // The Gauss-Seidel / SOR kernel expects multivectors of constant
2067  // stride. X_colMap is by construction, but B might not be. If
2068  // it's not, we have to make a copy.
2069  RCP<const multivector_type> B_in;
2070  if (B.isConstantStride ()) {
2071  B_in = rcpFromRef (B);
2072  }
2073  else {
2074  // Range Map and row Map are the same in this case, so we can
2075  // use the cached row Map multivector to store a constant stride
2076  // copy of B.
2077  RCP<multivector_type> B_in_nonconst = rcp (new multivector_type(rowMap, B.getNumVectors()));
2078  try {
2079  deep_copy (*B_in_nonconst, B);
2080  } catch (std::exception& e) {
2081  std::ostringstream os;
2082  os << "Ifpack2::Relaxation::MTGaussSeidel: "
2083  "deep_copy(*B_in_nonconst, B) threw an exception: "
2084  << e.what () << ".";
2085  TEUCHOS_TEST_FOR_EXCEPTION(true, std::runtime_error, e.what ());
2086  }
2087  B_in = rcp_const_cast<const multivector_type> (B_in_nonconst);
2088 
2089  /*
2090  TPETRA_EFFICIENCY_WARNING(
2091  ! B.isConstantStride (),
2092  std::runtime_error,
2093  "MTGaussSeidel: The current implementation requires that B have "
2094  "constant stride. Since B does not have constant stride, we had to "
2095  "copy it into a separate constant-stride multivector. This is a "
2096  "limitation of the current implementation and not your fault, but we "
2097  "still report it as an efficiency warning for your information.");
2098  */
2099  }
2100 
2101  local_matrix_device_type kcsr = crsMat->getLocalMatrixDevice ();
2102 
2103  bool update_y_vector = true;
2104  //false as it was done up already, and we dont want to zero it in each sweep.
2105  bool zero_x_vector = false;
2106 
2107  for (int sweep = 0; sweep < NumSweeps_; ++sweep) {
2108  if (! importer.is_null () && sweep > 0) {
2109  // We already did the first Import for the zeroth sweep above,
2110  // if it was necessary.
2111  X_colMap->doImport (*X_domainMap, *importer, Tpetra::CombineMode::INSERT);
2112  }
2113 
2114  if (direction == Tpetra::Symmetric) {
2115  KokkosSparse::Experimental::symmetric_gauss_seidel_apply
2116  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2117  kcsr.graph.row_map, kcsr.graph.entries, kcsr.values,
2118  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2119  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2120  zero_x_vector, update_y_vector, DampingFactor_, 1);
2121  }
2122  else if (direction == Tpetra::Forward) {
2123  KokkosSparse::Experimental::forward_sweep_gauss_seidel_apply
2124  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2125  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2126  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2127  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2128  zero_x_vector, update_y_vector, DampingFactor_, 1);
2129  }
2130  else if (direction == Tpetra::Backward) {
2131  KokkosSparse::Experimental::backward_sweep_gauss_seidel_apply
2132  (mtKernelHandle_.getRawPtr(), A_->getNodeNumRows(), A_->getNodeNumCols(),
2133  kcsr.graph.row_map,kcsr.graph.entries, kcsr.values,
2134  X_colMap->getLocalViewDevice(Tpetra::Access::ReadWrite),
2135  B_in->getLocalViewDevice(Tpetra::Access::ReadOnly),
2136  zero_x_vector, update_y_vector, DampingFactor_, 1);
2137  }
2138  else {
2139  TEUCHOS_TEST_FOR_EXCEPTION(
2140  true, std::invalid_argument,
2141  prefix << "The 'direction' enum does not have any of its valid "
2142  "values: Forward, Backward, or Symmetric.");
2143  }
2144  update_y_vector = false;
2145  }
2146 
2147  if (copyBackOutput) {
2148  try {
2149  deep_copy (X , *X_domainMap); // Copy result back into X.
2150  } catch (std::exception& e) {
2151  TEUCHOS_TEST_FOR_EXCEPTION(
2152  true, std::runtime_error, prefix << "deep_copy(X, *X_domainMap) "
2153  "threw an exception: " << e.what ());
2154  }
2155  }
2156 
2157  const double dampingFlops = (DampingFactor_ == STS::one ()) ? 0.0 : 1.0;
2158  const double numVectors = as<double> (X.getNumVectors ());
2159  const double numGlobalRows = as<double> (A_->getGlobalNumRows ());
2160  const double numGlobalNonzeros = as<double> (A_->getGlobalNumEntries ());
2161  double ApplyFlops = NumSweeps_ * numVectors *
2162  (2.0 * numGlobalRows + 2.0 * numGlobalNonzeros + dampingFlops);
2163  if (direction == Tpetra::Symmetric)
2164  ApplyFlops *= 2.0;
2165  ApplyFlops_ += ApplyFlops;
2166 }
2167 
2168 template<class MatrixType>
2170 {
2171  std::ostringstream os;
2172 
2173  // Output is a valid YAML dictionary in flow style. If you don't
2174  // like everything on a single line, you should call describe()
2175  // instead.
2176  os << "\"Ifpack2::Relaxation\": {";
2177 
2178  os << "Initialized: " << (isInitialized () ? "true" : "false") << ", "
2179  << "Computed: " << (isComputed () ? "true" : "false") << ", ";
2180 
2181  // It's useful to print this instance's relaxation method (Jacobi,
2182  // Gauss-Seidel, or symmetric Gauss-Seidel). If you want more info
2183  // than that, call describe() instead.
2184  os << "Type: ";
2185  if (PrecType_ == Ifpack2::Details::JACOBI) {
2186  os << "Jacobi";
2187  } else if (PrecType_ == Ifpack2::Details::GS) {
2188  os << "Gauss-Seidel";
2189  } else if (PrecType_ == Ifpack2::Details::SGS) {
2190  os << "Symmetric Gauss-Seidel";
2191  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2192  os << "MT Gauss-Seidel";
2193  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2194  os << "MT Symmetric Gauss-Seidel";
2195  } else if (PrecType_ == Ifpack2::Details::GS2) {
2196  os << "Two-stage Gauss-Seidel";
2197  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2198  os << "Two-stage Symmetric Gauss-Seidel";
2199  }
2200  else {
2201  os << "INVALID";
2202  }
2203  if(hasBlockCrsMatrix_)
2204  os<<", BlockCrs";
2205 
2206  os << ", " << "sweeps: " << NumSweeps_ << ", "
2207  << "damping factor: " << DampingFactor_ << ", ";
2208 
2209  if (PrecType_ == Ifpack2::Details::GS2 ||
2210  PrecType_ == Ifpack2::Details::SGS2) {
2211  os << "outer sweeps: " << NumOuterSweeps_ << ", "
2212  << "inner sweeps: " << NumInnerSweeps_ << ", "
2213  << "inner damping factor: " << InnerDampingFactor_ << ", ";
2214  }
2215 
2216  if (DoL1Method_) {
2217  os << "use l1: " << DoL1Method_ << ", "
2218  << "l1 eta: " << L1Eta_ << ", ";
2219  }
2220 
2221  if (A_.is_null ()) {
2222  os << "Matrix: null";
2223  }
2224  else {
2225  os << "Global matrix dimensions: ["
2226  << A_->getGlobalNumRows () << ", " << A_->getGlobalNumCols () << "]"
2227  << ", Global nnz: " << A_->getGlobalNumEntries();
2228  }
2229 
2230  os << "}";
2231  return os.str ();
2232 }
2233 
2234 
2235 template<class MatrixType>
2236 void
2238 describe (Teuchos::FancyOStream &out,
2239  const Teuchos::EVerbosityLevel verbLevel) const
2240 {
2241  using Teuchos::OSTab;
2242  using Teuchos::TypeNameTraits;
2243  using Teuchos::VERB_DEFAULT;
2244  using Teuchos::VERB_NONE;
2245  using Teuchos::VERB_LOW;
2246  using Teuchos::VERB_MEDIUM;
2247  using Teuchos::VERB_HIGH;
2248  using Teuchos::VERB_EXTREME;
2249  using std::endl;
2250 
2251  const Teuchos::EVerbosityLevel vl =
2252  (verbLevel == VERB_DEFAULT) ? VERB_LOW : verbLevel;
2253 
2254  const int myRank = this->getComm ()->getRank ();
2255 
2256  // none: print nothing
2257  // low: print O(1) info from Proc 0
2258  // medium:
2259  // high:
2260  // extreme:
2261 
2262  if (vl != VERB_NONE && myRank == 0) {
2263  // Describable interface asks each implementation to start with a tab.
2264  OSTab tab1 (out);
2265  // Output is valid YAML; hence the quotes, to protect the colons.
2266  out << "\"Ifpack2::Relaxation\":" << endl;
2267  OSTab tab2 (out);
2268  out << "MatrixType: \"" << TypeNameTraits<MatrixType>::name () << "\""
2269  << endl;
2270  if (this->getObjectLabel () != "") {
2271  out << "Label: " << this->getObjectLabel () << endl;
2272  }
2273  out << "Initialized: " << (isInitialized () ? "true" : "false") << endl
2274  << "Computed: " << (isComputed () ? "true" : "false") << endl
2275  << "Parameters: " << endl;
2276  {
2277  OSTab tab3 (out);
2278  out << "\"relaxation: type\": ";
2279  if (PrecType_ == Ifpack2::Details::JACOBI) {
2280  out << "Jacobi";
2281  } else if (PrecType_ == Ifpack2::Details::GS) {
2282  out << "Gauss-Seidel";
2283  } else if (PrecType_ == Ifpack2::Details::SGS) {
2284  out << "Symmetric Gauss-Seidel";
2285  } else if (PrecType_ == Ifpack2::Details::MTGS) {
2286  out << "MT Gauss-Seidel";
2287  } else if (PrecType_ == Ifpack2::Details::MTSGS) {
2288  out << "MT Symmetric Gauss-Seidel";
2289  } else if (PrecType_ == Ifpack2::Details::GS2) {
2290  out << "Two-stage Gauss-Seidel";
2291  } else if (PrecType_ == Ifpack2::Details::SGS2) {
2292  out << "Two-stage Symmetric Gauss-Seidel";
2293  } else {
2294  out << "INVALID";
2295  }
2296  // We quote these parameter names because they contain colons.
2297  // YAML uses the colon to distinguish key from value.
2298  out << endl
2299  << "\"relaxation: sweeps\": " << NumSweeps_ << endl
2300  << "\"relaxation: damping factor\": " << DampingFactor_ << endl
2301  << "\"relaxation: min diagonal value\": " << MinDiagonalValue_ << endl
2302  << "\"relaxation: zero starting solution\": " << ZeroStartingSolution_ << endl
2303  << "\"relaxation: backward mode\": " << DoBackwardGS_ << endl
2304  << "\"relaxation: use l1\": " << DoL1Method_ << endl
2305  << "\"relaxation: l1 eta\": " << L1Eta_ << endl;
2306  if (PrecType_ == Ifpack2::Details::GS2 || PrecType_ == Ifpack2::Details::SGS2) {
2307  out << "\"relaxation: inner damping factor\": " << InnerDampingFactor_ << endl;
2308  out << "\"relaxation: outer sweeps\" : " << NumOuterSweeps_ << endl;
2309  out << "\"relaxation: inner sweeps\" : " << NumInnerSweeps_ << endl;
2310  }
2311  }
2312  out << "Computed quantities:" << endl;
2313  {
2314  OSTab tab3 (out);
2315  out << "Global number of rows: " << A_->getGlobalNumRows () << endl
2316  << "Global number of columns: " << A_->getGlobalNumCols () << endl;
2317  }
2318  if (checkDiagEntries_ && isComputed ()) {
2319  out << "Properties of input diagonal entries:" << endl;
2320  {
2321  OSTab tab3 (out);
2322  out << "Magnitude of minimum-magnitude diagonal entry: "
2323  << globalMinMagDiagEntryMag_ << endl
2324  << "Magnitude of maximum-magnitude diagonal entry: "
2325  << globalMaxMagDiagEntryMag_ << endl
2326  << "Number of diagonal entries with small magnitude: "
2327  << globalNumSmallDiagEntries_ << endl
2328  << "Number of zero diagonal entries: "
2329  << globalNumZeroDiagEntries_ << endl
2330  << "Number of diagonal entries with negative real part: "
2331  << globalNumNegDiagEntries_ << endl
2332  << "Abs 2-norm diff between computed and actual inverse "
2333  << "diagonal: " << globalDiagNormDiff_ << endl;
2334  }
2335  }
2336  if (isComputed ()) {
2337  out << "Saved diagonal offsets: "
2338  << (savedDiagOffsets_ ? "true" : "false") << endl;
2339  }
2340  out << "Call counts and total times (in seconds): " << endl;
2341  {
2342  OSTab tab3 (out);
2343  out << "initialize: " << endl;
2344  {
2345  OSTab tab4 (out);
2346  out << "Call count: " << NumInitialize_ << endl;
2347  out << "Total time: " << InitializeTime_ << endl;
2348  }
2349  out << "compute: " << endl;
2350  {
2351  OSTab tab4 (out);
2352  out << "Call count: " << NumCompute_ << endl;
2353  out << "Total time: " << ComputeTime_ << endl;
2354  }
2355  out << "apply: " << endl;
2356  {
2357  OSTab tab4 (out);
2358  out << "Call count: " << NumApply_ << endl;
2359  out << "Total time: " << ApplyTime_ << endl;
2360  }
2361  }
2362  }
2363 }
2364 
2365 
2366 } // namespace Ifpack2
2367 
2368 #define IFPACK2_RELAXATION_INSTANT(S,LO,GO,N) \
2369  template class Ifpack2::Relaxation< Tpetra::RowMatrix<S, LO, GO, N> >;
2370 
2371 #endif // IFPACK2_RELAXATION_DEF_HPP
std::string description() const
A simple one-line description of this object.
Definition: Ifpack2_Relaxation_def.hpp:2169
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getRangeMap() const
Returns the Tpetra::Map object associated with the range of this operator.
Definition: Ifpack2_Relaxation_def.hpp:474
int getNumInitialize() const
Total number of calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:490
double getApplyTime() const
Total time in seconds spent in all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:520
Relaxation(const Teuchos::RCP< const row_matrix_type > &A)
Constructor.
Definition: Ifpack2_Relaxation_def.hpp:213
void compute()
Compute the preconditioner ("numeric setup");.
Definition: Ifpack2_Relaxation_def.hpp:988
void apply(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS, scalar_type alpha=Teuchos::ScalarTraits< scalar_type >::one(), scalar_type beta=Teuchos::ScalarTraits< scalar_type >::zero()) const
Apply the preconditioner to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:552
bool hasTransposeApply() const
Whether apply() and applyMat() let you apply the transpose or conjugate transpose.
Definition: Ifpack2_Relaxation_def.hpp:484
int getNumApply() const
Total number of calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:502
double getInitializeTime() const
Total time in seconds spent in all calls to initialize().
Definition: Ifpack2_Relaxation_def.hpp:508
virtual void setMatrix(const Teuchos::RCP< const row_matrix_type > &A)
Change the matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:192
Teuchos::RCP< const Tpetra::Map< local_ordinal_type, global_ordinal_type, node_type > > getDomainMap() const
Returns the Tpetra::Map object associated with the domain of this operator.
Definition: Ifpack2_Relaxation_def.hpp:461
Teuchos::RCP< const row_matrix_type > getMatrix() const
The matrix to be preconditioned.
Definition: Ifpack2_Relaxation_def.hpp:452
MatrixType::node_type node_type
The Node type used by the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:263
void setParameters(const Teuchos::ParameterList &params)
Set the relaxation / preconditioner parameters.
Definition: Ifpack2_Relaxation_def.hpp:431
File for utility functions.
size_t getNodeSmootherComplexity() const
Get a rough estimate of cost per iteration.
Definition: Ifpack2_Relaxation_def.hpp:539
double getApplyFlops() const
Total number of floating-point operations over all calls to apply().
Definition: Ifpack2_Relaxation_def.hpp:532
Teuchos::RCP< const Teuchos::ParameterList > getValidParameters() const
Return a list of all the parameters that this class accepts.
Definition: Ifpack2_Relaxation_def.hpp:225
MatrixType::global_ordinal_type global_ordinal_type
The type of global indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:260
double getComputeFlops() const
Total number of floating-point operations over all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:526
Compute scaled damped residual for Chebyshev.
Definition: Ifpack2_Details_InverseDiagonalKernel_decl.hpp:77
double getComputeTime() const
Total time in seconds spent in all calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:514
int getNumCompute() const
Total number of calls to compute().
Definition: Ifpack2_Relaxation_def.hpp:496
MatrixType::local_ordinal_type local_ordinal_type
The type of local indices in the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:257
Teuchos::RCP< const Teuchos::Comm< int > > getComm() const
The communicator over which the matrix and vectors are distributed.
Definition: Ifpack2_Relaxation_def.hpp:441
MatrixType::scalar_type scalar_type
The type of the entries of the input MatrixType.
Definition: Ifpack2_Relaxation_decl.hpp:254
void applyMat(const Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &X, Tpetra::MultiVector< scalar_type, local_ordinal_type, global_ordinal_type, node_type > &Y, Teuchos::ETransp mode=Teuchos::NO_TRANS) const
Apply the input matrix to X, returning the result in Y.
Definition: Ifpack2_Relaxation_def.hpp:657
Relaxation preconditioners for Tpetra::RowMatrix and Tpetra::CrsMatrix sparse matrices.
Definition: Ifpack2_Relaxation_decl.hpp:237
void describe(Teuchos::FancyOStream &out, const Teuchos::EVerbosityLevel verbLevel=Teuchos::Describable::verbLevel_default) const
Print the object&#39;s attributes to the given output stream.
Definition: Ifpack2_Relaxation_def.hpp:2238
Preconditioners and smoothers for Tpetra sparse matrices.
Definition: Ifpack2_AdditiveSchwarz_decl.hpp:73
void getValidParameters(Teuchos::ParameterList &params)
Fills a list which contains all the parameters possibly used by Ifpack2.
Definition: Ifpack2_Parameters.cpp:51
void initialize()
Initialize the preconditioner ("symbolic setup").
Definition: Ifpack2_Relaxation_def.hpp:678
Tpetra::RowMatrix< scalar_type, local_ordinal_type, global_ordinal_type, node_type > row_matrix_type
Tpetra::RowMatrix specialization used by this class.
Definition: Ifpack2_Relaxation_decl.hpp:273
Teuchos::ScalarTraits< scalar_type >::magnitudeType magnitude_type
The type of the magnitude (absolute value) of a matrix entry.
Definition: Ifpack2_Relaxation_decl.hpp:269