MueLu  Version of the Day
MueLu_LocalPermutationStrategy_def.hpp
Go to the documentation of this file.
1 /*
2  * MueLu_LocalPermutationStrategy_def.hpp
3  *
4  * Created on: Feb 19, 2013
5  * Author: tobias
6  */
7 
8 #ifndef MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
9 #define MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_
10 
11 #include <algorithm>
12 
13 #include <Xpetra_MultiVector.hpp>
14 #include <Xpetra_Matrix.hpp>
15 #include <Xpetra_MatrixMatrix.hpp>
16 #include <Xpetra_CrsGraph.hpp>
17 #include <Xpetra_Vector.hpp>
18 #include <Xpetra_VectorFactory.hpp>
19 #include <Xpetra_CrsMatrixWrap.hpp>
20 
21 #include "MueLu_Utilities.hpp"
23 
24 namespace MueLu {
25 
26  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
28 
29  permWidth_ = nDofsPerNode;
30 
31  result_permvecs_.clear();
32 
33  // build permutation string
34  std::stringstream ss;
35  for(size_t t = 0; t<nDofsPerNode; t++)
36  ss << t;
37  std::string cs = ss.str();
38  //std::vector<std::string> result_perms;
39  do {
40  //result_perms.push_back(cs);
41 
42  std::vector<int> newPerm(cs.length(),-1);
43  for(size_t len=0; len<cs.length(); len++) {
44  newPerm[len] = Teuchos::as<int>(cs[len]-'0');
45  }
46  result_permvecs_.push_back(newPerm);
47 
48  } while (std::next_permutation(cs.begin(),cs.end()));
49  }
50 
51  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
52  void LocalPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::BuildPermutation(const Teuchos::RCP<Matrix> & A, const Teuchos::RCP<const Map> permRowMap, Level & currentLevel, const FactoryBase* genFactory) const {
53  size_t nDofsPerNode = 1;
54  if (A->IsView("stridedMaps")) {
55  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
56  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
57  }
58 
59  RCP<const Teuchos::Comm<int> > comm = A->getRowMap()->getComm();
60 
62  std::vector<std::pair<GlobalOrdinal,GlobalOrdinal> > RowColPairs;
63 
64  // check whether we have to (re)build the permutation vector
65  if(permWidth_ != nDofsPerNode)
66  BuildPermutations(nDofsPerNode);
67 
68  // declare local variables used inside loop
69  LocalOrdinal lonDofsPerNode = Teuchos::as<LocalOrdinal> (nDofsPerNode);
70  Teuchos::ArrayView<const LocalOrdinal> indices;
71  Teuchos::ArrayView<const Scalar> vals;
72  Teuchos::SerialDenseMatrix<LocalOrdinal,Scalar> subBlockMatrix(nDofsPerNode, nDofsPerNode, true);
73  std::vector<GlobalOrdinal> growIds(nDofsPerNode);
74 
75  // loop over local nodes
76  // TODO what about nOffset?
77  LocalOrdinal numLocalNodes = A->getRowMap()->getNodeNumElements()/nDofsPerNode;
78  for (LocalOrdinal node = 0; node < numLocalNodes; ++node) {
79 
80  // zero out block matrix
81  subBlockMatrix.putScalar();
82 
83  // loop over all DOFs in current node
84  // Note: were assuming constant number of Dofs per node here!
85  // TODO This is more complicated for variable dofs per node
86  for (LocalOrdinal lrdof = 0; lrdof < lonDofsPerNode; ++lrdof) {
87  GlobalOrdinal grow = getGlobalDofId(A, node, lrdof);
88  growIds[lrdof] = grow;
89 
90  // extract local row information from matrix
91  A->getLocalRowView(A->getRowMap()->getLocalElement(grow), indices, vals);
92 
93  // find column entry with max absolute value
94  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
95  MT maxVal = 0.0;
96  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
97  if(Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]) > maxVal) {
98  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(vals[j]);
99  }
100  }
101 
102  GlobalOrdinal grnodeid = globalDofId2globalNodeId(A,grow);
103 
104  for (size_t j = 0; j < Teuchos::as<size_t>(indices.size()); j++) {
105  GlobalOrdinal gcol = A->getColMap()->getGlobalElement(indices[j]);
106  GlobalOrdinal gcnodeid = globalDofId2globalNodeId(A,gcol); // -> global node id
107  if (grnodeid == gcnodeid) {
108  if(maxVal != Teuchos::ScalarTraits<MT>::zero ()) {
109  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j]/maxVal;
110  } else
111  {
112  subBlockMatrix(lrdof, gcol % nDofsPerNode) = vals[j]; // there is a problem
113  std::cout << "maxVal never should be zero!!!!" << std::endl;
114  }
115  }
116  }
117  }
118 
119  // now we have the sub block matrix
120 
121  // build permutation string
122  /*std::stringstream ss;
123  for(size_t t = 0; t<nDofsPerNode; t++)
124  ss << t;
125  std::string cs = ss.str();
126  std::vector<std::string> result_perms;
127  do {
128  result_perms.push_back(cs);
129  //std::cout << result_perms.back() << std::endl;
130  } while (std::next_permutation(cs.begin(),cs.end()));*/
131 
132  std::vector<Scalar> performance_vector = std::vector<Scalar>(result_permvecs_.size());
133  for (size_t t = 0; t < result_permvecs_.size(); t++) {
134  std::vector<int> vv = result_permvecs_[t];
135  Scalar value = 1.0;
136  for(size_t j=0; j<vv.size(); j++) {
137  value = value * subBlockMatrix(j,vv[j]);
138  }
139  performance_vector[t] = value;
140  }
141  /*for(size_t t = 0; t < result_perms.size(); t++) {
142  std::string s = result_perms[t];
143  Scalar value = 1.0;
144  for(size_t len=0; len<s.length(); len++) {
145  int col = Teuchos::as<int>(s[len]-'0');
146  value = value * subBlockMatrix(len,col);
147  }
148  performance_vector[t] = value;
149  }*/
150 
151  // find permutation with maximum performance value
152  typedef typename Teuchos::ScalarTraits<Scalar>::magnitudeType MT;
153  MT maxVal = Teuchos::ScalarTraits<MT>::zero();
154  size_t maxPerformancePermutationIdx = 0;
155  for (size_t j = 0; j < Teuchos::as<size_t>(performance_vector.size()); j++) {
156  if(Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]) > maxVal) {
157  maxVal = Teuchos::ScalarTraits<Scalar>::magnitude(performance_vector[j]);
158  maxPerformancePermutationIdx = j;
159  }
160  }
161 
162  // build RowColPairs for best permutation
163  std::vector<int> bestPerformancePermutation = result_permvecs_[maxPerformancePermutationIdx];
164  for(size_t t = 0; t<nDofsPerNode; t++) {
165  RowColPairs.push_back(std::make_pair(growIds[t],growIds[bestPerformancePermutation[t]]));
166  }
167 
168  } // end loop over local nodes
169 
170 
171  // build Pperm and Qperm vectors
172  Teuchos::RCP<Vector> Pperm = VectorFactory::Build(A->getRowMap());
173  Teuchos::RCP<Vector> Qperm = VectorFactory::Build(A->getDomainMap());
174 
175  Pperm->putScalar(0.0);
176  Qperm->putScalar(0.0);
177 
178  Teuchos::ArrayRCP<Scalar> PpermData = Pperm->getDataNonConst(0);
179  Teuchos::ArrayRCP<Scalar> QpermData = Qperm->getDataNonConst(0);
180 
181  typename std::vector<std::pair<GlobalOrdinal, GlobalOrdinal> >::iterator p = RowColPairs.begin();
182  while(p != RowColPairs.end() ) {
183  GlobalOrdinal ik = (*p).first;
184  GlobalOrdinal jk = (*p).second;
185 
186  LocalOrdinal lik = A->getRowMap()->getLocalElement(ik);
187  LocalOrdinal ljk = A->getDomainMap()->getLocalElement(jk);
188 
189  Pperm->replaceLocalValue(lik,ik);
190  Qperm->replaceLocalValue(ljk,ik);
191 
192  p = RowColPairs.erase(p);
193  }
194 
195  if(RowColPairs.size()>0) GetOStream(Warnings0) << "MueLu::LocalPermutationStrategy: There are Row/col pairs left!" << std::endl;
196 
197  // Qperm should be fine
198  // build matrices
199 
200  // create new empty Matrix
201  Teuchos::RCP<CrsMatrixWrap> permPTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
202  Teuchos::RCP<CrsMatrixWrap> permQTmatrix = Teuchos::rcp(new CrsMatrixWrap(A->getRowMap(),1,Xpetra::StaticProfile));
203 
204  for(size_t row=0; row<A->getNodeNumRows(); row++) {
205  Teuchos::ArrayRCP<GlobalOrdinal> indoutP(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(PpermData[row]))); // column idx for Perm^T
206  Teuchos::ArrayRCP<GlobalOrdinal> indoutQ(1,Teuchos::as<GO>(Teuchos::ScalarTraits<Scalar>::real(QpermData[row]))); // column idx for Qperm
207  Teuchos::ArrayRCP<Scalar> valout(1,Teuchos::ScalarTraits<Scalar>::one());
208  permPTmatrix->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indoutP.view(0,indoutP.size()), valout.view(0,valout.size()));
209  permQTmatrix->insertGlobalValues (A->getRowMap()->getGlobalElement(row), indoutQ.view(0,indoutQ.size()), valout.view(0,valout.size()));
210  }
211 
212  permPTmatrix->fillComplete();
213  permQTmatrix->fillComplete();
214 
215  Teuchos::RCP<Matrix> permPmatrix = Utilities::Transpose(*permPTmatrix,true);
216 
217  /*for(size_t row=0; row<permPTmatrix->getNodeNumRows(); row++) {
218  if(permPTmatrix->getNumEntriesInLocalRow(row) != 1)
219  GetOStream(Warnings0) <<"#entries in row " << row << " of permPTmatrix is " << permPTmatrix->getNumEntriesInLocalRow(row) << std::endl;
220  if(permPmatrix->getNumEntriesInLocalRow(row) != 1)
221  GetOStream(Warnings0) <<"#entries in row " << row << " of permPmatrix is " << permPmatrix->getNumEntriesInLocalRow(row) << std::endl;
222  if(permQTmatrix->getNumEntriesInLocalRow(row) != 1)
223  GetOStream(Warnings0) <<"#entries in row " << row << " of permQmatrix is " << permQTmatrix->getNumEntriesInLocalRow(row) << std::endl;
224  }*/
225 
226  // build permP * A * permQT
227  Teuchos::RCP<Matrix> ApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*A, false, *permQTmatrix, false, GetOStream(Statistics2),true,true);
228  Teuchos::RCP<Matrix> permPApermQt = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*permPmatrix, false, *ApermQt, false, GetOStream(Statistics2),true,true);
229 
230  /*
231  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("A.mat", *A);
232  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permP.mat", *permPmatrix);
233  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permQt.mat", *permQTmatrix);
234  MueLu::Utils<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Write("permPApermQt.mat", *permPApermQt);
235  */
236  // build scaling matrix
237  Teuchos::RCP<Vector> diagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
238  Teuchos::RCP<Vector> invDiagVec = VectorFactory::Build(permPApermQt->getRowMap(),true);
239  Teuchos::ArrayRCP< const Scalar > diagVecData = diagVec->getData(0);
240  Teuchos::ArrayRCP< Scalar > invDiagVecData = invDiagVec->getDataNonConst(0);
241 
242  LO lCntZeroDiagonals = 0;
243  permPApermQt->getLocalDiagCopy(*diagVec);
244  for(size_t i = 0; i<diagVec->getMap()->getNodeNumElements(); ++i) {
245  if(diagVecData[i] != 0.0)
246  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one()/diagVecData[i];
247  else {
248  invDiagVecData[i] = Teuchos::ScalarTraits<Scalar>::one();
249  lCntZeroDiagonals++;
250  //GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found zero on diagonal in row " << i << std::endl;
251  }
252  }
253 
254 #if 0
255  GO gCntZeroDiagonals = 0;
256  GO glCntZeroDiagonals = Teuchos::as<GlobalOrdinal>(lCntZeroDiagonals); /* LO->GO conversion */
257  MueLu_sumAll(comm,glCntZeroDiagonals,gCntZeroDiagonals);
258  GetOStream(Statistics0) << "MueLu::LocalPermutationStrategy: found " << gCntZeroDiagonals << " zeros on diagonal" << std::endl;
259 #endif
260 
261  Teuchos::RCP<CrsMatrixWrap> diagScalingOp = Teuchos::rcp(new CrsMatrixWrap(permPApermQt->getRowMap(),1,Xpetra::StaticProfile));
262 
263  for(size_t row=0; row<A->getNodeNumRows(); row++) {
264  Teuchos::ArrayRCP<GlobalOrdinal> indout(1,permPApermQt->getRowMap()->getGlobalElement(row)); // column idx for Perm^T
265  Teuchos::ArrayRCP<Scalar> valout(1,invDiagVecData[row]);
266  diagScalingOp->insertGlobalValues(A->getRowMap()->getGlobalElement(row), indout.view(0,indout.size()), valout.view(0,valout.size()));
267  }
268  diagScalingOp->fillComplete();
269 
270  Teuchos::RCP<Matrix> scaledA = Xpetra::MatrixMatrix<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Multiply(*diagScalingOp, false, *permPApermQt, false, GetOStream(Statistics2), true, true);
271  currentLevel.Set("A", Teuchos::rcp_dynamic_cast<Matrix>(scaledA), genFactory);
272 
273  currentLevel.Set("permA", Teuchos::rcp_dynamic_cast<Matrix>(permPApermQt), genFactory);
274  currentLevel.Set("permP", Teuchos::rcp_dynamic_cast<Matrix>(permPmatrix), genFactory);
275  currentLevel.Set("permQT", Teuchos::rcp_dynamic_cast<Matrix>(permQTmatrix), genFactory);
276  currentLevel.Set("permScaling", Teuchos::rcp_dynamic_cast<Matrix>(diagScalingOp), genFactory);
277 
279  // count zeros on diagonal in P -> number of row permutations
280  Teuchos::RCP<Vector> diagPVec = VectorFactory::Build(permPmatrix->getRowMap(),true);
281  permPmatrix->getLocalDiagCopy(*diagPVec);
282  Teuchos::ArrayRCP< const Scalar > diagPVecData = diagPVec->getData(0);
283  GlobalOrdinal lNumRowPermutations = 0;
284  GlobalOrdinal gNumRowPermutations = 0;
285  for(size_t i = 0; i<diagPVec->getMap()->getNodeNumElements(); ++i) {
286  if(diagPVecData[i] == 0.0) {
287  lNumRowPermutations++;
288  }
289  }
290 
291  // sum up all entries in multipleColRequests over all processors
292  MueLu_sumAll(diagPVec->getMap()->getComm(), lNumRowPermutations, gNumRowPermutations);
293 
295  // count zeros on diagonal in Q^T -> number of column permutations
296  Teuchos::RCP<Vector> diagQTVec = VectorFactory::Build(permQTmatrix->getRowMap(),true);
297  permQTmatrix->getLocalDiagCopy(*diagQTVec);
298  Teuchos::ArrayRCP< const Scalar > diagQTVecData = diagQTVec->getData(0);
299  GlobalOrdinal lNumColPermutations = 0;
300  GlobalOrdinal gNumColPermutations = 0;
301  for(size_t i = 0; i<diagQTVec->getMap()->getNodeNumElements(); ++i) {
302  if(diagQTVecData[i] == 0.0) {
303  lNumColPermutations++;
304  }
305  }
306 
307  // sum up all entries in multipleColRequests over all processors
308  MueLu_sumAll(diagQTVec->getMap()->getComm(), lNumColPermutations, gNumColPermutations);
309 
310  currentLevel.Set("#RowPermutations", gNumRowPermutations, genFactory/*this*/);
311  currentLevel.Set("#ColPermutations", gNumColPermutations, genFactory/*this*/);
312  currentLevel.Set("#WideRangeRowPermutations", 0, genFactory/*this*/);
313  currentLevel.Set("#WideRangeColPermutations", 0, genFactory/*this*/);
314 
315  GetOStream(Statistics0) << "#Row permutations/max possible permutations: " << gNumRowPermutations << "/" << diagPVec->getMap()->getGlobalNumElements() << std::endl;
316  GetOStream(Statistics0) << "#Column permutations/max possible permutations: " << gNumColPermutations << "/" << diagQTVec->getMap()->getGlobalNumElements() << std::endl;
317  }
318 
319  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
320  GlobalOrdinal LocalPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::getGlobalDofId(const Teuchos::RCP<Matrix> & A, LocalOrdinal localNodeId, LocalOrdinal localDof) const {
321  size_t nDofsPerNode = 1;
322  if (A->IsView("stridedMaps")) {
323  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
324  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
325  }
326 
327  LocalOrdinal localDofId = localNodeId * nDofsPerNode + localDof;
328 
329  return A->getRowMap()->getGlobalElement(localDofId);
330  }
331 
332  template<class Scalar, class LocalOrdinal, class GlobalOrdinal, class Node>
333  GlobalOrdinal LocalPermutationStrategy<Scalar, LocalOrdinal, GlobalOrdinal, Node>::globalDofId2globalNodeId( const Teuchos::RCP<Matrix> & A, GlobalOrdinal grid ) const {
334  size_t nDofsPerNode = 1;
335  if (A->IsView("stridedMaps")) {
336  Teuchos::RCP<const Map> permRowMapStrided = A->getRowMap("stridedMaps");
337  nDofsPerNode = Teuchos::rcp_dynamic_cast<const StridedMap>(permRowMapStrided)->getFixedBlockSize();
338  }
339 
340  return (GlobalOrdinal) grid / (GlobalOrdinal)nDofsPerNode; // TODO what about nOffset???
341  }
342 
343 } // namespace MueLu
344 
345 
346 #endif /* MUELU_LOCALPERMUTATIONSTRATEGY_DEF_HPP_ */
Important warning messages (one line)
#define MueLu_sumAll(rcpComm, in, out)
static RCP< Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > > Transpose(Xpetra::Matrix< Scalar, LocalOrdinal, GlobalOrdinal, Node > &Op, bool optimizeTranspose=false, const std::string &label=std::string())
GlobalOrdinal GO
LocalOrdinal LO
Namespace for MueLu classes and methods.
GlobalOrdinal getGlobalDofId(const Teuchos::RCP< Matrix > &A, LocalOrdinal localNodeId, LocalOrdinal localDof) const
Print even more statistics.
Base class for factories (e.g., R, P, and A_coarse).
Print statistics that do not involve significant additional computation.
void BuildPermutations(size_t nDofsPerNode) const
Class that holds all level-specific information.
Definition: MueLu_Level.hpp:99
void Set(const std::string &ename, const T &entry, const FactoryBase *factory=NoFactory::get())
static void Multiply(const Matrix &A, bool transposeA, const Matrix &B, bool transposeB, Matrix &C, bool call_FillComplete_on_result=true, bool doOptimizeStorage=true, const std::string &label=std::string(), const RCP< ParameterList > &params=null)
GlobalOrdinal globalDofId2globalNodeId(const Teuchos::RCP< Matrix > &A, GlobalOrdinal grid) const
void BuildPermutation(const Teuchos::RCP< Matrix > &A, const Teuchos::RCP< const Map > permRowMap, Level &currentLevel, const FactoryBase *genFactory) const
build permutation operators