Compadre  1.3.3
Compadre_GMLS_ApplyTargetEvaluations.hpp
Go to the documentation of this file.
1 #ifndef _COMPADRE_GMLS_APPLY_TARGET_EVALUATIONS_HPP_
2 #define _COMPADRE_GMLS_APPLY_TARGET_EVALUATIONS_HPP_
3 
4 #include "Compadre_GMLS.hpp"
5 namespace Compadre {
6 
7 KOKKOS_INLINE_FUNCTION
9 
10  const int target_index = _initial_index_for_batch + teamMember.league_rank();
11 
12 #if defined(COMPADRE_USE_CUDA)
13 // // GPU
14 // for (int j=0; j<_operations.size(); ++j) {
15 // for (int k=0; k<_lro_output_tile_size[j]; ++k) {
16 // for (int m=0; m<_lro_input_tile_size[j]; ++m) {
17 // const int alpha_offset = (_lro_total_offsets[j] + m*_lro_output_tile_size[j] + k)*_neighbor_lists(target_index,0);
18 // const int P_offset =_basis_multiplier*target_NP*(_lro_total_offsets[j] + m*_lro_output_tile_size[j] + k);
19 // Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
20 // this->getNNeighbors(target_index)), [=] (const int i) {
21 //
22 // double alpha_ij = 0;
23 // if (_sampling_multiplier>1 && m<_sampling_multiplier) {
24 // const int m_neighbor_offset = i+m*this->getNNeighbors(target_index);
25 // Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,
26 // _basis_multiplier*target_NP), [=] (const int l, double &talpha_ij) {
27 // //for (int l=0; l<_basis_multiplier*target_NP; ++l) {
28 // talpha_ij += P_target_row(P_offset + l)*Q(ORDER_INDICES(m_neighbor_offset,l));
29 // }, alpha_ij);
30 // //}
31 // } else if (_sampling_multiplier == 1) {
32 // Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember,
33 // _basis_multiplier*target_NP), [=] (const int l, double &talpha_ij) {
34 // //for (int l=0; l<_basis_multiplier*target_NP; ++l) {
35 // talpha_ij += P_target_row(P_offset + l)*Q(ORDER_INDICES(i,l));
36 // }, alpha_ij);
37 // //}
38 // }
39 // Kokkos::single(Kokkos::PerThread(teamMember), [&] () {
40 // t1(i) = alpha_ij;
41 // });
42 // });
43 // Kokkos::parallel_for(Kokkos::ThreadVectorRange(teamMember,
44 // this->getNNeighbors(target_index)), [=] (const int i) {
45 // _alphas(ORDER_INDICES(target_index, alpha_offset + i)) = t1(i);
46 // });
47 // teamMember.team_barrier();
48 // }
49 // }
50 // }
51 
52  // GPU
53  for (int e=0; e<getNEvaluationSitesPerTarget(target_index); ++e) {
54  for (int j=0; j<(int)_operations.size(); ++j) {
55  for (int k=0; k<_lro_output_tile_size[j]; ++k) {
56  for (int m=0; m<_lro_input_tile_size[j]; ++m) {
57  int offset_index_jmke = getTargetOffsetIndexDevice(j,m,k,e);
58  int alphas_index = getAlphaIndexDevice(target_index, offset_index_jmke);
59  Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,
60  this->getNNeighbors(target_index) + _added_alpha_size), [&] (const int i) {
61  double alpha_ij = 0;
63  const int m_neighbor_offset = i+m*this->getNNeighbors(target_index);
64  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, _basis_multiplier*target_NP),
65  [=] (int& l, double& t_alpha_ij) {
66  t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l, m_neighbor_offset);
67 
68  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
69  && "NaN in P_target_row matrix.");
70  compadre_kernel_assert_extreme_debug(Q(l, m_neighbor_offset)==Q(l, m_neighbor_offset)
71  && "NaN in Q coefficient matrix.");
72 
73  }, alpha_ij);
74  } else if (_sampling_multiplier == 1) {
75  Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, _basis_multiplier*target_NP),
76  [=] (int& l, double& t_alpha_ij) {
77  t_alpha_ij += P_target_row(offset_index_jmke, l)*Q(l,i);
78 
79  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
80  && "NaN in P_target_row matrix.");
82  && "NaN in Q coefficient matrix.");
83 
84  }, alpha_ij);
85  }
86  Kokkos::single(Kokkos::PerThread(teamMember), [=] () {
87  //_alphas(target_index, offset_index_jmke, i) = alpha_ij;
88  _alphas(alphas_index+i) = alpha_ij;
89  compadre_kernel_assert_extreme_debug(alpha_ij==alpha_ij && "NaN in alphas.");
90  });
91  });
92 
93  }
94  }
95  }
96  }
97 #else
98 
99  // CPU
100  const int alphas_per_tile_per_target = _neighbor_lists.getNumberOfNeighborsDevice(target_index) + _added_alpha_size;
101  const global_index_type base_offset_index_jmke = getTargetOffsetIndexDevice(0,0,0,0);
102  const global_index_type base_alphas_index = getAlphaIndexDevice(target_index, base_offset_index_jmke);
103 
104  scratch_matrix_right_type this_alphas(_alphas.data() + TO_GLOBAL(base_alphas_index), _total_alpha_values*_max_evaluation_sites_per_target, alphas_per_tile_per_target);
105 
106  for (int e=0; e<this->getNEvaluationSitesPerTarget(target_index); ++e) {
107  // evaluating alpha_ij
108  for (size_t j=0; j<_operations.size(); ++j) {
109  for (int k=0; k<_lro_output_tile_size[j]; ++k) {
110  for (int m=0; m<_lro_input_tile_size[j]; ++m) {
111  double alpha_ij = 0;
112  int offset_index_jmke = getTargetOffsetIndexDevice(j,m,k,e);
113  for (int i=0; i<this->getNNeighbors(target_index) + _added_alpha_size; ++i) {
114  Kokkos::parallel_reduce(Kokkos::TeamThreadRange(teamMember,
115  _basis_multiplier*target_NP), [&] (const int l, double &talpha_ij) {
117 
118  talpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i+m*this->getNNeighbors(target_index));
119 
120  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
121  && "NaN in P_target_row matrix.");
122  compadre_kernel_assert_extreme_debug(Q(l, i+m*this->getNNeighbors(target_index))==Q(l, i+m*this->getNNeighbors(target_index))
123  && "NaN in Q coefficient matrix.");
124 
125  } else if (_sampling_multiplier == 1) {
126 
127  talpha_ij += P_target_row(offset_index_jmke, l)*Q(l, i);
128 
129  compadre_kernel_assert_extreme_debug(P_target_row(offset_index_jmke, l)==P_target_row(offset_index_jmke, l)
130  && "NaN in P_target_row matrix.");
132  && "NaN in Q coefficient matrix.");
133 
134  } else {
135  talpha_ij += 0;
136  }
137  }, alpha_ij);
138  Kokkos::single(Kokkos::PerTeam(teamMember), [&] () {
139  this_alphas(offset_index_jmke,i) = alpha_ij;
140  compadre_kernel_assert_extreme_debug(alpha_ij==alpha_ij && "NaN in alphas.");
141  });
142  }
143  }
144  }
145  }
146  }
147 #endif
148 
149  teamMember.team_barrier();
150 }
151 
152 } // Compadre
153 #endif
Kokkos::View< int * > _lro_input_tile_size
dimensions ^ rank of tensor of output for each sampling functional (device)
Kokkos::View< double *, layout_right > _alphas
generated alpha coefficients (device)
KOKKOS_INLINE_FUNCTION int getNumberOfNeighborsDevice(int target_index) const
Get number of neighbors for a given target (device)
std::size_t global_index_type
Kokkos::View< TargetOperation * > _operations
vector containing target functionals to be applied for reconstruction problem (device) ...
int _max_evaluation_sites_per_target
maximum number of evaluation sites for each target (includes target site)
team_policy::member_type member_type
#define compadre_kernel_assert_extreme_debug(condition)
int _total_alpha_values
used for sizing P_target_row and the _alphas view
int _sampling_multiplier
actual dimension of the sampling functional e.g.
NeighborLists< Kokkos::View< int * > > _neighbor_lists
Accessor to get neighbor list data, offset data, and number of neighbors per target.
Kokkos::View< int * > _lro_output_tile_size
dimensions ^ rank of tensor of output for each target functional (device)
KOKKOS_INLINE_FUNCTION int getNNeighbors(const int target_index) const
Returns number of neighbors for a particular target.
KOKKOS_INLINE_FUNCTION int getNEvaluationSitesPerTarget(const int target_index) const
(OPTIONAL) Returns number of additional evaluation sites for a particular target
#define TO_GLOBAL(variable)
int _initial_index_for_batch
initial index for current batch
KOKKOS_INLINE_FUNCTION int getTargetOffsetIndexDevice(const int lro_num, const int input_component, const int output_component, const int additional_evaluation_local_index=0) const
Handles offset from operation input/output + extra evaluation sites.
Kokkos::View< double **, layout_right, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_matrix_right_type
Kokkos::View< double *, Kokkos::MemoryTraits< Kokkos::Unmanaged > > scratch_vector_type
int _added_alpha_size
additional alpha coefficients due to constraints
int _basis_multiplier
dimension of the reconstructed function e.g.
KOKKOS_INLINE_FUNCTION global_index_type getAlphaIndexDevice(const int target_index, const int alpha_column_offset) const
Gives index into alphas given two axes, which when incremented by the neighbor number transforms acce...
KOKKOS_INLINE_FUNCTION void applyTargetsToCoefficients(const member_type &teamMember, scratch_vector_type t1, scratch_vector_type t2, scratch_matrix_right_type Q, scratch_vector_type w, scratch_matrix_right_type P_target_row, const int target_NP) const
Helper function for applying the evaluations from a target functional to the polynomial coefficients...