anneal_para.h
/*
  -------------------------------------------------------------------

  Copyright (C) 2017-2018, Andrew W. Steiner

  This file is part of O2scl.

  O2scl is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
  the Free Software Foundation; either version 3 of the License, or
  (at your option) any later version.

  O2scl is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  GNU General Public License for more details.

  You should have received a copy of the GNU General Public License
  along with O2scl. If not, see <http://www.gnu.org/licenses/>.

  -------------------------------------------------------------------
*/
#ifndef O2SCL_ANNEAL_PARA_H
#define O2SCL_ANNEAL_PARA_H

#ifdef O2SCL_OPENMP
#include <omp.h>
#endif

#include <o2scl/anneal_gsl.h>
#include <o2scl/vector.h>

#ifndef DOXYGEN_NO_O2NS
namespace o2scl {
#endif

  /** \brief Multidimensional minimization by simulated annealing
      (OpenMP/MPI version)

      This class works particularly well for functions which are not
      trivial to evaluate, i.e. functions where the execution time is
      significantly longer than the bookkeeping that \ref anneal_para
      performs between trials. For functions which satisfy this
      requirement, this algorithm scales nearly linearly with the
      number of processors.

      Verbose I/O for this class occurs only outside the parallel
      regions, unless the user-specified function performs I/O
      itself.
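
      A minimal usage sketch (illustrative only, not from the
      original documentation): \c my_func is a hypothetical
      objective function with the \ref multi_funct signature, and
      \c ubvector abbreviates boost::numeric::ublas::vector<double>.
      \code
      double my_func(size_t nv, const ubvector &x) {
        // A simple quadratic with its minimum at (1,2)
        return (x[0]-1.0)*(x[0]-1.0)+(x[1]-2.0)*(x[1]-2.0);
      }

      void example() {
        o2scl::anneal_para<> ap;
        o2scl::multi_funct mf=my_func;
        ubvector x(2);
        x[0]=0.0;
        x[1]=0.0;
        double fmin;
        // On return, x holds the best point found and fmin the
        // corresponding function value
        ap.mmin(2,x,fmin,mf);
      }
      \endcode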
  */
  template<class func_t=multi_funct,
           class vec_t=boost::numeric::ublas::vector<double> >
  class anneal_para : public anneal_gsl<func_t,vec_t> {

  public:

    typedef boost::numeric::ublas::vector<double> ubvector;

    /// The number of OpenMP threads
    size_t n_threads;

    /// The MPI starting time
    double start_time;

    /// The maximum time
    double max_time;

    /** \brief If true, obtain the global minimum over all MPI ranks
        (default true)
    */
    bool collect_all_ranks;

    anneal_para() {
      n_threads=1;
      start_time=0.0;
      max_time=0.0;
      collect_all_ranks=true;
    }

    virtual ~anneal_para() {
    }

    /// A different random number generator for each OpenMP thread
    std::vector<rng_gsl> vrng;

    /** \brief Make a step to a new attempted minimum
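
        Each component of the attempted minimum is chosen as
        \f$ sx_i = x_i + (2u-1)\,\delta_i \f$, where \f$ u \f$ is a
        uniform random deviate in \f$ [0,1] \f$ from the thread's
        generator and \f$ \delta_i \f$ is the (suitably normalized)
        step size for the ith direction.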
    */
    virtual int step(vec_t &x, vec_t &sx, int nvar, size_t ithread) {
      size_t nstep=this->step_vec.size();
      for(int i=0;i<nvar;i++) {
        double u=vrng[ithread].random();

        // Construct the step in the ith direction
        double step_i=this->step_norm*this->step_vec[i%nstep];
        // Fix the step if it is too small
        if (step_i<this->tol_abs*this->min_step_ratio) {
          step_i=this->tol_abs*this->min_step_ratio;
        }

        sx[i]=(2.0*u-1.0)*step_i+x[i];
      }
      return 0;
    }

    /// \name Basic usage
    //@{
    /** \brief Calculate the minimum \c fmin of \c func with respect
        to the array \c x0 of size \c nv, using one function object
        per OpenMP thread
    */
    virtual int mmin(size_t nv, vec_t &x0, double &fmin,
                     std::vector<func_t> &func) {

      // Check that we have at least one variable
      if (nv==0) {
        O2SCL_ERR2("Tried to minimize over zero variables ",
                   "in anneal_para::mmin().",exc_einval);
      }

      // Check that enough function objects were passed
      if (func.size()<n_threads) {
        if (this->verbose>0) {
          std::cout << "anneal_para::mmin(): Not enough functions for "
                    << n_threads << " threads. Setting n_threads to "
                    << func.size() << "." << std::endl;
        }
        n_threads=func.size();
      }

      // Set number of threads
#ifdef O2SCL_OPENMP
      omp_set_num_threads(n_threads);
#pragma omp parallel
      {
        n_threads=omp_get_num_threads();
      }
#else
      n_threads=1;
#endif

      // Set starting time
#ifdef O2SCL_MPI
      start_time=MPI_Wtime();
#else
      start_time=time(0);
#endif

      // Initial value of step normalization
      this->step_norm=1.0;

      std::vector<vec_t> x(n_threads), new_x(n_threads);
      std::vector<vec_t> old_x(n_threads), best_x(n_threads);

      ubvector E(n_threads), new_E(n_threads), best_E(n_threads);
      ubvector old_E(n_threads), r(n_threads);

      double T;

      int iter=0;

      // Allocate the random number generators
      vrng.resize(n_threads);

      // Seed the random number generators
      unsigned long int s=time(0);
      for(size_t it=0;it<n_threads;it++) {
        vrng[it].set_seed(s+it);
      }

      // Setup initial temperature and step sizes
      this->start(nv,T);

#ifdef O2SCL_OPENMP
#pragma omp parallel default(shared)
#endif
      {
#ifdef O2SCL_OPENMP
#pragma omp for
#endif
        for(size_t it=0;it<n_threads;it++) {

          // Allocate space
          x[it].resize(nv);
          new_x[it].resize(nv);
          best_x[it].resize(nv);
          old_x[it].resize(nv);

          // Copy initial point
          for(size_t j=0;j<nv;j++) {
            x[it][j]=x0[j];
            best_x[it][j]=x0[j];
          }

          // Perform first function evaluation
          E[it]=func[it](nv,x[it]);
          best_E[it]=E[it];
        }
      }
      // End of parallel region

      bool done=false;
      while (!done) {

        // Count the number of moves accepted for thread 0
        size_t nmoves=0;

#ifdef O2SCL_OPENMP
#pragma omp parallel default(shared)
#endif
        {
#ifdef O2SCL_OPENMP
#pragma omp for
#endif
          for(size_t it=0;it<n_threads;it++) {

            // Copy old value of x for next() function
            for(size_t j=0;j<nv;j++) old_x[it][j]=x[it][j];
            old_E[it]=E[it];

            for (int i=0;i<this->ntrial;++i) {

              step(x[it],new_x[it],nv,it);

              new_E[it]=func[it](nv,new_x[it]);
#ifdef O2SCL_MPI
              double elapsed=MPI_Wtime()-start_time;
#else
              double elapsed=time(0)-start_time;
#endif
              if (max_time>0.0 && elapsed>max_time) {
                i=this->ntrial;
                done=true;
              }

              // Store best value obtained so far
              if (new_E[it]<=best_E[it]) {
                for(size_t j=0;j<nv;j++) best_x[it][j]=new_x[it][j];
                best_E[it]=new_E[it];
              }

              if (done==false) {
                // Take the crucial step: see if the new point is
                // accepted or not, as determined by the Boltzmann
                // probability
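                // (moves which lower the energy are always
                // accepted; others are accepted with probability
                // exp(-(new_E-E)/(boltz*T)))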
                if (new_E[it]<E[it]) {
                  for(size_t j=0;j<nv;j++) x[it][j]=new_x[it][j];
                  E[it]=new_E[it];
                  if (it==0) nmoves++;
                } else {
                  r[it]=vrng[it].random();
                  if (r[it]<exp(-(new_E[it]-E[it])/(this->boltz*T))) {
                    for(size_t j=0;j<nv;j++) x[it][j]=new_x[it][j];
                    E[it]=new_E[it];
                    if (it==0) nmoves++;
                  }
                }
              }
            }

          }
        }
        // End of parallel region

        // Find best point over all threads
        fmin=best_E[0];
        for(size_t iv=0;iv<nv;iv++) {
          x0[iv]=best_x[0][iv];
        }
        for(size_t i=1;i<n_threads;i++) {
          if (best_E[i]<fmin) {
            fmin=best_E[i];
            for(size_t iv=0;iv<nv;iv++) {
              x0[iv]=best_x[i][iv];
            }
          }
        }

        if (this->verbose>0) {
          this->print_iter(nv,x0,fmin,iter,T,"anneal_para");
          iter++;
          if (this->verbose>1) {
            char ch;
            std::cin >> ch;
          }
        }

        // See if we're finished and proceed to the next step
        if (done==false) {

          // std::vector<bool> doesn't work here so we use an
          // integer type instead
          std::vector<int> done_arr(n_threads);

#ifdef O2SCL_OPENMP
#pragma omp parallel default(shared)
#endif
          {
#ifdef O2SCL_OPENMP
#pragma omp for
#endif
            for(size_t it=0;it<n_threads;it++) {
              done_arr[it]=0;
              next(nv,old_x[it],old_E[it],x[it],E[it],T,nmoves,
                   x0,fmin,done_arr[it]);
            }
          }
          // End of parallel region

          // Decrease the temperature
          T/=this->T_dec;

          // We're done when all threads report done
          done=true;
          for(size_t it=0;it<n_threads;it++) {
            if (done_arr[it]==0) done=false;
          }
        }

      }

#ifdef O2SCL_MPI

      if (collect_all_ranks) {

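        // The minima are collected pairwise along the chain of MPI
        // ranks: each rank (except the last) receives the minimum
        // from the rank above it, and each rank (except rank 0)
        // sends the result down, so rank 0 obtains the global
        // minimum, which is then passed back up the chain.
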
        int mpi_rank, mpi_size;
        // Get MPI rank and size
        MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
        MPI_Comm_size(MPI_COMM_WORLD,&mpi_size);
        if (mpi_size>1) {

          double fmin_new;
          std::vector<double> x0_new(nv);

          if (mpi_rank<mpi_size-1) {

            // Get the minimum from the higher rank
            int tag=0;
            MPI_Recv(&fmin_new,1,MPI_DOUBLE,mpi_rank+1,tag,
                     MPI_COMM_WORLD,MPI_STATUS_IGNORE);
            tag=1;
            MPI_Recv(&(x0_new[0]),nv,MPI_DOUBLE,mpi_rank+1,tag,
                     MPI_COMM_WORLD,MPI_STATUS_IGNORE);

            // Update x0 and fmin if necessary
            if (fmin_new<fmin) {
              for(size_t ik=0;ik<nv;ik++) {
                x0[ik]=x0_new[ik];
              }
              fmin=fmin_new;
            } else {
              for(size_t ik=0;ik<nv;ik++) {
                x0_new[ik]=x0[ik];
              }
            }
          }

          if (mpi_rank>0) {

            // Copy the minimum into the buffer
            for(size_t ik=0;ik<nv;ik++) x0_new[ik]=x0[ik];

            // Send the minimum down to the lower rank
            MPI_Send(&fmin,1,MPI_DOUBLE,mpi_rank-1,0,MPI_COMM_WORLD);
            MPI_Send(&(x0_new[0]),nv,MPI_DOUBLE,mpi_rank-1,1,
                     MPI_COMM_WORLD);
          }

          if (mpi_rank>0) {
            // Obtain the global minimum from the lower rank
            int tag=2;
            MPI_Recv(&fmin_new,1,MPI_DOUBLE,mpi_rank-1,tag,
                     MPI_COMM_WORLD,MPI_STATUS_IGNORE);
            tag=3;
            MPI_Recv(&(x0_new[0]),nv,MPI_DOUBLE,mpi_rank-1,tag,
                     MPI_COMM_WORLD,MPI_STATUS_IGNORE);

            // Update x0 and fmin if necessary
            if (fmin_new<fmin) {
              for(size_t ik=0;ik<nv;ik++) {
                x0[ik]=x0_new[ik];
              }
              fmin=fmin_new;
            }
          }

          if (mpi_rank<mpi_size-1) {
            // Send the minimum back to the higher rank
            MPI_Send(&fmin,1,MPI_DOUBLE,mpi_rank+1,2,MPI_COMM_WORLD);
            MPI_Send(&(x0_new[0]),nv,MPI_DOUBLE,mpi_rank+1,3,
                     MPI_COMM_WORLD);
          }

          // Ensure that x0_new isn't deallocated before the
          // communication is done
          MPI_Barrier(MPI_COMM_WORLD);

        }

      }

#endif

      return 0;
    }

    /// Determine how to change the minimization for the next iteration
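    ///
    /// The minimization is finished when the temperature for the
    /// next iteration, <tt>T/T_dec</tt>, drops below \c tol_abs.
    /// If no moves were accepted in the last pass
    /// (<tt>n_moves==0</tt>), the step size is decreased and the
    /// current point is reset to the best point found so far.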
    virtual int next(size_t nvar, vec_t &x_old, double min_old,
                     vec_t &x_new, double &min_new, double &T,
                     size_t n_moves, vec_t &best_x, double best_E,
                     int &finished) {

      if (T/this->T_dec<this->tol_abs) {
        finished=1;
        return success;
      }
      if (n_moves==0) {
        // If we haven't made any progress, shrink the step by
        // decreasing step_norm
        this->step_norm/=this->step_dec;
        // Also reset x to the best value found so far
        for(size_t i=0;i<nvar;i++) {
          x_new[i]=best_x[i];
        }
        min_new=best_E;
      }
      return success;
    }

    /** \brief Calculate the minimum \c fmin of \c func with respect
        to the array \c x0 of size \c nv, making one copy of the
        function object for each OpenMP thread
    */
    virtual int mmin(size_t nv, vec_t &x0, double &fmin,
                     func_t &func) {
#ifdef O2SCL_OPENMP
      omp_set_num_threads(n_threads);
#pragma omp parallel
      {
        n_threads=omp_get_num_threads();
      }
#else
      n_threads=1;
#endif
      std::vector<func_t> vf(n_threads);
      for(size_t i=0;i<n_threads;i++) {
        vf[i]=func;
      }
      return mmin(nv,x0,fmin,vf);
    }

    /// Return string denoting type ("anneal_para")
    virtual const char *type() { return "anneal_para"; }
    //@}

  };

#ifndef DOXYGEN_NO_O2NS
}
#endif

#endif