Doxygen 1.9.1
Toolkit for Adaptive Stochastic Modeling and Non-Intrusive ApproximatioN: Tasmanian v8.2 (development)
tsgConstructSurrogate.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2017, Miroslav Stoyanov
3  *
4  * This file is part of
5  * Toolkit for Adaptive Stochastic Modeling And Non-Intrusive ApproximatioN: TASMANIAN
6  *
7  * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
8  *
9  * 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions
12  * and the following disclaimer in the documentation and/or other materials provided with the distribution.
13  *
14  * 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse
15  * or promote products derived from this software without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
18  * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19  * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
20  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
21  * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
22  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
23  *
24  * UT-BATTELLE, LLC AND THE UNITED STATES GOVERNMENT MAKE NO REPRESENTATIONS AND DISCLAIM ALL WARRANTIES, BOTH EXPRESSED AND IMPLIED.
25  * THERE ARE NO EXPRESS OR IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, OR THAT THE USE OF THE SOFTWARE WILL NOT INFRINGE ANY PATENT,
26  * COPYRIGHT, TRADEMARK, OR OTHER PROPRIETARY RIGHTS, OR THAT THE SOFTWARE WILL ACCOMPLISH THE INTENDED RESULTS OR THAT THE SOFTWARE OR ITS USE WILL NOT RESULT IN INJURY OR DAMAGE.
27  * THE USER ASSUMES RESPONSIBILITY FOR ALL LIABILITIES, PENALTIES, FINES, CLAIMS, CAUSES OF ACTION, AND COSTS AND EXPENSES, CAUSED BY, RESULTING FROM OR ARISING OUT OF,
28  * IN WHOLE OR IN PART THE USE, STORAGE OR DISPOSAL OF THE SOFTWARE.
29  */
30 
31 #ifndef __TASMANIAN_ADDONS_CONSTRUCT_SURROGATE_HPP
32 #define __TASMANIAN_ADDONS_CONSTRUCT_SURROGATE_HPP
33 
45 #include "tsgCandidateManager.hpp"
46 
47 namespace TasGrid{
48 
66 using ModelSignature = std::function<void(std::vector<double> const &x, std::vector<double> &y, size_t thread_id)>;
67 
72 constexpr bool mode_parallel = true;
73 
78 constexpr bool mode_sequential = false;
79 
84 constexpr bool with_initial_guess = true;
85 
90 constexpr bool no_initial_guess = false;
91 
102 template<bool parallel_construction, bool use_initial_guess>
104  size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job,
105  TasmanianSparseGrid &grid,
106  std::function<std::vector<double>(TasmanianSparseGrid &)> candidates,
107  std::string const &checkpoint_filename){
108 
109  num_parallel_jobs = std::max(size_t(1), num_parallel_jobs);
110  max_samples_per_job = std::max(size_t(1), max_samples_per_job);
111  size_t num_dimensions = (size_t) grid.getNumDimensions();
112  size_t num_outputs = (size_t) grid.getNumOutputs();
113  CandidateManager manager(num_dimensions, max_samples_per_job); // keeps track of started and ordered samples
114  CompleteStorage complete(num_dimensions); // temporarily stores complete samples (batch loading is faster)
115 
116  std::string filename = checkpoint_filename;
117  std::string filename_old = checkpoint_filename + "_old";
118 
119  if (!filename.empty()){ // recover from an existing checkpoint
120  std::ifstream infile(filename, std::ios::binary);
121  try{ // attempt to recover from filename
122  if (!infile.good()) throw std::runtime_error("missing main checkpoint");
123  grid.read(infile, mode_binary);
124  complete.read(infile);
125  }catch(std::runtime_error &){
126  // main file is missing or is corrupt, try the older version
127  std::ifstream oldfile(filename_old, std::ios::binary);
128  try{
129  if (!oldfile.good()) throw std::runtime_error("missing main checkpoint");
130  grid.read(oldfile, mode_binary);
131  complete.read(oldfile);
132  }catch(std::runtime_error &){
133  // nothing could be recovered, start over from the current grid
134  }
135  }
136  }
137 
138  if (!filename.empty()){ // initial checkpoint
139  std::ofstream ofs(filename, std::ios::binary);
140  grid.write(ofs, mode_binary); // write grid to current
141  complete.write(ofs);
142  }
143 
144  // prepare several commonly used steps
145  auto checkpoint = [&]()->void{ // keeps two saved states for the constructed grid
146  if (!filename.empty()){
147  { // copy current into old and write to current
148  std::ifstream current_state(filename, std::ios::binary);
149  std::ofstream previous_state(filename, std::ios::binary);
150  previous_state << current_state.rdbuf();
151  }
152  std::ofstream ofs(filename, std::ios::binary);
153  grid.write(ofs, mode_binary); // write grid to current
154  complete.write(ofs);
155  }
156  };
157 
158  auto load_complete = [&]()->void{ // loads any complete points, does nothing if getNumStored() is zero
159  if (complete.getNumStored() > 0)
160  complete.load(grid);
161  };
162 
163  auto refresh_candidates = [&]()->void{ // loads complete and asks for new candidates
164  load_complete(); // always load before computing new candidates
165  manager = candidates(grid); // get new candidates
166  };
167 
168  size_t total_num_launched = complete.getNumStored() + grid.getNumLoaded(); // count all launched jobs, including the ones already complete
169  auto checkout_sample = [&]()->std::vector<double>{ // get the "most-important" point that has not started yet
170  auto x = manager.next(max_num_points - total_num_launched);
171  if (x.empty()){ // did not find a job, maybe we need to refresh the candidates
172  refresh_candidates();
173  x = manager.next(max_num_points - total_num_launched); // if this is empty, then we have exhausted all possible candidates
174  }
175  return x;
176  };
177 
178  // load the initial guess into y (is using initial guess), otherwise set y to the correct size
179  auto set_initial_guess = [&](std::vector<double> const &x, std::vector<double> &y)->void{
180  if (use_initial_guess){
181  if (grid.getNumLoaded()) grid.evaluateBatch(x, y);
182  else y.clear();
183  }else{
184  y.resize(num_outputs * (x.size() / num_dimensions));
185  }
186  };
187 
188  if (!grid.isUsingConstruction()) // the procedure assumes dynamic construction
189  grid.beginConstruction();
190 
191  refresh_candidates();
192 
193  if (parallel_construction == mode_parallel){ // parallel version
194  // allocate space for all x and y pairs, will be filled by workers and processed by main
195  std::vector<std::vector<double>> x(num_parallel_jobs),
196  y(num_parallel_jobs, std::vector<double>(max_samples_per_job * num_outputs));
197 
198  std::vector<int> work_flag(num_parallel_jobs);
199  constexpr int flag_done = 0;
200  constexpr int flag_computing = 1;
201  constexpr int flag_shutdown = 2;
202 
203  std::condition_variable until_someone_done;
204  std::condition_variable until_new_job;
205  std::mutex access_count_done;
206  int count_done = 0;
207 
208  // lambda that will handle the work
209  auto do_work = [&](size_t thread_id)->void{
210 
211  int my_flag = flag_computing;
212  while(my_flag == flag_computing){
213  model(x[thread_id], y[thread_id], thread_id); // does the model evaluations
214 
215  { // must guarantee sync between work_flag and count_done, use a lock
216  std::lock_guard<std::mutex> lock(access_count_done);
217  work_flag[thread_id] = flag_done;
218  count_done++;
219  }
220  until_someone_done.notify_one(); // just finished some work, notify the main thread
221 
222  { // wait till the main thread gives us an new piece of work
223  std::unique_lock<std::mutex> lock(access_count_done);
224  until_new_job.wait(lock, [&]()->bool{ return (work_flag[thread_id] != flag_done); });
225  my_flag = work_flag[thread_id];
226  }
227  }
228  };
229 
230  // launch initial set of jobs
231  std::vector<std::thread> workers(num_parallel_jobs);
232  for(size_t id=0; id<num_parallel_jobs; id++){
233  x[id] = manager.next(max_num_points - total_num_launched);
234  if (!x[id].empty()){
235  total_num_launched += x[id].size() / num_dimensions;
236  set_initial_guess(x[id], y[id]);
237  work_flag[id] = flag_computing;
238  workers[id] = std::thread(do_work, id);
239  }else{
240  work_flag[id] = flag_shutdown; // not enough samples, cancel the thread
241  }
242  }
243 
244  auto collect_finished = [&]()->bool{
245  bool any_done = false;
246  for(size_t id=0; id<num_parallel_jobs; id++){
247  if (work_flag[id] == flag_done){
248  if (!x.empty()){ // shouldn't be empty
249  complete.add(x[id], y[id]);
250  manager.complete(x[id]);
251  any_done = true;
252  }
253  if ((grid.getNumLoaded() < 1000) || (double(complete.getNumStored()) / double(grid.getNumLoaded()) > 0.2))
254  load_complete(); // move from complete into the grid
255 
256  if (total_num_launched < max_num_points){
257  // refresh the candidates if enough of the current candidates have completed
258  if (double(manager.getNumDone()) / double(manager.getNumCandidates()) > 0.2)
259  refresh_candidates();
260 
261  x[id] = checkout_sample(); // if necessary this will call refresh_candidates()
262 
263  if (!x[id].empty()){ // if empty, then we have finished all possible candidates (reached tolerance)
264  total_num_launched += x[id].size() / num_dimensions;
265  set_initial_guess(x[id], y[id]);
266  work_flag[id] = flag_computing;
267  }else{
268  work_flag[id] = flag_shutdown; // not enough samples, cancel the thread
269  }
270  }else{
271  work_flag[id] = flag_shutdown; // reached the budget, shutdown the thread
272  }
273  }
274  }
275  return any_done;
276  };
277 
278  while(manager.getNumRunning() > 0){ // main loop
279  { // lock access to the count_done variable
280  std::unique_lock<std::mutex> lock(access_count_done);
281  // unlock and wait until some else increments the "done" count
282  until_someone_done.wait(lock, [&]()->bool{ return (count_done > 0); });
283  // the lock is back on at this point, process the completed samples, reset the count and go back to waiting
284  count_done = 0;
285  if (collect_finished()) checkpoint(); // if new samples were computed, save the state
286  } // unlock the access_count_done and notify that we have loaded new jobs
287  // without the unlock, the threads will wake up but will not be able to read the worker flags
288  until_new_job.notify_all();
289  }
290 
291  load_complete(); // flush completed jobs
292 
293  for(auto &w : workers) if (w.joinable()) w.join(); // join all threads
294 
295  }else{
296  std::vector<double> x(grid.getNumDimensions()), y( grid.getNumOutputs());
297 
298  while((total_num_launched < max_num_points) && (manager.getNumCandidates() > 0)){
299  x = manager.next(max_num_points - total_num_launched);
300  if (x.empty()){ // need more candidates
301  refresh_candidates();
302  x = manager.next(max_num_points - total_num_launched); // if this is empty, then we have exhausted the candidates
303  }
304  if (!x.empty()){ // could be empty if there are no more candidates
305  total_num_launched += x.size() / num_dimensions;
306  set_initial_guess(x, y);
307  model(x, y, 0); // compute a sample
308  complete.add(x, y);
309  manager.complete(x);
310 
311  // the fist thousand points can be loaded one at a time, then add when % increase of the grid is achieved
312  if ((grid.getNumLoaded() < 1000) || (double(complete.getNumStored()) / double(grid.getNumLoaded()) > 0.2))
313  load_complete(); // also does checkpoint save
314  // if done with the top % of the grid points, recompute the candidates
315  if (double(manager.getNumDone()) / double(manager.getNumCandidates()) > 0.2)
316  refresh_candidates();
317  }
318  checkpoint();
319  }
320 
321  load_complete(); // flush completed jobs
322  }
323 }
324 
463 template<bool parallel_construction = TasGrid::mode_parallel, bool initial_guess = no_initial_guess>
465  size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job,
466  TasmanianSparseGrid &grid,
467  double tolerance, TypeRefinement criteria, int output = -1,
468  std::vector<int> const &level_limits = std::vector<int>(),
469  std::string const &checkpoint_filename = std::string()){
470  if (!grid.isLocalPolynomial() && !grid.isWavelet()) throw std::runtime_error("ERROR: construction (with tolerance and criteria) called for a grid that is not local polynomial or wavelet.");
471  constructCommon<parallel_construction, initial_guess>
472  (model, max_num_points, num_parallel_jobs, max_samples_per_job, grid,
473  [&](TasmanianSparseGrid &g)->std::vector<double>{
474  return g.getCandidateConstructionPoints(tolerance, criteria, output, level_limits);
475  }, checkpoint_filename);
476 }
477 
490 template<bool parallel_construction = TasGrid::mode_parallel, bool initial_guess = no_initial_guess>
492  size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job,
493  TasmanianSparseGrid &grid,
494  TypeDepth type, std::vector<int> const &anisotropic_weights = std::vector<int>(),
495  std::vector<int> const &level_limits = std::vector<int>(),
496  std::string const &checkpoint_filename = std::string()){
497  constructCommon<parallel_construction, initial_guess>
498  (model, max_num_points, num_parallel_jobs, max_samples_per_job, grid,
499  [&](TasmanianSparseGrid &g)->std::vector<double>{
500  return g.getCandidateConstructionPoints(type, anisotropic_weights, level_limits);
501  }, checkpoint_filename);
502 }
503 
517 template<bool parallel_construction = TasGrid::mode_parallel, bool initial_guess = no_initial_guess>
519  size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job,
520  TasmanianSparseGrid &grid,
521  TypeDepth type, int output, std::vector<int> const &level_limits = std::vector<int>(),
522  std::string const &checkpoint_filename = std::string()){
523  constructCommon<parallel_construction, initial_guess>
524  (model, max_num_points, num_parallel_jobs, max_samples_per_job, grid,
525  [&](TasmanianSparseGrid &g)->std::vector<double>{
526  return g.getCandidateConstructionPoints(type, output, level_limits);
527  }, checkpoint_filename);
528 }
529 
530 }
531 
532 #endif
Manages candidate points.
Definition: tsgCandidateManager.hpp:70
size_t getNumDone() const
Returns the number of complete jobs.
Definition: tsgCandidateManager.hpp:160
size_t getNumRunning() const
Returns the number of running jobs.
Definition: tsgCandidateManager.hpp:157
size_t getNumCandidates() const
Returns the number of all candidate jobs.
Definition: tsgCandidateManager.hpp:163
void complete(std::vector< double > const &p)
Mark a point as "complete".
Definition: tsgCandidateManager.hpp:105
std::vector< double > next(size_t remaining_budget)
Returns the next best point to compute, returns empty vector if no points are available.
Definition: tsgCandidateManager.hpp:132
Stores complete set of points before adding to the sparse grid.
Definition: tsgCandidateManager.hpp:227
void add(std::vector< double > const &x, std::vector< double > const &y)
Add a point to the stored list.
Definition: tsgCandidateManager.hpp:251
void read(std::istream &is)
Read the stored samples from the stream.
Definition: tsgCandidateManager.hpp:243
size_t getNumStored() const
Returns the number of stored points.
Definition: tsgCandidateManager.hpp:265
void write(std::ostream &os) const
Write the stored samples to a stream.
Definition: tsgCandidateManager.hpp:236
void load(TasmanianSparseGrid &grid)
Move the stored points into the grid.
Definition: tsgCandidateManager.hpp:257
The master-class that represents an instance of a Tasmanian sparse grid.
Definition: TasmanianSparseGrid.hpp:293
int getNumOutputs() const
Return the outputs of the grid, i.e., number of model outputs.
Definition: TasmanianSparseGrid.hpp:644
void write(const char *filename, bool binary=mode_binary) const
Write the grid to the given filename using either binary or ASCII format.
int getNumLoaded() const
Return the number of points already associated with model values via loadNeededValues().
Definition: TasmanianSparseGrid.hpp:657
void evaluateBatch(std::vector< FloatType > const &x, std::vector< FloatType > &y) const
Computes the value of the interpolant (or point-wise approximation) for a batch of points.
void beginConstruction()
Begin a dynamic construction procedure.
void read(const char *filename)
Read the grid from the given filename, automatically detect the format.
bool isUsingConstruction() const
Returns true if the dynamic construction procedure has been initialized, false otherwise.
Definition: TasmanianSparseGrid.hpp:1403
std::vector< double > getCandidateConstructionPoints(TypeDepth type, std::vector< int > const &anisotropic_weights=std::vector< int >(), std::vector< int > const &level_limits=std::vector< int >())
Generate a sorted list of points weighted by descending importance.
int getNumDimensions() const
Return the dimensions of the grid, i.e., number of model inputs.
Definition: TasmanianSparseGrid.hpp:642
bool isLocalPolynomial() const
Returns true if the grid is of type local polynomial, false otherwise.
Definition: TasmanianSparseGrid.hpp:1085
bool isWavelet() const
Returns true if the grid is of type wavelet, false otherwise.
Definition: TasmanianSparseGrid.hpp:1087
TypeDepth
Used by Global Sequence and Fourier grids, indicates the selection criteria.
Definition: tsgEnumerates.hpp:203
TypeRefinement
Refinement strategy for local polynomial and wavelet grids.
Definition: tsgEnumerates.hpp:425
std::function< void(std::vector< double > const &x, std::vector< double > &y, size_t thread_id)> ModelSignature
Signature of a model function to be used in the construction procedures.
Definition: tsgConstructSurrogate.hpp:66
constexpr bool mode_parallel
Allows for expressive calls to TasGrid::constructSurrogate().
Definition: tsgConstructSurrogate.hpp:72
constexpr bool with_initial_guess
Allows for expressive calls to TasGrid::constructSurrogate().
Definition: tsgConstructSurrogate.hpp:84
void constructCommon(ModelSignature model, size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job, TasmanianSparseGrid &grid, std::function< std::vector< double >(TasmanianSparseGrid &)> candidates, std::string const &checkpoint_filename)
Construction algorithm using generic candidates procedure.
Definition: tsgConstructSurrogate.hpp:103
void constructSurrogate(ModelSignature model, size_t max_num_points, size_t num_parallel_jobs, size_t max_samples_per_job, TasmanianSparseGrid &grid, double tolerance, TypeRefinement criteria, int output=-1, std::vector< int > const &level_limits=std::vector< int >(), std::string const &checkpoint_filename=std::string())
Construct a sparse grid surrogate to the model defined by the lambda.
Definition: tsgConstructSurrogate.hpp:464
constexpr bool mode_sequential
Allows for expressive calls to TasGrid::constructSurrogate().
Definition: tsgConstructSurrogate.hpp:78
constexpr bool no_initial_guess
Allows for expressive calls to TasGrid::constructSurrogate().
Definition: tsgConstructSurrogate.hpp:90
constexpr bool mode_binary
Constant allowing for more expressive selection of ascii and binary mode in IO methods.
Definition: tsgIOHelpers.hpp:68
Encapsulates the Tasmanian Sparse Grid module.
Definition: TasmanianSparseGrid.hpp:68
Manager for manipulations of candidate construction points.