CombineHarvester
CombineHarvester.h
Go to the documentation of this file.
1 #ifndef CombineTools_CombineHarvester_h
2 #define CombineTools_CombineHarvester_h
3 #include <string>
4 #include <utility>
5 #include <vector>
6 #include <memory>
7 #include <map>
8 #include <unordered_map>
9 #include <list>
10 #include <cmath>
11 #include <set>
12 #include <functional>
13 #include "boost/range/algorithm_ext/erase.hpp"
14 #include "TFile.h"
15 #include "TH1.h"
16 #include "RooWorkspace.h"
23 
24 
25 namespace ch {
26 
27 // Define some useful CombineHarvester-specific typedefs
28 typedef std::vector<std::pair<int, std::string>> Categories;
29 
31  public:
45 
59  CombineHarvester(CombineHarvester const& other);
62 
68  void SetFlag(std::string const& flag, bool const& value);
69  bool GetFlag(std::string const& flag) const;
70 
75 
95  inline void SetVerbosity(unsigned verbosity) { verbosity_ = verbosity; }
96  inline unsigned Verbosity() { return verbosity_; }
109  int ParseDatacard(std::string const& filename,
110  std::string const& analysis,
111  std::string const& era,
112  std::string const& channel,
113  int bin_id,
114  std::string const& mass);
115  int ParseDatacard(std::string const& filename,
116  std::string parse_rule = "");
117 
118  void WriteDatacard(std::string const& name, std::string const& root_file);
119  void WriteDatacard(std::string const& name, TFile & root_file);
120  void WriteDatacard(std::string const& name);
141  CombineHarvester& bin(std::vector<std::string> const& vec, bool cond = true);
142  CombineHarvester& bin_id(std::vector<int> const& vec, bool cond = true);
143  CombineHarvester& process(std::vector<std::string> const& vec, bool cond = true);
144  CombineHarvester& analysis(std::vector<std::string> const& vec, bool cond = true);
145  CombineHarvester& era(std::vector<std::string> const& vec, bool cond = true);
146  CombineHarvester& channel(std::vector<std::string> const& vec, bool cond = true);
147  CombineHarvester& mass(std::vector<std::string> const& vec, bool cond = true);
148  CombineHarvester& attr(std::vector<std::string> const& vec,std::string attr_label, bool cond = true);
149  CombineHarvester& syst_name(std::vector<std::string> const& vec, bool cond = true);
150  CombineHarvester& syst_type(std::vector<std::string> const& vec, bool cond = true);
151 
152  CombineHarvester& process_rgx(std::vector<std::string> const& vec, bool cond = true);
153 
159 
160  template<typename Function>
161  CombineHarvester& FilterAll(Function func);
162  template<typename Function>
163  CombineHarvester& FilterObs(Function func);
164  template<typename Function>
165  CombineHarvester& FilterProcs(Function func);
166  template<typename Function>
167  CombineHarvester& FilterSysts(Function func);
178  // Set generation
179  std::set<std::string> bin_set();
180  std::set<int> bin_id_set();
181  std::set<std::string> process_set();
182  std::set<std::string> analysis_set();
183  std::set<std::string> era_set();
184  std::set<std::string> channel_set();
185  std::set<std::string> mass_set();
186  std::set<std::string> syst_name_set();
187  std::set<std::string> syst_type_set();
188 
202  template <typename T,
203  typename R = typename std::decay<
204  typename std::result_of<T(Object const*)>::type>::type>
205  std::set<R> SetFromAll(T func);
206 
212  template <typename T,
213  typename R = typename std::decay<
214  typename std::result_of<T(Observation const*)>::type>::type>
215  std::set<R> SetFromObs(T func);
216 
222  template <typename T,
223  typename R = typename std::decay<
224  typename std::result_of<T(Process const*)>::type>::type>
225  std::set<R> SetFromProcs(T func);
226 
232  template <typename T,
233  typename R = typename std::decay<
234  typename std::result_of<T(Systematic const*)>::type>::type>
235  std::set<R> SetFromSysts(T func);
246  ch::Parameter const* GetParameter(std::string const& name) const;
247  ch::Parameter* GetParameter(std::string const& name);
248 
249  void UpdateParameters(std::vector<ch::Parameter> const& params);
250 
258  void UpdateParameters(RooFitResult const* fit);
259  void UpdateParameters(RooFitResult const& fit);
260 
261  std::vector<ch::Parameter> GetParameters() const;
262  void RenameParameter(std::string const& oldname, std::string const& newname);
263 
264  template<typename Function>
265  void ForEachObj(Function func);
266 
267  template<typename Function>
268  void ForEachProc(Function func);
269 
270  template<typename Function>
271  void ForEachObs(Function func);
272 
273  template<typename Function>
274  void ForEachSyst(Function func);
275 
276  void VariableRebin(std::vector<double> bins);
277  void ZeroBins(double min, double max);
278  void SetPdfBins(unsigned nbins);
279 
280  //
281  double getParFromWs(const std::string name);
282  void setParInWs(const std::string name,double value) ;
283  void renameParInWs(const std::string& name, const std::string& newName,const std::string& wsName="");
284 
295  void SetGroup(std::string const& name, std::vector<std::string> const& patterns);
296 
306  void RemoveGroup(std::string const& name, std::vector<std::string> const& patterns);
307 
314  void RenameGroup(std::string const& oldname, std::string const& newname);
315 
321  void AddDatacardLineAtEnd(std::string const& line);
322 
340  double GetRate();
341  double GetObservedRate();
342  double GetUncertainty();
343 
352  double GetUncertainty(RooFitResult const* fit, unsigned n_samples);
353  double GetUncertainty(RooFitResult const& fit, unsigned n_samples);
354  TH1F GetShape();
356 
366  TH1F GetShapeWithUncertainty(RooFitResult const* fit, unsigned n_samples);
367  TH1F GetShapeWithUncertainty(RooFitResult const& fit, unsigned n_samples);
368  TH1F GetObservedShape();
369 
370  TH2F GetRateCovariance(RooFitResult const& fit, unsigned n_samples);
371  TH2F GetRateCorrelation(RooFitResult const& fit, unsigned n_samples);
397  void AddObservations(std::vector<std::string> mass,
398  std::vector<std::string> analysis,
399  std::vector<std::string> era,
400  std::vector<std::string> channel,
402 
403  void AddProcesses(std::vector<std::string> mass,
404  std::vector<std::string> analysis,
405  std::vector<std::string> era,
406  std::vector<std::string> channel,
407  std::vector<std::string> procs,
408  ch::Categories bin, bool signal);
409  void AddSystVar(std::string const& name, double val, double err);
410  void AddSystFromProc(Process const& proc, std::string const& name,
411  std::string const& type, bool asymm, double val_u,
412  double val_d, std::string const& formula,
413  std::string const& args);
414 
415  template <class Map>
416  void AddSyst(CombineHarvester & target, std::string const& name,
417  std::string const& type, Map const& valmap);
418 
419  void ExtractShapes(std::string const& file, std::string const& rule,
420  std::string const& syst_rule);
421  void ExtractPdfs(CombineHarvester& target, std::string const& ws_name,
422  std::string const& rule, std::string norm_rule = "");
423  void ExtractData(std::string const& ws_name, std::string const& rule);
424 
425  void AddWorkspace(RooWorkspace const& ws, bool can_rename = false);
426 
427  void InsertObservation(ch::Observation const& obs);
428  void InsertProcess(ch::Process const& proc);
429  void InsertSystematic(ch::Systematic const& sys);
430 
436  void RenameSystematic(CombineHarvester& target, std::string const& old_name, std::string const& new_name);
437 
438  void CreateParameterIfEmpty(std::string const& name);
439 
446  void AddBinByBin(double threshold, bool fixed_norm, CombineHarvester* other);
447 
448 
455  void AddBinByBin(double threshold, bool fixed_norm, CombineHarvester & other);
456 
457 
464  void MergeBinErrors(double bbb_threshold, double merge_threshold);
467  void SetAutoMCStats(CombineHarvester &target, double thresh, bool sig=false, int mode=1);
468  void RenameAutoMCStatsBin(std::string const& oldname, std::string const& newname);
469  std::set<std::string> GetAutoMCStatsBins() const;
470 
471  void AddExtArgValue(std::string const& name, double const& value);
472  private:
473  friend void swap(CombineHarvester& first, CombineHarvester& second);
474 
475  // ---------------------------------------------------------------
476  // Main data members
477  // ---------------------------------------------------------------
478  std::vector<std::shared_ptr<Observation>> obs_;
479  std::vector<std::shared_ptr<Process>> procs_;
480  std::vector<std::shared_ptr<Systematic>> systs_;
481  std::map<std::string, std::shared_ptr<Parameter>> params_;
482  std::map<std::string, std::shared_ptr<RooWorkspace>> wspaces_;
483 
484  std::unordered_map<std::string, bool> flags_;
485 
486  struct AutoMCStatsSettings {
487  double event_threshold;
488  bool include_signal;
489  int hist_mode;
490 
491  AutoMCStatsSettings(double thresh, bool sig=false, int mode=1) {
492  event_threshold = thresh;
493  include_signal = sig;
494  hist_mode = mode;
495  }
496 
497  AutoMCStatsSettings() : AutoMCStatsSettings(0.) {}
498  };
499 
500  std::map<std::string, AutoMCStatsSettings> auto_stats_settings_;
501  std::vector<std::string> post_lines_;
502 
503  // ---------------------------------------------------------------
504  // typedefs
505  // ---------------------------------------------------------------
506  typedef std::pair<std::string, std::string> StrPair;
507  typedef std::vector<StrPair> StrPairVec;
508  typedef std::vector<std::string> StrVec;
509 
510 
511  // ---------------------------------------------------------------
512  // Logging
513  // ---------------------------------------------------------------
514  unsigned verbosity_;
515  std::ostream * log_;
516  std::ostream& log() const { return *log_; }
517 
518  // ---------------------------------------------------------------
519  // Private methods for the shape extraction routines
520  // --> implementation in src/CombineHarvester.cc
521  // ---------------------------------------------------------------
522  void LoadShapes(Observation* entry,
523  std::vector<HistMapping> const& mappings);
524  void LoadShapes(Process* entry,
525  std::vector<HistMapping> const& mappings);
526  void LoadShapes(Systematic* entry,
527  std::vector<HistMapping> const& mappings);
528 
529  HistMapping const& ResolveMapping(std::string const& process,
530  std::string const& bin,
531  std::vector<HistMapping> const& mappings);
532 
533  StrPairVec GenerateShapeMapAttempts(std::string process,
534  std::string category);
535 
536  std::shared_ptr<RooWorkspace> SetupWorkspace(RooWorkspace const& ws,
537  bool can_rename = false);
538 
539  void ImportParameters(RooArgSet *vars);
540 
541  RooAbsData const* FindMatchingData(Process const* proc);
542 
543  ch::Parameter * SetupRateParamVar(std::string const& name, double val, bool is_ext_arg = false);
544  void SetupRateParamFunc(std::string const& name, std::string const& formula,
545  std::string const& pars);
546  void SetupRateParamWspObj(std::string const& name, std::string const& obj, bool is_ext_arg = false);
547  bool SetupRateParamWspObjFromWsStore(std::string const& name, std::string const& obj, std::map<std::string, std::shared_ptr<RooWorkspace>> const& ws_store);
548  // ---------------------------------------------------------------
549  // Private methods for the shape writing routines
550  // ---------------------------------------------------------------
551  void WriteHistToFile(
552  TH1 * hist,
553  TFile * file,
554  std::vector<HistMapping> const& mappings,
555  std::string const& bin,
556  std::string const& process,
557  std::string const& mass,
558  std::string const& nuisance,
559  unsigned type);
560 
561 void FillHistMappings(std::vector<HistMapping> & mappings);
562 
563  // ---------------------------------------------------------------
564  // Private methods for shape/yield evaluation
565  // --> implementation in src/CombineHarvester_Evaluate.cc
566  // ---------------------------------------------------------------
567  typedef std::vector<std::vector<Systematic const*>> ProcSystMap;
568  ProcSystMap GenerateProcSystMap();
569 
570  double GetRateInternal(ProcSystMap const& lookup,
571  std::string const& single_sys = "");
572 
573  TH1F GetShapeInternal(ProcSystMap const& lookup,
574  std::string const& single_sys = "");
575 
576  inline double smoothStepFunc(double x) const {
577  if (std::fabs(x) >= 1.0/*_smoothRegion*/) return x > 0 ? +1 : -1;
578  double xnorm = x/1.0;/*_smoothRegion*/
579  double xnorm2 = xnorm*xnorm;
580  return 0.125 * xnorm * (xnorm2 * (3.*xnorm2 - 10.) + 15);
581  }
582 
583  double logKappaForX(double x, double k_low, double k_high) const;
584 
585  void ShapeDiff(double x, TH1F* target, TH1 const* nom, TH1 const* low,
586  TH1 const* high, bool linear);
587  void ShapeDiff(double x, TH1F* target, RooDataHist const* nom,
588  RooDataHist const* low, RooDataHist const* high);
589 
590  // bug fix for RooConstVar compatibility between ROOT626 and workspace created with earlier versions
591  RooWorkspace* fixRooConstVar(RooWorkspace *win, bool useRooRealVar=true, bool clean=true);
592 };
593 
594 
595 // ---------------------------------------------------------------
596 // Template method implementation
597 // ---------------------------------------------------------------
598 template <typename T, typename R>
599 std::set<R> CombineHarvester::SetFromAll(T func) {
600  std::set<R> ret;
601  for (auto const& item : obs_) ret.insert(func(item.get()));
602  for (auto const& item : procs_) ret.insert(func(item.get()));
603  for (auto const& item : systs_) ret.insert(func(item.get()));
604  return ret;
605 };
606 
607 template <typename T, typename R>
608 std::set<R> CombineHarvester::SetFromObs(T func) {
609  std::set<R> ret;
610  for (auto const& item : obs_) ret.insert(func(item.get()));
611  return ret;
612 };
613 
614 template <typename T, typename R>
615 std::set<R> CombineHarvester::SetFromProcs(T func) {
616  std::set<R> ret;
617  for (auto const& item : procs_) ret.insert(func(item.get()));
618  return ret;
619 };
620 
621 template <typename T, typename R>
622 std::set<R> CombineHarvester::SetFromSysts(T func) {
623  std::set<R> ret;
624  for (auto const& item : systs_) ret.insert(func(item.get()));
625  return ret;
626 };
627 
628 template<typename Function>
629 void CombineHarvester::ForEachObj(Function func) {
630  ForEachObs(func);
631  ForEachProc(func);
632  ForEachSyst(func);
633 }
634 
635 template<typename Function>
636 void CombineHarvester::ForEachProc(Function func) {
637  for (auto & item: procs_) func(item.get());
638 }
639 
640 template<typename Function>
641 void CombineHarvester::ForEachObs(Function func) {
642  for (auto & item: obs_) func(item.get());
643 }
644 
645 template<typename Function>
646 void CombineHarvester::ForEachSyst(Function func) {
647  for (auto & item: systs_) func(item.get());
648 }
649 
650 template<typename Function>
652  FilterObs(func);
653  FilterProcs(func);
654  FilterSysts(func);
655  return *this;
656 }
657 
658 template<typename Function>
660  boost::remove_erase_if(
661  obs_, [&](std::shared_ptr<Observation> ptr) { return func(ptr.get());
662  });
663  return *this;
664 }
665 
666 template<typename Function>
668  boost::remove_erase_if(
669  procs_, [&](std::shared_ptr<Process> ptr) { return func(ptr.get());
670  });
671  return *this;
672 }
673 template<typename Function>
675  boost::remove_erase_if(
676  systs_, [&](std::shared_ptr<Systematic> ptr) { return func(ptr.get());
677  });
678  return *this;
679 }
680 
681 template <class Map>
683  std::string const& name, std::string const& type,
684  Map const& valmap) {
685  // Keep track of which Process entries get a Systematic assigned and which
686  // don't. If verbosity is on we'll print lists of these processes at the end.
687  std::vector<ch::Process *> not_added_procs;
688  std::vector<ch::Process *> added_procs;
689  // Also track which tuples in the map did not get used. Do this by getting the
690  // full map here and then removing elements as they are used to create a
691  // Systematic.
692  auto tuples = valmap.GetTupleSet();
693  if (verbosity_ >= 1) {
694  log() << (name + ":" + type) << "\n";
695  }
696  for (unsigned i = 0; i < procs_.size(); ++i) {
697  if (!valmap.Contains(procs_[i].get())) {
698  not_added_procs.push_back(procs_[i].get());
699  continue;
700  }
701  tuples.erase(valmap.GetTuple(procs_[i].get()));
702  added_procs.push_back(procs_[i].get());
703  double val_u = valmap.ValU(procs_[i].get());
704  double val_d = valmap.ValD(procs_[i].get());
705  std::string formula = valmap.Formula(procs_[i].get());
706  std::string args = valmap.Args(procs_[i].get());
707  target.AddSystFromProc(*(procs_[i]), name, type, valmap.IsAsymm(),
708  val_u, val_d, formula, args);
709  }
710  if (tuples.size() && verbosity_ >= 1) {
711  log() << ">> Map keys that were not used to create a Systematic:\n";
712  for (auto s : tuples) {
713  log() << ch::Tuple2String(s) << "\n";
714  }
715  }
716  if (verbosity_ >= 2) {
717  Process::PrintHeader(log());
718  log() << ">> Process entries that did not get a Systematic:\n";
719  for (auto p : not_added_procs) {
720  log() << *p << "\n";
721  }
722  log() << ">> Process entries that did get a Systematic:\n";
723  for (auto p : added_procs) {
724  log() << *p << "\n";
725  }
726  }
727 }
728 }
729 
730 #endif
void SetVerbosity(unsigned verbosity)
void AddSystFromProc(Process const &proc, std::string const &name, std::string const &type, bool asymm, double val_u, double val_d, std::string const &formula, std::string const &args)
TH2F GetRateCovariance(RooFitResult const &fit, unsigned n_samples)
void ZeroBins(double min, double max)
CombineHarvester & process_rgx(std::vector< std::string > const &vec, bool cond=true)
void AddWorkspace(RooWorkspace const &ws, bool can_rename=false)
CombineHarvester & bin_id(std::vector< int > const &vec, bool cond=true)
void AddProcesses(std::vector< std::string > mass, std::vector< std::string > analysis, std::vector< std::string > era, std::vector< std::string > channel, std::vector< std::string > procs, ch::Categories bin, bool signal)
CombineHarvester & backgrounds()
void AddBinByBin(double threshold, bool fixed_norm, CombineHarvester *other)
Create bin-by-bin uncertainties.
CombineHarvester & signals()
CombineHarvester & mass(std::vector< std::string > const &vec, bool cond=true)
void RenameParameter(std::string const &oldname, std::string const &newname)
CombineHarvester & PrintObs()
void AddSystVar(std::string const &name, double val, double err)
void CreateParameterIfEmpty(std::string const &name)
TH2F GetRateCorrelation(RooFitResult const &fit, unsigned n_samples)
CombineHarvester & FilterObs(Function func)
void MergeBinErrors(double bbb_threshold, double merge_threshold)
Merge bin errors within a bin.
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void ForEachSyst(Function func)
void ForEachProc(Function func)
void RenameAutoMCStatsBin(std::string const &oldname, std::string const &newname)
std::set< R > SetFromObs(T func)
Fill an std::set using only the Observation entries.
void renameParInWs(const std::string &name, const std::string &newName, const std::string &wsName="")
void SetFlag(std::string const &flag, bool const &value)
Set a named flag value.
CombineHarvester & era(std::vector< std::string > const &vec, bool cond=true)
CombineHarvester & operator=(CombineHarvester other)
std::set< std::string > process_set()
void AddDatacardLineAtEnd(std::string const &line)
Add a line of text at the end of all datacards.
void SetGroup(std::string const &name, std::vector< std::string > const &patterns)
Add parameters to a given group.
void WriteDatacard(std::string const &name, std::string const &root_file)
CombineHarvester & attr(std::vector< std::string > const &vec, std::string attr_label, bool cond=true)
CombineHarvester & PrintAll()
CombineHarvester & analysis(std::vector< std::string > const &vec, bool cond=true)
friend void swap(CombineHarvester &first, CombineHarvester &second)
std::set< R > SetFromSysts(T func)
Fill an std::set using only the Systematic entries.
std::set< std::string > syst_type_set()
void RenameGroup(std::string const &oldname, std::string const &newname)
Rename a nuisance parameter group.
std::set< R > SetFromProcs(T func)
Fill an std::set using only the Process entries.
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
void SetAutoMCStats(CombineHarvester &target, double thresh, bool sig=false, int mode=1)
void UpdateParameters(std::vector< ch::Parameter > const &params)
std::set< std::string > mass_set()
std::set< std::string > bin_set()
CombineHarvester & syst_name(std::vector< std::string > const &vec, bool cond=true)
void InsertProcess(ch::Process const &proc)
void AddObservations(std::vector< std::string > mass, std::vector< std::string > analysis, std::vector< std::string > era, std::vector< std::string > channel, ch::Categories bin)
void ForEachObs(Function func)
CombineHarvester & FilterSysts(Function func)
bool GetFlag(std::string const &flag) const
void VariableRebin(std::vector< double > bins)
std::set< std::string > era_set()
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
std::set< std::string > channel_set()
CombineHarvester & FilterAll(Function func)
void AddSyst(CombineHarvester &target, std::string const &name, std::string const &type, Map const &valmap)
CombineHarvester & FilterProcs(Function func)
CombineHarvester & PrintParams()
CombineHarvester & histograms()
void InsertSystematic(ch::Systematic const &sys)
std::set< std::string > GetAutoMCStatsBins() const
void RemoveGroup(std::string const &name, std::vector< std::string > const &patterns)
Remove parameters to a given group.
std::set< std::string > syst_name_set()
void ExtractData(std::string const &ws_name, std::string const &rule)
void ClearDatacardLinesAtEnd()
Clear all added datacard lines.
std::set< R > SetFromAll(T func)
Fill an std::set with the return values from an arbitrary function.
CombineHarvester & PrintProcs()
CombineHarvester deep()
Creates and retunrs a deep copy of the CombineHarvester instance.
void InsertObservation(ch::Observation const &obs)
CombineHarvester & PrintSysts()
std::vector< ch::Parameter > GetParameters() const
std::set< std::string > analysis_set()
double getParFromWs(const std::string name)
void ExtractShapes(std::string const &file, std::string const &rule, std::string const &syst_rule)
void SetPdfBins(unsigned nbins)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
void ForEachObj(Function func)
void AddExtArgValue(std::string const &name, double const &value)
ch::Parameter const * GetParameter(std::string const &name) const
CombineHarvester & channel(std::vector< std::string > const &vec, bool cond=true)
CombineHarvester & syst_type(std::vector< std::string > const &vec, bool cond=true)
void setParInWs(const std::string name, double value)
void RenameSystematic(CombineHarvester &target, std::string const &old_name, std::string const &new_name)
Rename a systematic from 'old_name' to 'new_name' and add a parameter 'new_name' to CH instance 'targ...
void ExtractPdfs(CombineHarvester &target, std::string const &ws_name, std::string const &rule, std::string norm_rule="")
static std::ostream & PrintHeader(std::ostream &out)
Definition: Process.cc:153
Definition: Algorithm.h:10
std::string Tuple2String(const std::tuple< Args... > &t)
Format any std::tuple as a string.
Definition: Utilities.h:214
std::vector< std::pair< int, std::string > > Categories