CombineHarvester
BinByBin.cc
Go to the documentation of this file.
2 #include <iostream>
3 #include <string>
4 #include <vector>
5 #include "boost/format.hpp"
6 #include "boost/lexical_cast.hpp"
7 #include "Math/QuantFunc.h"
8 
9 namespace ch {
10 
12  : pattern_("CMS_$ANALYSIS_$CHANNEL_$BIN_$ERA_$PROCESS_bin_$#"),
13  v_(0),
14  bbb_threshold_(0.),
15  merge_threshold_(0.),
16  fix_norm_(true),
17  poisson_errors_(false),
18  merge_zero_bins_(true),
19  merge_saturated_bins_(true) {}
20 
21 
23  // Reduce merge_threshold very slightly to avoid numerical issues
24  // E.g. two backgrounds each with bin error 1.0. merge_threshold of
25  // 0.5 should not result in merging - but can do depending on
26  // machine and compiler
27  double merge_threshold = BinByBinFactory::GetMergeThreshold() - 1E-9 * BinByBinFactory::GetMergeThreshold();
28  auto bins = cb.bin_set();
29  for (auto const& bin : bins) {
30  unsigned bbb_added = 0;
31  unsigned bbb_removed = 0;
32  CombineHarvester tmp = std::move(cb.cp().bin({bin}).histograms());
33  std::vector<Process *> procs;
34  tmp.ForEachProc([&](Process *p) {
35  if (p->shape()->GetSumw2N() == 0) {
36  std::cout << "Process " << p->process()
37  << " does not continue the weights information needed for "
38  "valid errors, skipping\n";
39  } else {
40  procs.push_back(p);
41  }
42  });
43  if (procs.size() == 0) continue;
44 
45  std::vector<std::unique_ptr<TH1>> h_copies(procs.size());
46  for (unsigned i = 0; i < h_copies.size(); ++i) {
47  h_copies[i] = procs[i]->ClonedScaledShape();
48  }
49 
50  for (int i = 1; i <= h_copies[0]->GetNbinsX(); ++i) {
51  double tot_bbb_added = 0.0;
52  std::vector<std::tuple<double, TH1 *, bool>> result;
53  for (unsigned j = 0; j < h_copies.size(); ++j) {
54  double val = h_copies[j]->GetBinContent(i);
55  double err = h_copies[j]->GetBinError(i);
56  // Might exclude this bin from the merging procedure:
57  // - If the content and error are both zero
58  if (val == 0.0 && err == 0.0) continue;
59  // - If the content is zero (and the error implicitly non-zero)
60  // only merge if the MergeZeroBins option is true
61  if (val == 0.0 && !BinByBinFactory::GetMergeZeroBins()) continue;
62  // - Bin participates in the merging if err/val threshold is met
63  // *OR* if the content is zero but the error > zero.
64  if (val == 0 || (err/val) > BinByBinFactory::GetAddThreshold()) {
65  bbb_added += 1;
66  bool can_expand = true;
67  if (!BinByBinFactory::GetMergeSaturatedBins() && err >= val) {
68  can_expand = false;
69  } else {
70  tot_bbb_added += (err * err);
71  }
72  result.push_back(std::make_tuple(err*err, h_copies[j].get(), can_expand));
73  }
74  }
75  if (tot_bbb_added == 0.0) continue;
76  std::sort(result.begin(), result.end());
77  double removed = 0.0;
78  for (unsigned r = 0; r < result.size(); ++r) {
79  if ((std::get<0>(result[r]) + removed) < (merge_threshold * tot_bbb_added) &&
80  r < (result.size() - 1) && std::get<2>(result[r])) {
81  bbb_removed += 1;
82  removed += std::get<0>(result[r]);
83  std::get<1>(result[r])->SetBinError(i, 0.0);
84  }
85  }
86  double expand = std::sqrt(1. / (1. - (removed / tot_bbb_added)));
87  for (unsigned r = 0; r < result.size(); ++r) {
88  if (!std::get<2>(result[r])) continue;
89  std::get<1>(result[r])->SetBinError(
90  i, std::get<1>(result[r])->GetBinError(i) * expand);
91  }
92  }
93  for (unsigned i = 0; i < h_copies.size(); ++i) {
94  procs[i]->set_shape(std::move(h_copies[i]), false);
95  }
97  std::cout << "BIN: " << bin << std::endl;
98  std::cout << "Total bbb added: " << bbb_added << "\n";
99  std::cout << "Total bbb removed: " << bbb_removed << "\n";
100  std::cout << "Total bbb =======>: " << bbb_added-bbb_removed << "\n";
101  }
102  }
103 }
104 
106  unsigned bbb_added = 0;
107  std::vector<Process *> procs;
108  src.ForEachProc([&](Process *p) {
109  procs.push_back(p);
110  });
111  for (unsigned i = 0; i < procs.size(); ++i) {
112  if (!procs[i]->shape()) continue;
113  TH1 const* h = procs[i]->shape();
114  if (h->GetSumw2N() == 0) {
115  std::cout << "Process " << procs[i]->process()
116  << " does not continue the weights information needed for "
117  "valid errors, skipping\n";
118  continue;
119  }
120  unsigned n_pop_bins = 0;
121  for (int j = 1; j <= h->GetNbinsX(); ++j) {
122  if (h->GetBinContent(j) > 0.0) ++n_pop_bins;
123  }
124  if (n_pop_bins <= 1 && BinByBinFactory::GetFixNorm()) {
125  if (BinByBinFactory::GetVerbosity() >= 1) {
126  std::cout << "Requested fixed_norm but template has <= 1 populated "
127  "bins, skipping\n";
128  std::cout << Process::PrintHeader << *(procs[i]) << "\n";
129  }
130  continue;
131  }
132  for (int j = 1; j <= h->GetNbinsX(); ++j) {
133  bool do_bbb = false;
134  double val = h->GetBinContent(j);
135  double err = h->GetBinError(j);
136  double err_lo = err;
137  double err_hi = err;
138  if (val == 0. && err > 0.) do_bbb = true;
139  if (val > 0. && (err / val) > BinByBinFactory::GetAddThreshold()) do_bbb = true;
140  // if (h->GetBinContent(j) <= 0.0) {
141  // if (h->GetBinError(j) > 0.0) {
142  // std::cout << *(procs_[i]) << "\n";
143  // std::cout << "Bin with content <= 0 and error > 0 found, skipping\n";
144  // }
145  // continue;
146  // }
147 
148  if (do_bbb && BinByBinFactory::GetPoissonErrors() && val > 0.) {
149  double n_evt_float = (val*val) / (err*err);
150  unsigned n_evt = std::floor(0.5 + n_evt_float);
151  if (n_evt == 0) n_evt = 1;
152  double cl = 0.68;
153  // For now use the exact poisson interval, it's generally more conservative
154  // than the interval based on the likelihood
155  err_hi = ROOT::Math::gamma_quantile((1.-((1.-cl)/2.)), n_evt+1, 1) - n_evt;
156  err_lo = n_evt - ROOT::Math::gamma_quantile(((1.-cl)/2.), n_evt, 1);
157  // std::cout << "Bin " << j << " content: " << val << "\tErr: " << err
158  // << "\t EffEvents: " << n_evt_float << "\t" << n_evt
159  // << "\tErrLo: " << (err_lo/n_evt_float)*val
160  // << "\tErrHi: " << (err_hi/n_evt_float)*val << "\n";
161  err_hi = (err_hi/n_evt_float) * val;
162  err_lo = (err_lo/n_evt_float) * val;
163  }
164  if (do_bbb) {
165  ++bbb_added;
166  ch::Systematic sys;
167  ch::SetProperties(&sys, procs[i]);
168  sys.set_type("shape");
169  std::string name = BinByBinFactory::GetPattern();
170  boost::replace_all(name, "$ANALYSIS", sys.analysis());
171  boost::replace_all(name, "$CHANNEL", sys.channel());
172  boost::replace_all(name, "$BIN", sys.bin());
173  boost::replace_all(name, "$BINID", boost::lexical_cast<std::string>(sys.bin_id()));
174  boost::replace_all(name, "$ERA", sys.era());
175  boost::replace_all(name, "$PROCESS", sys.process());
176  boost::replace_all(name, "$MASS", sys.mass());
177  boost::replace_all(name, "$#", boost::lexical_cast<std::string>(j));
178  sys.set_name(name);
179  sys.set_asymm(true);
180  std::unique_ptr<TH1> h_d(static_cast<TH1 *>(h->Clone()));
181  std::unique_ptr<TH1> h_u(static_cast<TH1 *>(h->Clone()));
182  h_d->SetBinContent(j, val - err_lo);
183  if (h_d->GetBinContent(j) < 0.) h_d->SetBinContent(j, 0.);
184  if (!(h_d->Integral() > 0.)) h_d->SetBinContent(j,0.00001*h->Integral());
185  h_u->SetBinContent(j, val + err_hi);
187  sys.set_value_d(1.0);
188  sys.set_value_u(1.0);
189  } else {
190  sys.set_value_d(h_d->Integral()/h->Integral());
191  sys.set_value_u(h_u->Integral()/h->Integral());
192  }
193  sys.set_shapes(std::move(h_u), std::move(h_d), nullptr);
194  dest.CreateParameterIfEmpty(sys.name());
195  dest.InsertSystematic(sys);
196  }
197  }
198  }
199  // std::cout << "bbb added: " << bbb_added << std::endl;
200 }
201 
203  MergeBinErrors(src);
204  AddBinByBin(src, dest);
205 }
206 }
unsigned GetVerbosity()
Getter functions for class attributes.
Definition: BinByBin.h:156
bool GetPoissonErrors()
Definition: BinByBin.h:161
double GetAddThreshold()
Definition: BinByBin.h:157
std::string GetPattern()
Definition: BinByBin.h:159
void MergeBinErrors(CombineHarvester &cb)
Merges histogram bin errors between processes.
Definition: BinByBin.cc:22
void MergeAndAdd(CombineHarvester &src, CombineHarvester &dest)
A convenience function which calls MergeBinErrors and AddBinByBin in turn.
Definition: BinByBin.cc:202
void AddBinByBin(CombineHarvester &src, CombineHarvester &dest)
Create bin-by-bin shape uncertainties for every process in src, and add these to dest
Definition: BinByBin.cc:105
bool GetMergeZeroBins()
Definition: BinByBin.h:162
bool GetMergeSaturatedBins()
Definition: BinByBin.h:163
double GetMergeThreshold()
Definition: BinByBin.h:158
void CreateParameterIfEmpty(std::string const &name)
void ForEachProc(Function func)
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester & histograms()
void InsertSystematic(ch::Systematic const &sys)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
virtual std::string const & process() const
Definition: Object.h:20
virtual std::string const & bin() const
Definition: Object.h:17
virtual int bin_id() const
Definition: Object.h:35
virtual std::string const & analysis() const
Definition: Object.h:26
virtual std::string const & era() const
Definition: Object.h:29
virtual std::string const & mass() const
Definition: Object.h:38
virtual std::string const & channel() const
Definition: Object.h:32
static std::ostream & PrintHeader(std::ostream &out)
Definition: Process.cc:153
TH1 const * shape() const
Definition: Process.h:52
void set_shapes(std::unique_ptr< TH1 > shape_u, std::unique_ptr< TH1 > shape_d, TH1 const *nominal)
Definition: Systematic.cc:128
void set_value_d(double const &value_d)
Definition: Systematic.h:30
void set_name(std::string const &name)
Definition: Systematic.cc:54
void set_value_u(double const &value_u)
Definition: Systematic.h:27
std::string const & name() const
Definition: Systematic.h:22
void set_asymm(bool const &asymm)
Definition: Systematic.h:36
void set_type(std::string const &type)
Definition: Systematic.h:24
Definition: Algorithm.h:10
void SetProperties(T *first, U const *second)
Definition: Utilities.h:59