CombineHarvester
AutoRebin.cc
Go to the documentation of this file.
2 #include <iostream>
3 #include <string>
4 #include <vector>
5 #include "boost/format.hpp"
6 #include "boost/lexical_cast.hpp"
7 
8 namespace ch {
9 
11  : v_(0),
12  rebin_mode_(0),
13  perform_rebin_(true),
14  bin_threshold_(0.),
15  bin_uncert_fraction_(100.) {}
16 
17 
19 
20  auto bins = src.bin_set();
21  //Perform rebinning separately for each channel-category of the CH instance
22 
23  for (auto bin : bins) {
24  //Build histogram containing total of all backgrounds, avoid using
25  //GetShapeWithUncertainty so that possible negative bins are retained
26  TH1F data_obs = src.cp().bin({bin}).GetObservedShape();
27  TH1F total_bkg;
28  if (data_obs.GetXaxis()->GetXbins()->GetArray()){
29  total_bkg = TH1F("","",data_obs.GetNbinsX(),
30  data_obs.GetXaxis()->GetXbins()->GetArray()) ;
31  } else {
32  total_bkg = TH1F("","",data_obs.GetNbinsX(),
33  data_obs.GetXaxis()->GetBinLowEdge(1),data_obs.GetXaxis()->GetBinLowEdge(data_obs.GetNbinsX()+1));
34  }
35  src.cp().bin({bin}).backgrounds().ForEachProc([&](ch::Process *proc) {
36  total_bkg.Add((proc->ClonedScaledShape()).get());
37  });
38 
39  //Create a vector to store the original and new binning
40  int nbins = total_bkg.GetNbinsX();
41  std::vector<double> init_bins;
42  for(int i=1; i<=nbins+1; ++i) init_bins.push_back(total_bkg.GetBinLowEdge(i));
43  std::cout << "[AutoRebin] Searching for bins failing conditions "
44  "for analysis bin id " << bin << std::endl;
45  std::vector<double> new_bins = init_bins;
46 
47  //Call to function to fill new_bins vector with the recommended binning
48  AutoRebin::FindNewBinning(total_bkg, new_bins, bin_threshold_,
49  bin_uncert_fraction_, rebin_mode_);
50 
51  //Inform user if binning has been modified
52  if(new_bins.size() != init_bins.size()) {
53  std::cout << "[AutoRebin] Some bins not satisfying requested condition "
54  "found for analysis bin " << bin << ", merging with neighbouring bins"
55  << std::endl;
56  if(v_ > 0) {
57  unsigned i_old = 0;
58  unsigned i_new = 0;
59  std::cout << (
60  boost::format("%-21s %-10s %-21s\n") % "Init Edges/Widths" % "Content" % "New Edges/Widths");
61 
62  for (; i_old < init_bins.size(); ++i_old) {
63  double i_old_width = (i_old < (init_bins.size() - 1))
64  ? (init_bins[i_old + 1] - init_bins[i_old])
65  : 0.;
66  std::cout << (
67  boost::format("%-10.0f %-10.0f %-10.2g") % init_bins[i_old] % i_old_width % total_bkg.GetBinContent(i_old+1));
68  bool new_aligned = (i_new < new_bins.size()) ? std::fabs(init_bins[i_old] - new_bins[i_new]) < 1E-8 : false;
69  if (new_aligned) {
70  double i_new_width = (i_new < (new_bins.size() - 1))
71  ? (new_bins[i_new + 1] - new_bins[i_new])
72  : 0.;
73 
74  std::cout << (
75  boost::format("%-10.0f %-10.0f\n") % new_bins[i_new] % i_new_width);
76  ++i_new;
77  } else {
78  std::cout << (
79  boost::format("%-10s %-10s\n") % "-" % "-");
80  }
81  }
82  // std::cout << "Original binning: " << std::endl;
83  // for (std::vector<double>::const_iterator i =
84  // init_bins.begin(); i != init_bins.end(); ++i) std::cout << *i << ", ";
85  // std::cout << std::endl;
86  // std::cout << "New binning: " << std::endl;
87  // for (std::vector<double>::const_iterator i = new_bins.begin();
88  // i != new_bins.end(); ++i)
89  // std::cout << *i << ", ";
90  // std::cout << std::endl;
91  }
92  //Altering binning in CH instance for all distributions if requested
93  if(perform_rebin_) {
94  std::cout << "[AutoRebin] Applying binning to all relevant distributions "
95  "for analysis bin id " << bin << std::endl;
96  dest.cp().bin({bin}).VariableRebin(new_bins);
97  }
98  } else std::cout << "[AutoRebin] Did not find any bins to merge for analysis "
99  "bin id: " << bin << std::endl;
100  }
101 }
102 
103 
104 void AutoRebin::FindNewBinning(TH1F &total_bkg, std::vector<double> &new_bins,
105  double bin_condition, double bin_uncert_fraction, int mode) {
106 
107  bool all_bins = true;
108  //Find the maximum bin
109  int hbin_idx = total_bkg.GetMaximumBin();
110  int nbins = total_bkg.GetNbinsX();
111  std::cout << "[AutoRebin::FindNewBinning] Searching for bins failing condition "
112  "using algorithm mode: " << mode << std::endl;
113 
114  //Mode 0 is the simplest version of merging algorithm. It loops from the
115  //peak bin left to bin 1 then right to final bin. When the "next" bin in
116  //either direction fails
117  //the threshold, it removes the boundary between the current bin and the
118  //next bin. This will be overly conservative in situations where a better
119  //solution is to sum a subset of subsequent bins to reach the threshold and
120  //will lead to more rebinning than necessary.
121  if(mode==0) {
122  new_bins.clear();
123  //Loop from highest bin, first left and then right, merging bins where
124  //necessary to make new list of BinLowEdges
125  if(v_>0) std::cout << "[AutoRebin::FindNewBinning] Testing bins of "
126  "total_bkg hist to find those failing condition, starting from "
127  "maximum bin: " << hbin_idx << std::endl;
128  for(int idx=hbin_idx; idx>1; --idx) {
129  if(v_>0) std::cout << "Bin index: " << idx << ", BinLowEdge: " <<
130  total_bkg.GetBinLowEdge(idx) << ", Bin content: " <<
131  total_bkg.GetBinContent(idx) << ", Bin error fraction: " <<
132  total_bkg.GetBinError(idx-1)/total_bkg.GetBinContent(idx-1)
133  << std::endl;
134  if(total_bkg.GetBinContent(idx-1) > bin_condition
135  && (total_bkg.GetBinError(idx-1)/total_bkg.GetBinContent(idx-1)
136  < bin_uncert_fraction)
137  && idx!=1)
138  new_bins.push_back(total_bkg.GetBinLowEdge(idx));
139  }
140  for(int idx=hbin_idx+1; idx<=nbins; ++idx) {
141  if(v_>0) std::cout << "Bin index: " << idx << ", BinLowEdge: " <<
142  total_bkg.GetBinLowEdge(idx) << ", Bin content: " <<
143  total_bkg.GetBinContent(idx) << ", Bin error fraction: " <<
144  total_bkg.GetBinError(idx)/total_bkg.GetBinContent(idx)
145  << std::endl;
146  if(total_bkg.GetBinContent(idx) > bin_condition
147  && (total_bkg.GetBinError(idx)/total_bkg.GetBinContent(idx)
148  < bin_uncert_fraction))
149  new_bins.push_back(total_bkg.GetBinLowEdge(idx));
150  }
151  //Add to vector the extremal bins, which are untreated in the above
152  new_bins.push_back(total_bkg.GetBinLowEdge(1));
153  new_bins.push_back(total_bkg.GetBinLowEdge(nbins+1));
154  std::sort(new_bins.begin(), new_bins.end());
155 
156  //Mode 1 finds bins below threshold, then tries merging with bins left or
157  //right until threshold is met, the final choice being the one which
158  //minimises the number of required bin merges.
159  } else if(mode == 1 || mode == 2) {
160  //Start from finding the minimum bin and the bin with largest fractional
161  //error
162  int lbin_idx = 0;
163  if (mode == 1) {
164  lbin_idx = total_bkg.GetMinimumBin();
165  } else if (mode == 2) {
166  double maxval = std::numeric_limits<double>::max();
167  // loop through bins from left to right - we take the rightmost
168  // bin in the case multiple bins are tied on the smallest value
169  for (int idx = 1; idx <= total_bkg.GetNbinsX(); ++idx) {
170  if (total_bkg.GetBinContent(idx) <= maxval) {
171  lbin_idx = idx;
172  maxval = total_bkg.GetBinContent(idx);
173  }
174  }
175  }
176 
177  int herrbin_idx = GetMaximumFracUncertBin(total_bkg);
178  bool bin_tot_flag = total_bkg.GetBinContent(lbin_idx) <= bin_condition;
179  bool bin_err_flag = total_bkg.GetBinError(herrbin_idx)/
180  total_bkg.GetBinContent(herrbin_idx) >= bin_uncert_fraction;
181  if(bin_tot_flag || bin_err_flag) {
182  //Fix issues with minimal bins in first pass and bin errors after
183  lbin_idx = (bin_tot_flag ? lbin_idx : herrbin_idx);
184  if(v_>0 && bin_tot_flag) std::cout << "[AutoRebin::FindNewBinning] Testing "
185  "bins of total_bkg hist to find those with entry <= " << bin_condition
186  << ", starting from bin: " << lbin_idx << std::endl;
187  if(v_>0 && !bin_tot_flag) std::cout << "[AutoRebin::FindNewBinning] Testing "
188  "bins of total_bkg hist to find those with fractional error >= " <<
189  bin_uncert_fraction << ", starting from bin: " << lbin_idx << std::endl;
190  //loop left first
191  double left_tot = total_bkg.GetBinContent(lbin_idx);
192  double left_err_tot = total_bkg.GetBinError(lbin_idx);
193  int idx = lbin_idx - 1;
194  //Note: this logic works so long as bins with 0 entries are merged
195  //first, to give sensible values for the fractional error
196  while (((bin_tot_flag && left_tot <= bin_condition) || (!bin_tot_flag &&
197  left_err_tot/left_tot >= bin_uncert_fraction)) && idx > 0) {
198  left_err_tot = sqrt(pow(left_err_tot,2) + pow(total_bkg.GetBinError(idx),2));
199  left_tot = left_tot + total_bkg.GetBinContent(idx);
200  if(v_>0) std::cout << "Moving left, bin index: " << idx << ", BinLowEdge: " <<
201  total_bkg.GetBinLowEdge(idx) << ", Bin content: " <<
202  total_bkg.GetBinContent(idx) << ", Running total from combining bins: "
203  << left_tot << ", Current fractional error: " << left_err_tot/left_tot
204  << std::endl;
205  --idx;
206  }
207  int left_bins = lbin_idx - idx;
208  double right_tot = total_bkg.GetBinContent(lbin_idx);
209  double right_err_tot = total_bkg.GetBinError(lbin_idx);
210  idx = lbin_idx + 1;
211  //now try right
212  while (((bin_tot_flag && right_tot <= bin_condition) || (!bin_tot_flag &&
213  right_err_tot/right_tot >= bin_uncert_fraction)) && idx <= nbins) {
214  right_err_tot = sqrt(pow(right_err_tot,2) + pow(total_bkg.GetBinError(idx),2));
215  right_tot = right_tot + total_bkg.GetBinContent(idx);
216  if(v_>0) std::cout << "Moving right, bin index: " << idx << ", BinLowEdge: " <<
217  total_bkg.GetBinLowEdge(idx) << ", Bin content: " <<
218  total_bkg.GetBinContent(idx) << ", Running total from combining bins: "
219  << right_tot << ", Current fractional error: " << right_err_tot/right_tot
220  << std::endl;
221  ++idx;
222  }
223  int right_bins = idx - lbin_idx;
224  bool left_pass = bin_tot_flag ? left_tot > bin_condition :
225  left_err_tot/left_tot < bin_uncert_fraction;
226  bool right_pass = bin_tot_flag ? right_tot > bin_condition :
227  right_err_tot/right_tot < bin_uncert_fraction;
228  if (mode == 2) right_pass = false; // never merge right in mode 2
229  //Decision on which way to merge - remove relevant entries from
230  //binning vector as applicable
231  if(left_pass && !right_pass) {
232  if(v_>0) std::cout << "Merging left using " << left_bins - 1 <<
233  " bins" << std::endl;
234  for(int i = lbin_idx; i > lbin_idx - left_bins + 1; --i) {
235  if(v_>0) std::cout << "Erasing bin low edge for bin " << i <<
236  ", BinLowEdge: " << total_bkg.GetBinLowEdge(i) << std::endl;
237  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
238  total_bkg.GetBinLowEdge(i)),new_bins.end() );
239  }
240  } else if(right_pass && !left_pass) {
241  if(v_>0) std::cout << "Merging right using " << right_bins - 1 <<
242  " bins" << std::endl;
243  for(int i = lbin_idx; i < lbin_idx + right_bins - 1; ++i) {
244  if(v_>0) std::cout << "Erasing bin upper edge for bin " << i <<
245  ", BinUpperEdge: " << total_bkg.GetBinLowEdge(i+1) << std::endl;
246  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
247  total_bkg.GetBinLowEdge(i+1)),new_bins.end() );
248  }
249  } else if(left_pass && right_pass && left_bins < right_bins) {
250  if(v_>0) std::cout << "Merging left using " << left_bins - 1 <<
251  " bins" << std::endl;
252  for(int i = lbin_idx; i > lbin_idx - left_bins + 1; --i) {
253  if(v_>0) std::cout << "Erasing bin low edge for bin " << i <<
254  ", BinLowEdge: " << total_bkg.GetBinLowEdge(i) << std::endl;
255  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
256  total_bkg.GetBinLowEdge(i)),new_bins.end() );
257  }
258  } else if(left_pass && right_pass && left_bins > right_bins) {
259  if(v_>0) std::cout << "Merging right using " << right_bins - 1 <<
260  " bins" << std::endl;
261  for(int i = lbin_idx; i < lbin_idx + right_bins - 1 ; ++i) {
262  if(v_>0) std::cout << "Erasing bin upper edge for bin " << i <<
263  ", BinUpperEdge: " << total_bkg.GetBinLowEdge(i+1) << std::endl;
264  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
265  total_bkg.GetBinLowEdge(i+1)),new_bins.end() );
266  }
267  //In case where left and right are equally good, merge towards the
268  //peak (unclear to me if this is really always better but it generally more
269  //conservative in leading to bins with higher content)
270  } else if(left_pass && right_pass && left_bins == right_bins &&
271  lbin_idx < hbin_idx) {
272  if(v_>0) std::cout << "Merging right using " << right_bins - 1 <<
273  " bins" << std::endl;
274  for(int i = lbin_idx; i < lbin_idx + right_bins - 1; ++i) {
275  if(v_>0) std::cout << "Erasing bin upper edge for bin " << i <<
276  ", BinUpperEdge: " << total_bkg.GetBinLowEdge(i+1) << std::endl;
277  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
278  total_bkg.GetBinLowEdge(i+1)),new_bins.end() );
279  }
280  } else if(left_pass && right_pass && left_bins == right_bins &&
281  lbin_idx > hbin_idx) {
282  if(v_>0) std::cout << "Merging left using " << left_bins - 1 <<
283  " bins" << std::endl;
284  for(int i = lbin_idx; i > lbin_idx - left_bins + 1; --i) {
285  if(v_>0) std::cout << "Erasing bin low edge for bin " << i <<
286  ", BinLowEdge: " << total_bkg.GetBinLowEdge(i) << std::endl;
287  new_bins.erase( std::remove( new_bins.begin(), new_bins.end(),
288  total_bkg.GetBinLowEdge(i)),new_bins.end() );
289  }
290  } else if(!left_pass && !right_pass) {
291  std::cout << "[AutoRebin::FindNewBinning] WARNING: No solution found "
292  "to satisfy condition, try merging all bins" << std::endl;
293  for(int i = 2; i<=nbins; ++i) new_bins.erase( std::remove( new_bins.begin(),
294  new_bins.end(), total_bkg.GetBinLowEdge(i)),new_bins.end() );
295  }
296  }
297  } else {
298  std::cout << "[AutoRebin::FindNewBinning] Chosen mode " << mode << " not"
299  " currently supported, exiting without altering binning" << std::endl;
300  return;
301  }
302 
303  TH1F* total_bkg_new;
304  total_bkg_new = (TH1F*)total_bkg.Rebin(new_bins.size()-1, "total_bkg_new",
305  &new_bins[0]);
306 
307  //Flag checks if all bins now satify the condition after pass through
308  int nbins_new = total_bkg_new->GetNbinsX();
309  if(nbins_new!=nbins) {
310  if(v_>0) std::cout << std::endl;
311  if(v_>0) std::cout << "[AutoRebin::FindNewBinning] New binning found: "
312  << std::endl;
313  for(int i=1; i<=nbins_new+1; ++i) {
314  if(v_>0) std::cout << "Bin index: " << i << ", BinLowEdge: " <<
315  total_bkg_new->GetBinLowEdge(i) << ", Bin content: " <<
316  total_bkg_new->GetBinContent(i) << ", Bin error fraction: " <<
317  total_bkg_new->GetBinError(i)/total_bkg_new->GetBinContent(i)
318  << std::endl;
319  //Last bin is allowed to be failing condition, just there to enclose the
320  //penultimate bin
321  if((total_bkg_new->GetBinContent(i) <= bin_condition
322  || total_bkg_new->GetBinError(i)/total_bkg_new->GetBinContent(i)
323  >= bin_uncert_fraction)
324  && i!=nbins_new+1) all_bins=false;
325  }
326  }
327 
328  if(all_bins) return;
329  //Run function again if all_bins is not true, i.e. if it's possible that not
330  //all bins pass condition after one pass
331  else FindNewBinning(*total_bkg_new, new_bins, bin_condition, bin_uncert_fraction, mode);
332 
333 }
334 
335 
337  int idx = 1;
338  double maximum = 0;
339  for(int i = 1; i <= total_bkg.GetNbinsX(); ++i){
340  if(total_bkg.GetBinError(i)/total_bkg.GetBinContent(i) > maximum) {
341  maximum = total_bkg.GetBinError(i)/total_bkg.GetBinContent(i);
342  idx = i;
343  }
344  }
345 
346  return idx;
347 }
348 
349 
350 
351 
352 }
void FindNewBinning(TH1F &total_bkg, std::vector< double > &new_bins, double bin_condition, double bin_uncert_fraction, int mode)
Pass through the total background histogram to find bins failing the required condition ("empty" bins...
Definition: AutoRebin.cc:104
int GetMaximumFracUncertBin(TH1F &total_bkg)
Return bin with maximum value of fractional error.
Definition: AutoRebin.cc:336
void Rebin(CombineHarvester &src, CombineHarvester &dest)
Work out optimal binning using the total background histogram built from src, and apply the binning t...
Definition: AutoRebin.cc:18
void ForEachProc(Function func)
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
std::unique_ptr< TH1 > ClonedScaledShape() const
Definition: Process.cc:117
Definition: Algorithm.h:10