CombineHarvester
CombineHarvester_Evaluate.cc
Go to the documentation of this file.
2 #include <vector>
3 #include <map>
4 #include <string>
5 #include <iostream>
6 #include <utility>
7 #include <set>
8 #include <fstream>
9 #include "boost/lexical_cast.hpp"
10 #include "boost/algorithm/string.hpp"
11 #include "boost/range/algorithm_ext/erase.hpp"
12 #include "boost/range/algorithm/find.hpp"
13 #include "boost/format.hpp"
14 #include "TDirectory.h"
15 #include "TH1.h"
16 #include "TH2.h"
24 
25 // #include "TMath.h"
26 // #include "boost/format.hpp"
27 // #include "Utilities/interface/FnPredicates.h"
28 // #include "Math/QuantFuncMathCore.h"
29 
30 namespace ch {
31 
32 CombineHarvester::ProcSystMap CombineHarvester::GenerateProcSystMap() {
33  ProcSystMap lookup(procs_.size());
34  for (unsigned i = 0; i < systs_.size(); ++i) {
35  for (unsigned j = 0; j < procs_.size(); ++j) {
36  if (MatchingProcess(*(systs_[i]), *(procs_[j]))) {
37  lookup[j].push_back(systs_[i].get());
38  }
39  }
40  }
41  return lookup;
42 }
43 
45  auto lookup = GenerateProcSystMap();
46  double err_sq = 0.0;
47  for (auto param_it : params_) {
48  double backup = param_it.second->val();
49  param_it.second->set_val(backup+param_it.second->err_d());
50  double rate_d = this->GetRateInternal(lookup, param_it.first);
51  param_it.second->set_val(backup+param_it.second->err_u());
52  double rate_u = this->GetRateInternal(lookup, param_it.first);
53  double err = std::fabs(rate_u-rate_d) / 2.0;
54  err_sq += err * err;
55  param_it.second->set_val(backup);
56  }
57  return std::sqrt(err_sq);
58 }
59 
60 double CombineHarvester::GetUncertainty(RooFitResult const* fit,
61  unsigned n_samples) {
62  return GetUncertainty(*fit, n_samples);
63 }
64 
65 double CombineHarvester::GetUncertainty(RooFitResult const& fit,
66  unsigned n_samples) {
67  auto lookup = GenerateProcSystMap();
68  double rate = GetRateInternal(lookup);
69  double err_sq = 0.0;
70 
71  // Create a backup copy of the current parameter values
72  auto backup = GetParameters();
73 
74  // Calling randomizePars() ensures that the RooArgList of sampled parameters
75  // is already created within the RooFitResult
76  RooArgList const& rands = fit.randomizePars();
77 
78  // Now create two aligned vectors of the RooRealVar parameters and the
79  // corresponding ch::Parameter pointers
80  int n_pars = rands.getSize();
81  std::vector<RooRealVar const*> r_vec(n_pars, nullptr);
82  std::vector<ch::Parameter*> p_vec(n_pars, nullptr);
83  for (unsigned n = 0; n < p_vec.size(); ++n) {
84  r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
85  p_vec[n] = GetParameter(r_vec[n]->GetName());
86  }
87 
88  for (unsigned i = 0; i < n_samples; ++i) {
89  // Randomise and update values
90  fit.randomizePars();
91  for (int n = 0; n < n_pars; ++n) {
92  if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
93  }
94 
95  double rand_rate = this->GetRateInternal(lookup);
96  double err = std::fabs(rand_rate-rate);
97  err_sq += (err*err);
98  }
99  this->UpdateParameters(backup);
100  return std::sqrt(err_sq/double(n_samples));
101 }
102 
104  auto lookup = GenerateProcSystMap();
105  TH1F shape = GetShape();
106  for (int i = 1; i <= shape.GetNbinsX(); ++i) {
107  shape.SetBinError(i, 0.0);
108  }
109  for (auto param_it : params_) {
110  double backup = param_it.second->val();
111  param_it.second->set_val(backup+param_it.second->err_d());
112  TH1F shape_d = this->GetShapeInternal(lookup, param_it.first);
113  param_it.second->set_val(backup+param_it.second->err_u());
114  TH1F shape_u = this->GetShapeInternal(lookup, param_it.first);
115  for (int i = 1; i <= shape.GetNbinsX(); ++i) {
116  double err =
117  std::fabs(shape_u.GetBinContent(i) - shape_d.GetBinContent(i)) / 2.0;
118  shape.SetBinError(i, err*err + shape.GetBinError(i));
119  }
120  param_it.second->set_val(backup);
121  }
122  for (int i = 1; i <= shape.GetNbinsX(); ++i) {
123  shape.SetBinError(i, std::sqrt(shape.GetBinError(i)));
124  }
125  return shape;
126 }
127 
128 TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const* fit,
129  unsigned n_samples) {
130  return GetShapeWithUncertainty(*fit, n_samples);
131 }
132 
133 TH1F CombineHarvester::GetShapeWithUncertainty(RooFitResult const& fit,
134  unsigned n_samples) {
135  auto lookup = GenerateProcSystMap();
136  TH1F shape = GetShapeInternal(lookup);
137  TH1F yield_sum;
138  shape.Copy(yield_sum);
139  int n_bins = shape.GetNbinsX();
140  std::vector<std::vector<float>> sample_yields_perbin(n_bins, std::vector<float>(n_samples));
141 
142  for (int i = 1; i <= n_bins; ++i) {
143  shape.SetBinError(i, 0.0);
144  yield_sum.SetBinContent(i, 0.0);
145  }
146  // Create a backup copy of the current parameter values
147  auto backup = GetParameters();
148 
149  // Calling randomizePars() ensures that the RooArgList of sampled parameters
150  // is already created within the RooFitResult
151  RooArgList const& rands = fit.randomizePars();
152 
153  // Now create two aligned vectors of the RooRealVar parameters and the
154  // corresponding ch::Parameter pointers
155  int n_pars = rands.getSize();
156  std::vector<RooRealVar const*> r_vec(n_pars, nullptr);
157  std::vector<ch::Parameter*> p_vec(n_pars, nullptr);
158  for (unsigned n = 0; n < p_vec.size(); ++n) {
159  r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
160  p_vec[n] = GetParameter(r_vec[n]->GetName());
161  }
162 
163  // Main loop through n_samples
164  for (unsigned i = 0; i < n_samples; ++i) {
165  // Randomise and update values
166  fit.randomizePars();
167  for (int n = 0; n < n_pars; ++n) {
168  if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
169  }
170  TH1F rand_shape = this->GetShapeInternal(lookup);
171  for (int j = 1; j <= n_bins; ++j) {
172  double yield = rand_shape.GetBinContent(j);
173  sample_yields_perbin[j-1][i] = yield;
174  yield_sum.SetBinContent(j, yield_sum.GetBinContent(j) + yield);
175  }
176  }
177  for (int i = 1; i <= n_bins; ++i) {
178  double yield_mean = yield_sum.GetBinContent(i)/double(n_samples);
179  for (auto x :sample_yields_perbin[i-1]){
180  double err = std::fabs(x - yield_mean);
181  shape.SetBinError(i, err*err + shape.GetBinError(i));
182  }
183  shape.SetBinError(i, std::sqrt(shape.GetBinError(i)/double(n_samples)));
184  }
185  this->UpdateParameters(backup);
186  return shape;
187 }
188 
189 TH2F CombineHarvester::GetRateCovariance(RooFitResult const& fit,
190  unsigned n_samples) {
191  auto lookup = GenerateProcSystMap();
192 
193  unsigned n = procs_.size();
194  TH1F nom("nominal", "nominal", n, 0, n);
195  TH2F res("covariance", "covariance", n, 0, n, n, 0, n);
196 
197  std::vector<CombineHarvester> ch_procs;
198  std::vector<std::string> labels;
199  unsigned nbins = this->bin_set().size();
200 
201  for (unsigned i = 0; i < procs_.size(); ++i) {
202  ch_procs.push_back(
203  this->cp().bin({procs_[i]->bin()}).process({procs_[i]->process()}));
204  if (nbins > 1) {
205  labels.push_back(procs_[i]->bin() + "," + procs_[i]->process());
206  } else {
207  labels.push_back(procs_[i]->process());
208  }
209  }
210  for (unsigned i = 0; i < procs_.size(); ++i) {
211  nom.SetBinContent(i + 1, ch_procs[i].GetRate());
212  }
213  auto backup = GetParameters();
214 
215  // Calling randomizePars() ensures that the RooArgList of sampled parameters
216  // is already created within the RooFitResult
217  RooArgList const& rands = fit.randomizePars();
218 
219  // Now create two aligned vectors of the RooRealVar parameters and the
220  // corresponding ch::Parameter pointers
221  int n_pars = rands.getSize();
222  std::vector<RooRealVar const*> r_vec(n_pars, nullptr);
223  std::vector<ch::Parameter*> p_vec(n_pars, nullptr);
224  for (unsigned n = 0; n < p_vec.size(); ++n) {
225  r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
226  p_vec[n] = GetParameter(r_vec[n]->GetName());
227  }
228 
229  // Main loop through n_samples
230  for (unsigned rnd = 0; rnd < n_samples; ++rnd) {
231  // Randomise and update values
232  fit.randomizePars();
233  for (int n = 0; n < n_pars; ++n) {
234  if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
235  }
236 
237  for (int i = 1; i <= nom.GetNbinsX(); ++i) {
238  for (int j = 1; j <= nom.GetNbinsX(); ++j) {
239  int x = j;
240  int y = nom.GetNbinsX() - (i - 1);
241  res.GetXaxis()->SetBinLabel(x, labels[j - 1].c_str());
242  res.GetYaxis()->SetBinLabel(y, labels[i - 1].c_str());
243  res.SetBinContent(
244  x, y, res.GetBinContent(x, y) +
245  (ch_procs[i - 1].GetRate() - nom.GetBinContent(i)) *
246  (ch_procs[j - 1].GetRate() - nom.GetBinContent(j)));
247  }
248  }
249  }
250 
251  for (int i = 1; i <= nom.GetNbinsX(); ++i) {
252  for (int j = 1; j <= nom.GetNbinsX(); ++j) {
253  int x = j;
254  int y = nom.GetNbinsX() - (i - 1);
255  res.SetBinContent(x, y, res.GetBinContent(x, y) / double(n_samples));
256  }
257  }
258  this->UpdateParameters(backup);
259  return res;
260 }
261 
262 TH2F CombineHarvester::GetRateCorrelation(RooFitResult const& fit,
263  unsigned n_samples) {
264  TH2F cov = GetRateCovariance(fit, n_samples);
265  TH2F res = cov;
266  res.SetName("correlation");
267  res.SetTitle("correlation");
268  for (int i = 1; i <= cov.GetNbinsX(); ++i) {
269  for (int j = 1; j <= cov.GetNbinsX(); ++j) {
270  int x = j;
271  int y = cov.GetNbinsX() - (i - 1);
272  res.SetBinContent(
273  x, y,
274  cov.GetBinContent(x, y) /
275  (std::sqrt(cov.GetBinContent(i, cov.GetNbinsX() - (i - 1))) *
276  std::sqrt(cov.GetBinContent(j, cov.GetNbinsX() - (j - 1)))));
277  }
278  }
279  return res;
280 }
281 
283  auto lookup = GenerateProcSystMap();
284  return GetRateInternal(lookup);
285 }
286 
288  auto lookup = GenerateProcSystMap();
289  return GetShapeInternal(lookup);
290 }
291 
292 double CombineHarvester::GetRateInternal(ProcSystMap const& lookup,
293  std::string const& single_sys) {
294  double rate = 0.0;
295  // TH1F tot_shape
296  for (unsigned i = 0; i < procs_.size(); ++i) {
297  double p_rate = procs_[i]->rate();
298  // If we are evaluating the effect of a single parameter
299  // check the list of associated nuisances and skip if
300  // this "single_sys" is not in the list
301  // However - we can't skip if the process has a pdf, as
302  // we haven't checked what the parameters are
303  if (single_sys != "" && !procs_[i]->pdf()) {
304  if (!ch::any_of(lookup[i], [&](Systematic const* sys) {
305  return sys->name() == single_sys;
306  })) continue;
307  }
308  for (auto sys_it : lookup[i]) {
309  if (sys_it->type() == "rateParam") {
310  continue; // don't evaluate this for now
311  }
312  double x = params_[sys_it->name()]->val();
313  if (sys_it->asymm()) {
314  p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
315  sys_it->value_u());
316  } else {
317  p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
318  }
319  }
320  rate += p_rate;
321  }
322  return rate;
323 }
324 
325 TH1F CombineHarvester::GetShapeInternal(ProcSystMap const& lookup,
326  std::string const& single_sys) {
327  TH1F shape;
328  bool shape_init = false;
329 
330  for (unsigned i = 0; i < procs_.size(); ++i) {
331  // Might be able to skip if only interested in one nuisance
332  // However - we can't skip if the process has a pdf, as
333  // we haven't checked what the parameters are
334  if (single_sys != "" && !procs_[i]->pdf()) {
335  if (!ch::any_of(lookup[i], [&](Systematic const* sys) {
336  return sys->name() == single_sys;
337  })) continue;
338  }
339 
340  double p_rate = procs_[i]->rate();
341  if (procs_[i]->shape() || procs_[i]->data()) {
342  TH1F proc_shape = procs_[i]->ShapeAsTH1F();
343  for (auto sys_it : lookup[i]) {
344  if (sys_it->type() == "rateParam") {
345  continue; // don't evaluate this for now
346  }
347  auto param_it = params_.find(sys_it->name());
348  if (param_it == params_.end()) {
349  throw std::runtime_error(
350  FNERROR("Parameter " + sys_it->name() +
351  " not found in CombineHarvester instance"));
352  }
353  double x = param_it->second->val();
354  if (sys_it->asymm()) {
355  p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
356  sys_it->value_u());
357  if (sys_it->type() == "shape" || sys_it->type() == "shapeN2" ||
358  sys_it->type() == "shapeU") {
359  bool linear = true;
360  if (sys_it->type() == "shapeN2") linear = false;
361  if (sys_it->shape_u() && sys_it->shape_d()) {
362  ShapeDiff(x * sys_it->scale(), &proc_shape, procs_[i]->shape(),
363  sys_it->shape_d(), sys_it->shape_u(), linear);
364  }
365  if (sys_it->data_u() && sys_it->data_d()) {
366  RooDataHist const* nom =
367  dynamic_cast<RooDataHist const*>(procs_[i]->data());
368  if (nom) {
369  ShapeDiff(x * sys_it->scale(), &proc_shape, nom,
370  sys_it->data_d(), sys_it->data_u());
371  }
372  }
373  }
374  } else {
375  p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
376  }
377  }
378  for (int b = 1; b <= proc_shape.GetNbinsX(); ++b) {
379  if (proc_shape.GetBinContent(b) < 0.) proc_shape.SetBinContent(b, 0.);
380  }
381  proc_shape.Scale(p_rate);
382  if (!shape_init) {
383  proc_shape.Copy(shape);
384  shape.Reset();
385  shape_init = true;
386  }
387  shape.Add(&proc_shape);
388  } else if (procs_[i]->pdf()) {
389  if (!procs_[i]->observable()) {
390  RooAbsData const* data_obj = FindMatchingData(procs_[i].get());
391  std::string var_name = "CMS_th1x";
392  if (data_obj) var_name = data_obj->get()->first()->GetName();
393  procs_[i]->set_observable((RooRealVar *)procs_[i]->pdf()->findServer(var_name.c_str()));
394  }
395  TH1::AddDirectory(false);
396  TH1F* tmp = (TH1F*)procs_[i]->observable()->createHistogram("");
397  for (int b = 1; b <= tmp->GetNbinsX(); ++b) {
398  procs_[i]->observable()->setVal(tmp->GetBinCenter(b));
399  tmp->SetBinContent(b, tmp->GetBinWidth(b) * procs_[i]->pdf()->getVal());
400  }
401  TH1F proc_shape = *tmp;
402  delete tmp;
403  RooAbsPdf const* aspdf = dynamic_cast<RooAbsPdf const*>(procs_[i]->pdf());
404  if ((aspdf && !aspdf->selfNormalized()) || (!aspdf)) {
405  // LOGLINE(log(), "Have a pdf that is not selfNormalized");
406  // std::cout << "Integral: " << proc_shape.Integral() << "\n";
407  if (proc_shape.Integral() > 0.) {
408  proc_shape.Scale(1. / proc_shape.Integral());
409  }
410  }
411  for (auto sys_it : lookup[i]) {
412  if (sys_it->type() == "rateParam") {
413  continue; // don't evaluate this for now
414  }
415  double x = params_[sys_it->name()]->val();
416  if (sys_it->asymm()) {
417  p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
418  sys_it->value_u());
419  } else {
420  p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
421  }
422  }
423  proc_shape.Scale(p_rate);
424  if (!shape_init) {
425  proc_shape.Copy(shape);
426  shape.Reset();
427  shape_init = true;
428  }
429  shape.Add(&proc_shape);
430  }
431  }
432  return shape;
433 }
434 
436  double rate = 0.0;
437  for (unsigned i = 0; i < obs_.size(); ++i) {
438  rate += obs_[i]->rate();
439  }
440  return rate;
441 }
442 
444  TH1F shape;
445  bool shape_init = false;
446 
447  for (unsigned i = 0; i < obs_.size(); ++i) {
448  TH1F proc_shape;
449  double p_rate = obs_[i]->rate();
450  if (obs_[i]->shape()) {
451  proc_shape = obs_[i]->ShapeAsTH1F();
452  } else if (obs_[i]->data()) {
453  TH1F* tmp = dynamic_cast<TH1F*>(obs_[i]->data()->createHistogram(
454  "", *(RooRealVar*)obs_[i]->data()->get()->first()));
455  tmp->Sumw2(false);
456  tmp->SetBinErrorOption(TH1::kPoisson);
457  proc_shape = *tmp;
458  delete tmp;
459  proc_shape.Scale(1. / proc_shape.Integral());
460  }
461  proc_shape.Scale(p_rate);
462  if (!shape_init) {
463  proc_shape.Copy(shape);
464  shape.Reset();
465  shape_init = true;
466  }
467  shape.Add(&proc_shape);
468  }
469  return shape;
470 }
471 
472 void CombineHarvester::ShapeDiff(double x,
473  TH1F * target,
474  TH1 const* nom,
475  TH1 const* low,
476  TH1 const* high,
477  bool linear) {
478  double fx = smoothStepFunc(x);
479  for (int i = 1; i <= target->GetNbinsX(); ++i) {
480  float h = high->GetBinContent(i);
481  float l = low->GetBinContent(i);
482  float n = nom->GetBinContent(i);
483  if (!linear) {
484  float t = target->GetBinContent(i);
485  target->SetBinContent(i, t > 0. ? std::log(t) : -999.);
486  h = (h > 0. && n > 0.) ? std::log(h/n) : 0.;
487  l = (l > 0. && n > 0.) ? std::log(l/n) : 0.;
488  target->SetBinContent(i, target->GetBinContent(i) +
489  0.5 * x * ((h - l) + (h + l) * fx));
490  target->SetBinContent(i, std::exp(target->GetBinContent(i)));
491  } else {
492  target->SetBinContent(i, target->GetBinContent(i) +
493  0.5 * x * ((h - l) + (h + l - 2. * n) * fx));
494  }
495  }
496 }
497 
498 void CombineHarvester::ShapeDiff(double x,
499  TH1F * target,
500  RooDataHist const* nom,
501  RooDataHist const* low,
502  RooDataHist const* high) {
503  double fx = smoothStepFunc(x);
504  for (int i = 1; i <= target->GetNbinsX(); ++i) {
505  high->get(i-1);
506  low->get(i-1);
507  nom->get(i-1);
508  // The RooDataHists are not scaled to unity (unlike in the TH1 case above)
509  // so we have to normalise the bin contents on the fly
510  float h = high->weight() / high->sumEntries();
511  float l = low->weight() / low->sumEntries();
512  float n = nom->weight() / nom->sumEntries();
513  target->SetBinContent(i, target->GetBinContent(i) +
514  0.5 * x * ((h - l) + (h + l - 2. * n) * fx));
515  }
516 }
517 
518 // void CombineHarvester::SetParameters(std::vector<ch::Parameter> params) {
519 // params_.clear();
520 // for (unsigned i = 0; i < params.size(); ++i) {
521 // params_[params[i].name()] = std::make_shared<ch::Parameter>(params[i]);
522 // }
523 // }
524 
525 void CombineHarvester::RenameParameter(std::string const& oldname,
526  std::string const& newname) {
527  auto it = params_.find(oldname);
528  if (it != params_.end()) {
529  params_[newname] = it->second;
530  params_[newname]->set_name(newname);
531  params_.erase(it);
532  }
533 }
534 
536  std::string const& name) const {
537  auto it = params_.find(name);
538  if (it != params_.end()) {
539  return it->second.get();
540  } else {
541  return nullptr;
542  }
543 }
544 
546  auto it = params_.find(name);
547  if (it != params_.end()) {
548  return it->second.get();
549  } else {
550  return nullptr;
551  }
552 }
553 
555  std::vector<ch::Parameter> const& params) {
556  for (unsigned i = 0; i < params.size(); ++i) {
557  auto it = params_.find(params[i].name());
558  if (it != params_.end()) {
559  it->second->set_val(params[i].val());
560  it->second->set_err_d(params[i].err_d());
561  it->second->set_err_u(params[i].err_u());
562  } else {
563  if (verbosity_ >= 1) {
564  LOGLINE(log(), "Parameter " + params[i].name() + " is not defined");
565  }
566  }
567  }
568 }
569 
570 void CombineHarvester::UpdateParameters(RooFitResult const& fit) {
571  for (int i = 0; i < fit.floatParsFinal().getSize(); ++i) {
572  RooRealVar const* var =
573  dynamic_cast<RooRealVar const*>(fit.floatParsFinal().at(i));
574  // check for failed cast here
575  auto it = params_.find(std::string(var->GetName()));
576  if (it != params_.end()) {
577  it->second->set_val(var->getVal());
578  it->second->set_err_d(var->getErrorLo());
579  it->second->set_err_u(var->getErrorHi());
580  } else {
581  if (verbosity_ >= 1) {
582  LOGLINE(log(),
583  "Parameter " + std::string(var->GetName()) + " is not defined");
584  }
585  }
586  }
587 }
588 
589 void CombineHarvester::UpdateParameters(RooFitResult const* fit) {
590  UpdateParameters(*fit);
591 }
592 
593 std::vector<ch::Parameter> CombineHarvester::GetParameters() const {
594  std::vector<ch::Parameter> params;
595  for (auto const& it : params_) {
596  params.push_back(*(it.second));
597  }
598  return params;
599 }
600 
601 void CombineHarvester::VariableRebin(std::vector<double> bins) {
602  // We need to keep a record of the Process rates before we rebin. The
603  // reasoning comes from the following scenario: the user might choose a new
604  // binning which excludes some of the existing bins - thus changing the
605  // process normalisation. This is fine, but we also need to adjust the shape
606  // Systematic entries - both the rebinning and the adjustment of the value_u
607  // and value_d shifts.
608  std::vector<double> prev_proc_rates(procs_.size());
609 
610  // Also hold on the scaled Process hists *after* the rebinning is done - these
611  // are needed to update the associated Systematic entries
612  std::vector<std::unique_ptr<TH1>> scaled_procs(procs_.size());
613 
614  for (unsigned i = 0; i < procs_.size(); ++i) {
615  if (procs_[i]->shape()) {
616  // Get the scaled shape here
617  std::unique_ptr<TH1> copy(procs_[i]->ClonedScaledShape());
618  // shape norm should only be "no_norm_rate"
619  prev_proc_rates[i] = procs_[i]->no_norm_rate();
620  std::unique_ptr<TH1> copy2(copy->Rebin(bins.size()-1, "", &(bins[0])));
621  // The process shape & rate will be reset here
622  procs_[i]->set_shape(std::move(copy2), true);
623  scaled_procs[i] = procs_[i]->ClonedScaledShape();
624  }
625  }
626  for (unsigned i = 0; i < obs_.size(); ++i) {
627  if (obs_[i]->shape()) {
628  std::unique_ptr<TH1> copy(obs_[i]->ClonedScaledShape());
629  std::unique_ptr<TH1> copy2(copy->Rebin(bins.size()-1, "", &(bins[0])));
630  obs_[i]->set_shape(std::move(copy2), true);
631  }
632  }
633  for (unsigned i = 0; i < systs_.size(); ++i) {
634  TH1 const* proc_hist = nullptr;
635  double prev_rate = 0.;
636  for (unsigned j = 0; j < procs_.size(); ++j) {
637  if (MatchingProcess(*(procs_[j]), *(systs_[i].get()))) {
638  proc_hist = scaled_procs[j].get();
639  prev_rate = prev_proc_rates[j];
640  }
641  }
642  if (systs_[i]->shape_u() && systs_[i]->shape_d()) {
643  // These hists will be normalised to unity
644  std::unique_ptr<TH1> copy_u(systs_[i]->ClonedShapeU());
645  std::unique_ptr<TH1> copy_d(systs_[i]->ClonedShapeD());
646 
647  // If we found a matching Process we will scale this back up to their
648  // initial rates
649  if (proc_hist) {
650  copy_u->Scale(systs_[i]->value_u() * prev_rate);
651  copy_d->Scale(systs_[i]->value_d() * prev_rate);
652  }
653  std::unique_ptr<TH1> copy2_u(
654  copy_u->Rebin(bins.size() - 1, "", &(bins[0])));
655  std::unique_ptr<TH1> copy2_d(
656  copy_d->Rebin(bins.size() - 1, "", &(bins[0])));
657  // If we have proc_hist != nullptr, set_shapes will re-calculate value_u
658  // and value_d for us, before scaling the new hists back to unity
659  systs_[i]->set_shapes(std::move(copy2_u), std::move(copy2_d), proc_hist);
660  }
661  }
662 }
663 
664 void CombineHarvester::ZeroBins(double min, double max) {
665  // We need to keep a record of the Process rates before we set bins to 0. The
666  // This is necessary because we need to make sure the process normalisation
667  // and shape systematic entries are correctly adjusted
668  std::vector<double> prev_proc_rates(procs_.size());
669 
670  // Also hold on the scaled Process hists *after* some of the bins have
671  // been set to 0 - these
672  // are needed to update the associated Systematic entries
673  std::vector<std::unique_ptr<TH1>> scaled_procs(procs_.size());
674 
675  for (unsigned i = 0; i < procs_.size(); ++i) {
676  if (procs_[i]->shape()) {
677  // Get the scaled shape here
678  std::unique_ptr<TH1> copy(procs_[i]->ClonedScaledShape());
679  // shape norm should only be "no_norm_rate"
680  prev_proc_rates[i] = procs_[i]->no_norm_rate();
681  for (int j = 1; j <= copy->GetNbinsX();j ++){
682  if(copy->GetBinLowEdge(j) >= min && copy->GetBinLowEdge(j+1) <= max){
683  copy->SetBinContent(j,0.);
684  copy->SetBinError(j,0.);
685  }
686  }
687  // The process shape & rate will be reset here
688  procs_[i]->set_shape(std::move(copy), true);
689  scaled_procs[i] = procs_[i]->ClonedScaledShape();
690  }
691  }
692  for (unsigned i = 0; i < obs_.size(); ++i) {
693  if (obs_[i]->shape()) {
694  std::unique_ptr<TH1> copy(obs_[i]->ClonedScaledShape());
695  for (int j = 1; j <= copy->GetNbinsX();j ++){
696  if(copy->GetBinLowEdge(j) >= min && copy->GetBinLowEdge(j+1) <= max){
697  copy->SetBinContent(j,0.);
698  copy->SetBinError(j,0.);
699  }
700  }
701  obs_[i]->set_shape(std::move(copy), true);
702  }
703  }
704  for (unsigned i = 0; i < systs_.size(); ++i) {
705  TH1 const* proc_hist = nullptr;
706  double prev_rate = 0.;
707  for (unsigned j = 0; j < procs_.size(); ++j) {
708  if (MatchingProcess(*(procs_[j]), *(systs_[i].get()))) {
709  proc_hist = scaled_procs[j].get();
710  prev_rate = prev_proc_rates[j];
711  }
712  }
713  if (systs_[i]->shape_u() && systs_[i]->shape_d()) {
714  // These hists will be normalised to unity
715  std::unique_ptr<TH1> copy_u(systs_[i]->ClonedShapeU());
716  std::unique_ptr<TH1> copy_d(systs_[i]->ClonedShapeD());
717 
718  // If we found a matching Process we will scale this back up to their
719  // initial rates
720  if (proc_hist) {
721  copy_u->Scale(systs_[i]->value_u() * prev_rate);
722  copy_d->Scale(systs_[i]->value_d() * prev_rate);
723  }
724  for (int j = 1; j <= copy_u->GetNbinsX();j ++){
725  if(copy_u->GetBinLowEdge(j) >= min && copy_u->GetBinLowEdge(j+1) <= max){
726  copy_u->SetBinContent(j,0.);
727  copy_u->SetBinError(j,0.);
728  }
729  if(copy_d->GetBinLowEdge(j) >= min && copy_d->GetBinLowEdge(j+1) <= max){
730  copy_d->SetBinContent(j,0.);
731  copy_d->SetBinError(j,0.);
732  }
733  }
734  // If we have proc_hist != nullptr, set_shapes will re-calculate value_u
735  // and value_d for us, before scaling the new hists back to unity
736  systs_[i]->set_shapes(std::move(copy_u), std::move(copy_d), proc_hist);
737  }
738  }
739 }
740 
741 
742 void CombineHarvester::SetPdfBins(unsigned nbins) {
743  for (unsigned i = 0; i < procs_.size(); ++i) {
744  std::set<std::string> binning_vars;
745  if (procs_[i]->pdf()) {
746  RooAbsData const* data_obj = FindMatchingData(procs_[i].get());
747  std::string var_name = "CMS_th1x";
748  if (data_obj) var_name = data_obj->get()->first()->GetName();
749  binning_vars.insert(var_name);
750  }
751  for (auto & it : wspaces_) {
752  for (auto & var : binning_vars) {
753  RooRealVar* avar =
754  dynamic_cast<RooRealVar*>(it.second->var(var.c_str()));
755  if (avar) avar->setBins(nbins);
756  }
757  }
758  }
759 }
760 
761 // This implementation "borowed" from
762 // HiggsAnalysis/CombinedLimit/src/ProcessNormalization.cc
763 double CombineHarvester::logKappaForX(double x, double k_low,
764  double k_high) const {
765  if (k_high == 0. || k_low == 0.) {
766  if (verbosity_ >= 1) {
767  LOGLINE(log(), "Have kappa=0.0 (scaling ill-defined), returning 1.0");
768  }
769  return 1.;
770  }
771  if (fabs(x) >= 0.5)
772  return (x >= 0 ? std::pow(k_high, x) : std::pow(k_low, -1.0 * x));
773  // interpolate between log(kappaHigh) and -log(kappaLow)
774  // logKappa(x) = avg + halfdiff * h(2x)
775  // where h(x) is the 3th order polynomial
776  // h(x) = (3 x^5 - 10 x^3 + 15 x)/8;
777  // chosen so that h(x) satisfies the following:
778  // h (+/-1) = +/-1
779  // h'(+/-1) = 0
780  // h"(+/-1) = 0
781  double logKhi = std::log(k_high);
782  double logKlo = -std::log(k_low);
783  double avg = 0.5 * (logKhi + logKlo), halfdiff = 0.5 * (logKhi - logKlo);
784  double twox = x + x, twox2 = twox * twox;
785  double alpha = 0.125 * twox * (twox2 * (3 * twox2 - 10.) + 15.);
786  double ret = avg + alpha * halfdiff;
787  return std::exp(ret * x);
788 }
789 
790 void CombineHarvester::SetGroup(std::string const& name,
791  std::vector<std::string> const& patterns) {
792  std::vector<boost::regex> rgx;
793  for (auto const& pt : patterns) rgx.emplace_back(pt);
794  for (auto it = params_.begin(); it != params_.end(); ++it) {
795  std::string par = it->first;
796  auto & groups = it->second->groups();
797  if (groups.count(name)) continue;
798  if (ch::contains_rgx(rgx, par)) {
799  groups.insert(name);
800  };
801  }
802 }
803 
804 void CombineHarvester::RemoveGroup(std::string const& name,
805  std::vector<std::string> const& patterns) {
806  std::vector<boost::regex> rgx;
807  for (auto const& pt : patterns) rgx.emplace_back(pt);
808  for (auto it = params_.begin(); it != params_.end(); ++it) {
809  std::string par = it->first;
810  auto & groups = it->second->groups();
811  if (!groups.count(name)) continue;
812  if (ch::contains_rgx(rgx, par)) {
813  groups.erase(name);
814  };
815  }
816 }
817 
818 void CombineHarvester::RenameGroup(std::string const& oldname,
819  std::string const& newname) {
820  for (auto it = params_.begin(); it != params_.end(); ++it) {
821  auto & groups = it->second->groups();
822  if (groups.count(oldname)) {
823  groups.erase(oldname);
824  groups.insert(newname);
825  }
826  }
827 }
828 
829 void CombineHarvester::AddDatacardLineAtEnd(std::string const& line) {
830  post_lines_.push_back(line);
831 }
832 
834  post_lines_.clear();
835 }
836 }
#define LOGLINE(x, y)
Definition: Logging.h:11
#define FNERROR(x)
Definition: Logging.h:9
TH2F GetRateCovariance(RooFitResult const &fit, unsigned n_samples)
void ZeroBins(double min, double max)
void RenameParameter(std::string const &oldname, std::string const &newname)
TH2F GetRateCorrelation(RooFitResult const &fit, unsigned n_samples)
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void AddDatacardLineAtEnd(std::string const &line)
Add a line of text at the end of all datacards.
void SetGroup(std::string const &name, std::vector< std::string > const &patterns)
Add parameters to a given group.
void RenameGroup(std::string const &oldname, std::string const &newname)
Rename a nuisance parameter group.
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
void UpdateParameters(std::vector< ch::Parameter > const &params)
std::set< std::string > bin_set()
void VariableRebin(std::vector< double > bins)
void RemoveGroup(std::string const &name, std::vector< std::string > const &patterns)
Remove parameters to a given group.
void ClearDatacardLinesAtEnd()
Clear all added datacard lines.
std::vector< ch::Parameter > GetParameters() const
void SetPdfBins(unsigned nbins)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
ch::Parameter const * GetParameter(std::string const &name) const
std::string const & name() const
Definition: Systematic.h:22
Definition: Algorithm.h:10
bool contains_rgx(const std::vector< boost::regex > &r, T p)
Definition: Algorithm.h:34
bool any_of(const Range &r, Predicate p)
Definition: Algorithm.h:17
bool MatchingProcess(T const &first, U const &second)
Definition: Utilities.h:43