9 #include "boost/lexical_cast.hpp"
10 #include "boost/algorithm/string.hpp"
11 #include "boost/range/algorithm_ext/erase.hpp"
12 #include "boost/range/algorithm/find.hpp"
13 #include "boost/format.hpp"
14 #include "TDirectory.h"
32 CombineHarvester::ProcSystMap CombineHarvester::GenerateProcSystMap() {
33 ProcSystMap lookup(procs_.size());
34 for (
unsigned i = 0; i < systs_.size(); ++i) {
35 for (
unsigned j = 0; j < procs_.size(); ++j) {
37 lookup[j].push_back(systs_[i].get());
45 auto lookup = GenerateProcSystMap();
47 for (
auto param_it : params_) {
48 double backup = param_it.second->val();
49 param_it.second->set_val(backup+param_it.second->err_d());
50 double rate_d = this->GetRateInternal(lookup, param_it.first);
51 param_it.second->set_val(backup+param_it.second->err_u());
52 double rate_u = this->GetRateInternal(lookup, param_it.first);
53 double err = std::fabs(rate_u-rate_d) / 2.0;
55 param_it.second->set_val(backup);
57 return std::sqrt(err_sq);
67 auto lookup = GenerateProcSystMap();
68 double rate = GetRateInternal(lookup);
76 RooArgList
const& rands = fit.randomizePars();
80 int n_pars = rands.getSize();
81 std::vector<RooRealVar const*> r_vec(n_pars,
nullptr);
82 std::vector<ch::Parameter*> p_vec(n_pars,
nullptr);
83 for (
unsigned n = 0; n < p_vec.size(); ++n) {
84 r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
88 for (
unsigned i = 0; i < n_samples; ++i) {
91 for (
int n = 0; n < n_pars; ++n) {
92 if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
95 double rand_rate = this->GetRateInternal(lookup);
96 double err = std::fabs(rand_rate-rate);
100 return std::sqrt(err_sq/
double(n_samples));
104 auto lookup = GenerateProcSystMap();
106 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
107 shape.SetBinError(i, 0.0);
109 for (
auto param_it : params_) {
110 double backup = param_it.second->val();
111 param_it.second->set_val(backup+param_it.second->err_d());
112 TH1F shape_d = this->GetShapeInternal(lookup, param_it.first);
113 param_it.second->set_val(backup+param_it.second->err_u());
114 TH1F shape_u = this->GetShapeInternal(lookup, param_it.first);
115 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
117 std::fabs(shape_u.GetBinContent(i) - shape_d.GetBinContent(i)) / 2.0;
118 shape.SetBinError(i, err*err + shape.GetBinError(i));
120 param_it.second->set_val(backup);
122 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
123 shape.SetBinError(i, std::sqrt(shape.GetBinError(i)));
129 unsigned n_samples) {
134 unsigned n_samples) {
135 auto lookup = GenerateProcSystMap();
136 TH1F shape = GetShapeInternal(lookup);
137 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
138 shape.SetBinError(i, 0.0);
145 RooArgList
const& rands = fit.randomizePars();
149 int n_pars = rands.getSize();
150 std::vector<RooRealVar const*> r_vec(n_pars,
nullptr);
151 std::vector<ch::Parameter*> p_vec(n_pars,
nullptr);
152 for (
unsigned n = 0; n < p_vec.size(); ++n) {
153 r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
158 for (
unsigned i = 0; i < n_samples; ++i) {
161 for (
int n = 0; n < n_pars; ++n) {
162 if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
165 TH1F rand_shape = this->GetShapeInternal(lookup);
166 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
168 std::fabs(rand_shape.GetBinContent(i) - shape.GetBinContent(i));
169 shape.SetBinError(i, err*err + shape.GetBinError(i));
172 for (
int i = 1; i <= shape.GetNbinsX(); ++i) {
173 shape.SetBinError(i, std::sqrt(shape.GetBinError(i)/double(n_samples)));
180 unsigned n_samples) {
181 auto lookup = GenerateProcSystMap();
183 unsigned n = procs_.size();
184 TH1F nom(
"nominal",
"nominal", n, 0, n);
185 TH2F res(
"covariance",
"covariance", n, 0, n, n, 0, n);
187 std::vector<CombineHarvester> ch_procs;
188 std::vector<std::string> labels;
189 unsigned nbins = this->
bin_set().size();
191 for (
unsigned i = 0; i < procs_.size(); ++i) {
193 this->
cp().
bin({procs_[i]->bin()}).
process({procs_[i]->process()}));
195 labels.push_back(procs_[i]->
bin() +
"," + procs_[i]->
process());
197 labels.push_back(procs_[i]->
process());
200 for (
unsigned i = 0; i < procs_.size(); ++i) {
201 nom.SetBinContent(i + 1, ch_procs[i].
GetRate());
207 RooArgList
const& rands = fit.randomizePars();
211 int n_pars = rands.getSize();
212 std::vector<RooRealVar const*> r_vec(n_pars,
nullptr);
213 std::vector<ch::Parameter*> p_vec(n_pars,
nullptr);
214 for (
unsigned n = 0; n < p_vec.size(); ++n) {
215 r_vec[n] = dynamic_cast<RooRealVar const*>(rands.at(n));
220 for (
unsigned rnd = 0; rnd < n_samples; ++rnd) {
223 for (
int n = 0; n < n_pars; ++n) {
224 if (p_vec[n]) p_vec[n]->set_val(r_vec[n]->getVal());
227 for (
int i = 1; i <= nom.GetNbinsX(); ++i) {
228 for (
int j = 1; j <= nom.GetNbinsX(); ++j) {
230 int y = nom.GetNbinsX() - (i - 1);
231 res.GetXaxis()->SetBinLabel(x, labels[j - 1].c_str());
232 res.GetYaxis()->SetBinLabel(y, labels[i - 1].c_str());
234 x, y, res.GetBinContent(x, y) +
235 (ch_procs[i - 1].GetRate() - nom.GetBinContent(i)) *
236 (ch_procs[j - 1].
GetRate() - nom.GetBinContent(j)));
241 for (
int i = 1; i <= nom.GetNbinsX(); ++i) {
242 for (
int j = 1; j <= nom.GetNbinsX(); ++j) {
244 int y = nom.GetNbinsX() - (i - 1);
245 res.SetBinContent(x, y, res.GetBinContent(x, y) / double(n_samples));
253 unsigned n_samples) {
256 res.SetName(
"correlation");
257 res.SetTitle(
"correlation");
258 for (
int i = 1; i <= cov.GetNbinsX(); ++i) {
259 for (
int j = 1; j <= cov.GetNbinsX(); ++j) {
261 int y = cov.GetNbinsX() - (i - 1);
264 cov.GetBinContent(x, y) /
265 (std::sqrt(cov.GetBinContent(i, cov.GetNbinsX() - (i - 1))) *
266 std::sqrt(cov.GetBinContent(j, cov.GetNbinsX() - (j - 1)))));
273 auto lookup = GenerateProcSystMap();
274 return GetRateInternal(lookup);
278 auto lookup = GenerateProcSystMap();
279 return GetShapeInternal(lookup);
282 double CombineHarvester::GetRateInternal(ProcSystMap
const& lookup,
283 std::string
const& single_sys) {
286 for (
unsigned i = 0; i < procs_.size(); ++i) {
287 double p_rate = procs_[i]->rate();
293 if (single_sys !=
"" && !procs_[i]->pdf()) {
295 return sys->
name() == single_sys;
298 for (
auto sys_it : lookup[i]) {
299 if (sys_it->type() ==
"rateParam") {
302 double x = params_[sys_it->name()]->val();
303 if (sys_it->asymm()) {
304 p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
307 p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
315 TH1F CombineHarvester::GetShapeInternal(ProcSystMap
const& lookup,
316 std::string
const& single_sys) {
318 bool shape_init =
false;
320 for (
unsigned i = 0; i < procs_.size(); ++i) {
324 if (single_sys !=
"" && !procs_[i]->pdf()) {
325 if (!
ch::any_of(lookup[i], [&](Systematic
const* sys) {
326 return sys->name() == single_sys;
330 double p_rate = procs_[i]->rate();
331 if (procs_[i]->shape() || procs_[i]->
data()) {
332 TH1F proc_shape = procs_[i]->ShapeAsTH1F();
333 for (
auto sys_it : lookup[i]) {
334 if (sys_it->type() ==
"rateParam") {
337 auto param_it = params_.find(sys_it->name());
338 if (param_it == params_.end()) {
339 throw std::runtime_error(
340 FNERROR(
"Parameter " + sys_it->name() +
341 " not found in CombineHarvester instance"));
343 double x = param_it->second->val();
344 if (sys_it->asymm()) {
345 p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
347 if (sys_it->type() ==
"shape" || sys_it->type() ==
"shapeN2" ||
348 sys_it->type() ==
"shapeU") {
350 if (sys_it->type() ==
"shapeN2") linear =
false;
351 if (sys_it->shape_u() && sys_it->shape_d()) {
352 ShapeDiff(x * sys_it->scale(), &proc_shape, procs_[i]->shape(),
353 sys_it->shape_d(), sys_it->shape_u(), linear);
355 if (sys_it->data_u() && sys_it->data_d()) {
356 RooDataHist
const* nom =
357 dynamic_cast<RooDataHist const*>(procs_[i]->
data());
359 ShapeDiff(x * sys_it->scale(), &proc_shape, nom,
360 sys_it->data_d(), sys_it->data_u());
365 p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
368 for (
int b = 1; b <= proc_shape.GetNbinsX(); ++b) {
369 if (proc_shape.GetBinContent(b) < 0.) proc_shape.SetBinContent(b, 0.);
371 proc_shape.Scale(p_rate);
373 proc_shape.Copy(shape);
377 shape.Add(&proc_shape);
378 }
else if (procs_[i]->pdf()) {
379 if (!procs_[i]->observable()) {
380 RooAbsData
const* data_obj = FindMatchingData(procs_[i].get());
381 std::string var_name =
"CMS_th1x";
382 if (data_obj) var_name = data_obj->get()->first()->GetName();
383 procs_[i]->set_observable((RooRealVar *)procs_[i]->pdf()->findServer(var_name.c_str()));
385 TH1::AddDirectory(
false);
386 TH1F* tmp = (TH1F*)procs_[i]->observable()->createHistogram(
"");
387 for (
int b = 1; b <= tmp->GetNbinsX(); ++b) {
388 procs_[i]->observable()->setVal(tmp->GetBinCenter(b));
389 tmp->SetBinContent(b, tmp->GetBinWidth(b) * procs_[i]->pdf()->getVal());
391 TH1F proc_shape = *tmp;
393 RooAbsPdf
const* aspdf = dynamic_cast<RooAbsPdf const*>(procs_[i]->pdf());
394 if ((aspdf && !aspdf->selfNormalized()) || (!aspdf)) {
397 if (proc_shape.Integral() > 0.) {
398 proc_shape.Scale(1. / proc_shape.Integral());
401 for (
auto sys_it : lookup[i]) {
402 if (sys_it->type() ==
"rateParam") {
405 double x = params_[sys_it->name()]->val();
406 if (sys_it->asymm()) {
407 p_rate *= logKappaForX(x * sys_it->scale(), sys_it->value_d(),
410 p_rate *= std::pow(sys_it->value_u(), x * sys_it->scale());
413 proc_shape.Scale(p_rate);
415 proc_shape.Copy(shape);
419 shape.Add(&proc_shape);
427 for (
unsigned i = 0; i < obs_.size(); ++i) {
428 rate += obs_[i]->rate();
435 bool shape_init =
false;
437 for (
unsigned i = 0; i < obs_.size(); ++i) {
439 double p_rate = obs_[i]->rate();
440 if (obs_[i]->shape()) {
441 proc_shape = obs_[i]->ShapeAsTH1F();
442 }
else if (obs_[i]->
data()) {
443 TH1F* tmp = dynamic_cast<TH1F*>(obs_[i]->
data()->createHistogram(
444 "", *(RooRealVar*)obs_[i]->
data()->get()->first()));
446 tmp->SetBinErrorOption(TH1::kPoisson);
449 proc_shape.Scale(1. / proc_shape.Integral());
451 proc_shape.Scale(p_rate);
453 proc_shape.Copy(shape);
457 shape.Add(&proc_shape);
462 void CombineHarvester::ShapeDiff(
double x,
468 double fx = smoothStepFunc(x);
469 for (
int i = 1; i <= target->GetNbinsX(); ++i) {
470 float h = high->GetBinContent(i);
471 float l = low->GetBinContent(i);
472 float n = nom->GetBinContent(i);
474 float t = target->GetBinContent(i);
475 target->SetBinContent(i, t > 0. ? std::log(t) : -999.);
476 h = (h > 0. && n > 0.) ? std::log(h/n) : 0.;
477 l = (l > 0. && n > 0.) ? std::log(l/n) : 0.;
478 target->SetBinContent(i, target->GetBinContent(i) +
479 0.5 * x * ((h - l) + (h + l) * fx));
480 target->SetBinContent(i, std::exp(target->GetBinContent(i)));
482 target->SetBinContent(i, target->GetBinContent(i) +
483 0.5 * x * ((h - l) + (h + l - 2. * n) * fx));
488 void CombineHarvester::ShapeDiff(
double x,
490 RooDataHist
const* nom,
491 RooDataHist
const* low,
492 RooDataHist
const* high) {
493 double fx = smoothStepFunc(x);
494 for (
int i = 1; i <= target->GetNbinsX(); ++i) {
500 float h = high->weight() / high->sumEntries();
501 float l = low->weight() / low->sumEntries();
502 float n = nom->weight() / nom->sumEntries();
503 target->SetBinContent(i, target->GetBinContent(i) +
504 0.5 * x * ((h - l) + (h + l - 2. * n) * fx));
516 std::string
const& newname) {
517 auto it = params_.find(oldname);
518 if (it != params_.end()) {
519 params_[newname] = it->second;
520 params_[newname]->set_name(newname);
526 std::string
const& name)
const {
527 auto it = params_.find(name);
528 if (it != params_.end()) {
529 return it->second.get();
536 auto it = params_.find(name);
537 if (it != params_.end()) {
538 return it->second.get();
545 std::vector<ch::Parameter>
const& params) {
546 for (
unsigned i = 0; i < params.size(); ++i) {
547 auto it = params_.find(params[i].name());
548 if (it != params_.end()) {
549 it->second->set_val(params[i].val());
550 it->second->set_err_d(params[i].err_d());
551 it->second->set_err_u(params[i].err_u());
553 if (verbosity_ >= 1) {
554 LOGLINE(log(),
"Parameter " + params[i].name() +
" is not defined");
561 for (
int i = 0; i < fit.floatParsFinal().getSize(); ++i) {
562 RooRealVar
const* var =
563 dynamic_cast<RooRealVar const*>(fit.floatParsFinal().at(i));
565 auto it = params_.find(std::string(var->GetName()));
566 if (it != params_.end()) {
567 it->second->set_val(var->getVal());
568 it->second->set_err_d(var->getErrorLo());
569 it->second->set_err_u(var->getErrorHi());
571 if (verbosity_ >= 1) {
573 "Parameter " + std::string(var->GetName()) +
" is not defined");
584 std::vector<ch::Parameter> params;
585 for (
auto const& it : params_) {
586 params.push_back(*(it.second));
598 std::vector<double> prev_proc_rates(procs_.size());
602 std::vector<std::unique_ptr<TH1>> scaled_procs(procs_.size());
604 for (
unsigned i = 0; i < procs_.size(); ++i) {
605 if (procs_[i]->shape()) {
607 std::unique_ptr<TH1> copy(procs_[i]->ClonedScaledShape());
609 prev_proc_rates[i] = procs_[i]->no_norm_rate();
610 std::unique_ptr<TH1> copy2(copy->Rebin(bins.size()-1,
"", &(bins[0])));
612 procs_[i]->set_shape(std::move(copy2),
true);
613 scaled_procs[i] = procs_[i]->ClonedScaledShape();
616 for (
unsigned i = 0; i < obs_.size(); ++i) {
617 if (obs_[i]->shape()) {
618 std::unique_ptr<TH1> copy(obs_[i]->ClonedScaledShape());
619 std::unique_ptr<TH1> copy2(copy->Rebin(bins.size()-1,
"", &(bins[0])));
620 obs_[i]->set_shape(std::move(copy2),
true);
623 for (
unsigned i = 0; i < systs_.size(); ++i) {
624 TH1
const* proc_hist =
nullptr;
625 double prev_rate = 0.;
626 for (
unsigned j = 0; j < procs_.size(); ++j) {
628 proc_hist = scaled_procs[j].get();
629 prev_rate = prev_proc_rates[j];
632 if (systs_[i]->shape_u() && systs_[i]->shape_d()) {
634 std::unique_ptr<TH1> copy_u(systs_[i]->ClonedShapeU());
635 std::unique_ptr<TH1> copy_d(systs_[i]->ClonedShapeD());
640 copy_u->Scale(systs_[i]->value_u() * prev_rate);
641 copy_d->Scale(systs_[i]->value_d() * prev_rate);
643 std::unique_ptr<TH1> copy2_u(
644 copy_u->Rebin(bins.size() - 1,
"", &(bins[0])));
645 std::unique_ptr<TH1> copy2_d(
646 copy_d->Rebin(bins.size() - 1,
"", &(bins[0])));
649 systs_[i]->set_shapes(std::move(copy2_u), std::move(copy2_d), proc_hist);
658 std::vector<double> prev_proc_rates(procs_.size());
663 std::vector<std::unique_ptr<TH1>> scaled_procs(procs_.size());
665 for (
unsigned i = 0; i < procs_.size(); ++i) {
666 if (procs_[i]->shape()) {
668 std::unique_ptr<TH1> copy(procs_[i]->ClonedScaledShape());
670 prev_proc_rates[i] = procs_[i]->no_norm_rate();
671 for (
int j = 1; j <= copy->GetNbinsX();j ++){
672 if(copy->GetBinLowEdge(j) >= min && copy->GetBinLowEdge(j+1) <= max){
673 copy->SetBinContent(j,0.);
674 copy->SetBinError(j,0.);
678 procs_[i]->set_shape(std::move(copy),
true);
679 scaled_procs[i] = procs_[i]->ClonedScaledShape();
682 for (
unsigned i = 0; i < obs_.size(); ++i) {
683 if (obs_[i]->shape()) {
684 std::unique_ptr<TH1> copy(obs_[i]->ClonedScaledShape());
685 for (
int j = 1; j <= copy->GetNbinsX();j ++){
686 if(copy->GetBinLowEdge(j) >= min && copy->GetBinLowEdge(j+1) <= max){
687 copy->SetBinContent(j,0.);
688 copy->SetBinError(j,0.);
691 obs_[i]->set_shape(std::move(copy),
true);
694 for (
unsigned i = 0; i < systs_.size(); ++i) {
695 TH1
const* proc_hist =
nullptr;
696 double prev_rate = 0.;
697 for (
unsigned j = 0; j < procs_.size(); ++j) {
699 proc_hist = scaled_procs[j].get();
700 prev_rate = prev_proc_rates[j];
703 if (systs_[i]->shape_u() && systs_[i]->shape_d()) {
705 std::unique_ptr<TH1> copy_u(systs_[i]->ClonedShapeU());
706 std::unique_ptr<TH1> copy_d(systs_[i]->ClonedShapeD());
711 copy_u->Scale(systs_[i]->value_u() * prev_rate);
712 copy_d->Scale(systs_[i]->value_d() * prev_rate);
714 for (
int j = 1; j <= copy_u->GetNbinsX();j ++){
715 if(copy_u->GetBinLowEdge(j) >= min && copy_u->GetBinLowEdge(j+1) <= max){
716 copy_u->SetBinContent(j,0.);
717 copy_u->SetBinError(j,0.);
719 if(copy_d->GetBinLowEdge(j) >= min && copy_d->GetBinLowEdge(j+1) <= max){
720 copy_d->SetBinContent(j,0.);
721 copy_d->SetBinError(j,0.);
726 systs_[i]->set_shapes(std::move(copy_u), std::move(copy_d), proc_hist);
733 for (
unsigned i = 0; i < procs_.size(); ++i) {
734 std::set<std::string> binning_vars;
735 if (procs_[i]->pdf()) {
736 RooAbsData
const* data_obj = FindMatchingData(procs_[i].get());
737 std::string var_name =
"CMS_th1x";
738 if (data_obj) var_name = data_obj->get()->first()->GetName();
739 binning_vars.insert(var_name);
741 for (
auto & it : wspaces_) {
742 for (
auto & var : binning_vars) {
744 dynamic_cast<RooRealVar*>(it.second->var(var.c_str()));
745 if (avar) avar->setBins(nbins);
753 double CombineHarvester::logKappaForX(
double x,
double k_low,
754 double k_high)
const {
755 if (k_high == 0. || k_low == 0.) {
756 if (verbosity_ >= 1) {
757 LOGLINE(log(),
"Have kappa=0.0 (scaling ill-defined), returning 1.0");
762 return (x >= 0 ? std::pow(k_high, x) : std::pow(k_low, -1.0 * x));
771 double logKhi = std::log(k_high);
772 double logKlo = -std::log(k_low);
773 double avg = 0.5 * (logKhi + logKlo), halfdiff = 0.5 * (logKhi - logKlo);
774 double twox = x + x, twox2 = twox * twox;
775 double alpha = 0.125 * twox * (twox2 * (3 * twox2 - 10.) + 15.);
776 double ret = avg + alpha * halfdiff;
777 return std::exp(ret * x);
781 std::vector<std::string>
const& patterns) {
782 std::vector<boost::regex> rgx;
783 for (
auto const& pt : patterns) rgx.emplace_back(pt);
784 for (
auto it = params_.begin(); it != params_.end(); ++it) {
785 std::string par = it->first;
786 auto & groups = it->second->groups();
787 if (groups.count(name))
continue;
795 std::vector<std::string>
const& patterns) {
796 std::vector<boost::regex> rgx;
797 for (
auto const& pt : patterns) rgx.emplace_back(pt);
798 for (
auto it = params_.begin(); it != params_.end(); ++it) {
799 std::string par = it->first;
800 auto & groups = it->second->groups();
801 if (!groups.count(name))
continue;
809 std::string
const& newname) {
810 for (
auto it = params_.begin(); it != params_.end(); ++it) {
811 auto & groups = it->second->groups();
812 if (groups.count(oldname)) {
813 groups.erase(oldname);
814 groups.insert(newname);
820 post_lines_.push_back(line);