12 #include "boost/lexical_cast.hpp"
13 #include "boost/algorithm/string.hpp"
14 #include "boost/format.hpp"
15 #include "boost/regex.hpp"
16 #include "boost/filesystem.hpp"
17 #include "TDirectory.h"
19 #include "RooRealVar.h"
20 #include "RooFormulaVar.h"
21 #include "RooCategory.h"
31 #include "CombineHarvester/CombineTools/interface/zstr.hpp"
37 std::string parse_rules) {
38 boost::replace_all(parse_rules,
"$ANALYSIS",
"(?<ANALYSIS>[\\w\\.]+)");
39 boost::replace_all(parse_rules,
"$ERA",
"(?<ERA>[\\w\\.]+)");
40 boost::replace_all(parse_rules,
"$CHANNEL",
"(?<CHANNEL>[\\w\\.]+)");
41 boost::replace_all(parse_rules,
"$BINID",
"(?<BINID>[\\w\\.]+)");
42 boost::replace_all(parse_rules,
"$MASS",
"(?<MASS>[\\w\\.]+)");
43 boost::regex rgx(parse_rules);
44 boost::smatch matches;
45 boost::regex_search(filename, matches, rgx);
47 matches.str(
"ANALYSIS"),
49 matches.str(
"CHANNEL"),
50 matches.str(
"BINID").length() ?
51 boost::lexical_cast<int>(matches.str(
"BINID")) : 0,
57 std::string
const& analysis,
58 std::string
const&
era,
61 std::string
const& mass) {
62 TH1::AddDirectory(kFALSE);
69 std::vector<std::vector<std::string>> words;
70 for (
unsigned i = 0; i < lines.size(); ++i) {
71 boost::trim(lines[i]);
72 if (lines[i].size() == 0)
continue;
73 if (lines[i].at(0) ==
'#' || lines[i].at(0) ==
'-')
continue;
74 words.push_back(std::vector<std::string>());
75 boost::split(words.back(), lines[i], boost::is_any_of(
"\t "),
76 boost::token_compress_on);
79 std::vector<HistMapping> hist_mapping;
81 std::map<std::string, std::shared_ptr<TFile>> file_store;
82 std::map<std::string, std::shared_ptr<RooWorkspace>> ws_store;
84 bool start_nuisance_scan =
false;
90 std::shared_ptr<ch::Observation> single_obs =
nullptr;
91 std::set<std::string> bin_names;
94 std::map<std::string, std::set<std::string>> groups;
98 for (
unsigned i = 0; i < words.size(); ++i) {
100 if (words[i].size() <= 1)
continue;
104 if (boost::iequals(words[i][0],
"shapes") && words[i].size() >= 5) {
112 std::size_t slash = filename.find_last_of(
'/');
113 if (slash != filename.npos) {
114 dc_path = filename.substr(0, slash) +
"/" + words[i][3];
116 dc_path = words[i][3];
118 if (!file_store.count(dc_path))
119 file_store[dc_path] = std::make_shared<TFile>(dc_path.c_str());
120 mapping.
file = file_store.at(dc_path);
122 if (words[i].size() > 5) mapping.
syst_pattern = words[i][5];
124 if (mapping.
IsPdf()) {
125 std::string store_key =
127 if (!ws_store.count(store_key)) {
129 std::shared_ptr<RooWorkspace> ptr(dynamic_cast<RooWorkspace*>(
132 throw std::runtime_error(
FNERROR(
"Workspace not found in file"));
134 ws_store[store_key] = ptr;
136 mapping.
ws = SetupWorkspace(*(ws_store[store_key]),
true);
139 std::string store_key =
141 if (!ws_store.count(store_key)) {
143 std::shared_ptr<RooWorkspace> ptr(dynamic_cast<RooWorkspace*>(
146 throw std::runtime_error(
FNERROR(
"Workspace not found in file"));
148 ws_store[store_key] = ptr;
150 mapping.
sys_ws = SetupWorkspace(*(ws_store[store_key]),
true);
156 if (boost::iequals(words[i][0],
"shapes") && words[i].size() == 4 &&
157 boost::iequals(words[i][3],
"FAKE")) {
166 for (
unsigned i = 0; i < words.size(); ++i) {
167 if (words[i].size() >= 3 &&
168 boost::iequals(words[i][1],
"extArg")) {
169 if (verbosity_ > 1) {
170 FNLOG(log()) <<
"Processing extArg line:\n";
171 for (
auto const& str : words[i]) {
172 log() << str <<
"\t";
177 bool has_range = words[i].size() == 4 && words[i][3][0] ==
'[';
178 std::string param_name = words[i][0];
179 bool is_wsp_rateparam =
false;
181 boost::lexical_cast<double>(words[i][2]);
182 }
catch (boost::bad_lexical_cast &) {
183 is_wsp_rateparam =
true;
185 if ((!is_wsp_rateparam) && (words[i].size() == 3 || has_range)) {
187 param_name, boost::lexical_cast<double>(words[i][2]),
true);
191 std::vector<std::string> tokens;
192 boost::split(tokens, words[i][3], boost::is_any_of(
"[],"));
193 if (tokens.size() == 4) {
194 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
195 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
196 FNLOGC(log(), verbosity_ > 1) <<
"Setting parameter range to " << words[i][2];
199 }
else if (words[i].size() == 3 && is_wsp_rateparam) {
200 SetupRateParamWspObj(param_name, words[i][2],
true);
206 for (
unsigned i = 0; i < words.size(); ++i) {
208 if (words[i].size() <= 1)
continue;
215 if ( boost::iequals(words[i][0],
"observation") &&
216 boost::iequals(words[i-1][0],
"bin") &&
217 words[i].size() == words[i-1].size()) {
218 for (
unsigned p = 1; p < words[i].size(); ++p) {
219 auto obs = std::make_shared<Observation>();
220 obs->set_bin(words[i-1][p]);
221 obs->set_rate(boost::lexical_cast<double>(words[i][p]));
228 LoadShapes(obs.get(), hist_mapping);
235 if (boost::iequals(words[i][0],
"observation") &&
236 !boost::iequals(words[i-1][0],
"bin") &&
237 words[i].size() == 2 &&
238 single_obs.get() ==
nullptr) {
239 for (
unsigned p = 1; p < words[i].size(); ++p) {
240 single_obs = std::make_shared<Observation>();
241 single_obs->set_bin(
"");
242 single_obs->set_rate(boost::lexical_cast<double>(words[i][p]));
244 single_obs->set_era(
era);
245 single_obs->set_channel(
channel);
246 single_obs->set_bin_id(
bin_id);
247 single_obs->set_mass(
mass);
258 if ( boost::iequals(words[i][0],
"rate") &&
259 boost::iequals(words[i-1][0],
"process") &&
260 boost::iequals(words[i-2][0],
"process") &&
261 boost::iequals(words[i-3][0],
"bin") &&
262 words[i].size() == words[i-1].size() &&
263 words[i].size() == words[i-2].size() &&
264 words[i].size() == words[i-3].size()) {
265 for (
unsigned p = 1; p < words[i].size(); ++p) {
266 auto proc = std::make_shared<Process>();
267 proc->set_bin(words[i-3][p]);
268 bin_names.insert(words[i-3][p]);
270 int process_id = boost::lexical_cast<int>(words[i-2][p]);
271 proc->set_signal(process_id <= 0);
272 proc->set_process(words[i-1][p]);
273 }
catch(boost::bad_lexical_cast &) {
274 int process_id = boost::lexical_cast<int>(words[i-1][p]);
275 proc->set_signal(process_id <= 0);
276 proc->set_process(words[i-2][p]);
278 proc->set_rate(boost::lexical_cast<double>(words[i][p]));
283 proc->set_mass(
mass);
285 LoadShapes(proc.get(), hist_mapping);
287 procs_.push_back(proc);
290 start_nuisance_scan =
true;
294 if (start_nuisance_scan && words[i].size() >= 4) {
295 if (boost::iequals(words[i][1],
"param")) {
296 std::string param_name = words[i][0];
297 if (!params_.count(param_name))
298 params_[param_name] = std::make_shared<Parameter>(
Parameter());
299 Parameter * param = params_.at(param_name).get();
301 param->
set_val(boost::lexical_cast<double>(words[i][2]));
302 std::size_t slash_pos = words[i][3].find(
"/");
303 if (slash_pos != words[i][3].npos) {
305 boost::lexical_cast<double>(words[i][3].substr(0, slash_pos)));
307 boost::lexical_cast<double>(words[i][3].substr(slash_pos+1)));
309 param->
set_err_u(+1.0 * boost::lexical_cast<double>(words[i][3]));
310 param->
set_err_d(-1.0 * boost::lexical_cast<double>(words[i][3]));
312 if (words[i].size() >= 5) {
314 std::vector<std::string> tokens;
315 boost::split(tokens, words[i][4], boost::is_any_of(
"[],"));
316 if (tokens.size() == 4) {
317 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
318 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
325 if (start_nuisance_scan && words[i].size() >= 5 &&
326 boost::iequals(words[i][1],
"rateParam")) {
327 if (verbosity_ > 1) {
328 FNLOG(log()) <<
"Processing rateParam line:\n";
329 for (
auto const& str : words[i]) {
330 log() << str <<
"\t";
335 bool has_range = words[i].size() == 6 && words[i][5][0] ==
'[';
336 std::string param_name = words[i][0];
341 bool is_wsp_rateparam =
false;
343 boost::lexical_cast<double>(words[i][4]);
344 }
catch (boost::bad_lexical_cast &) {
345 is_wsp_rateparam =
true;
347 if ((!is_wsp_rateparam) && (words[i].size() == 5 || has_range)) {
349 param_name, boost::lexical_cast<double>(words[i][4]));
353 std::vector<std::string> tokens;
354 boost::split(tokens, words[i][5], boost::is_any_of(
"[],"));
355 if (tokens.size() == 4) {
356 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
357 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
358 FNLOGC(log(), verbosity_ > 1) <<
"Setting parameter range to " << words[i][5];
361 }
else if (words[i].size() == 6 && !has_range) {
362 SetupRateParamFunc(param_name, words[i][4], words[i][5]);
363 }
else if (words[i].size() == 5 && is_wsp_rateparam) {
364 SetupRateParamWspObj(param_name, words[i][4]);
366 for (
unsigned p = 1; p < words[r].size(); ++p) {
367 bool matches_bin =
false;
368 bool matches_proc =
false;
371 std::string
bin = words[r-3][p];
374 process_id = boost::lexical_cast<int>(words[r-2][p]);
376 }
catch(boost::bad_lexical_cast &) {
377 process_id = boost::lexical_cast<int>(words[r-1][p]);
381 if (words[i][2] ==
"*" || fnmatch(words[i][2].c_str(),
bin.c_str(), 0) == 0) {
385 if (words[i][3] ==
"*" || fnmatch(words[i][3].c_str(),
process.c_str(), 0) == 0) {
388 if (!matches_bin || !matches_proc)
continue;
389 auto sys = std::make_shared<Systematic>();
391 sys->set_signal(process_id <= 0);
393 sys->set_name(param_name);
394 sys->set_type(
"rateParam");
400 systs_.push_back(sys);
405 if (start_nuisance_scan && words[i].size() >= 4 &&
406 boost::iequals(words[i][1],
"group")) {
407 if (!groups.count(words[i][0])) {
408 groups[words[i][0]] = std::set<std::string>();
410 for (
unsigned ig = 3; ig < words[i].size(); ++ig) {
411 groups[words[i][0]].insert(words[i][ig]);
416 if (start_nuisance_scan && words[i].size() >= 3 &&
417 boost::iequals(words[i][1],
"autoMCStats")) {
418 std::vector<std::string> for_bins;
419 if (words[i][0] ==
"*") {
422 for_bins.push_back(words[i][0]);
424 for (
auto const&
bin : for_bins) {
425 double thresh = boost::lexical_cast<double>(words[i][2]);
426 if (words[i].size() == 3) {
427 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh);
428 }
else if (words[i].size() == 4) {
429 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]));
431 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]), boost::lexical_cast<int>(words[i][4]));
436 if (start_nuisance_scan && words[i].size()-1 == words[r].size() && !boost::iequals(words[i][1],
"autoMCStats")) {
437 for (
unsigned p = 2; p < words[i].size(); ++p) {
438 if (words[i][p] ==
"-")
continue;
439 auto sys = std::make_shared<Systematic>();
440 sys->set_bin(words[r-3][p-1]);
442 int process_id = boost::lexical_cast<int>(words[r-2][p-1]);
443 sys->set_signal(process_id <= 0);
444 sys->set_process(words[r-1][p-1]);
445 }
catch(boost::bad_lexical_cast &) {
446 int process_id = boost::lexical_cast<int>(words[r-1][p-1]);
447 sys->set_signal(process_id <= 0);
448 sys->set_process(words[r-2][p-1]);
450 sys->set_name(words[i][0]);
451 std::string type = words[i][1];
452 if (!
contains(std::vector<std::string>{
"shape",
"shape?",
"shapeN2",
"shapeU",
"lnN",
"lnU"},
454 throw std::runtime_error(
455 FNERROR(
"Systematic type " + type +
" not supported"));
457 sys->set_type(words[i][1]);
464 std::size_t slash_pos = words[i][p].find(
"/");
465 if (slash_pos != words[i][p].npos) {
468 boost::lexical_cast<double>(words[i][p].substr(0, slash_pos)));
470 boost::lexical_cast<double>(words[i][p].substr(slash_pos+1)));
471 sys->set_asymm(
true);
473 sys->set_value_u(boost::lexical_cast<double>(words[i][p]));
474 sys->set_asymm(
false);
476 if (sys->type() ==
"shape" || sys->type() ==
"shapeN2" ||
477 sys->type() ==
"shapeU") {
478 sys->set_scale(boost::lexical_cast<double>(words[i][p]));
479 LoadShapes(sys.get(), hist_mapping);
480 }
else if (sys->type() ==
"shape?") {
483 LoadShapes(sys.get(), hist_mapping);
484 }
catch (std::exception & e) {
485 if (verbosity_ > 0) {
486 LOGLINE(log(),
"Systematic with shape? did not resolve to a shape");
490 if (!sys->shape_u() || !sys->shape_d()) {
491 sys->set_type(
"lnN");
493 sys->set_type(
"shape");
494 sys->set_scale(boost::lexical_cast<double>(words[i][p]));
497 if (sys->type() ==
"shape" || sys->type() ==
"shapeN2" ||
498 sys->type() ==
"shapeU")
499 sys->set_asymm(
true);
502 if (sys->type() ==
"lnU" || sys->type() ==
"shapeU") {
503 params_.at(sys->name())->set_err_d(0.);
504 params_.at(sys->name())->set_err_u(0.);
506 systs_.push_back(sys);
511 if (bin_names.size() == 1) {
512 single_obs->set_bin(*(bin_names.begin()));
513 LoadShapes(single_obs.get(), hist_mapping);
514 obs_.push_back(single_obs);
516 throw std::runtime_error(
FNERROR(
517 "Input card specifies a single observation entry without a bin, but "
518 "multiple bins defined elsewhere"));
523 for (
auto const& grp : groups) {
530 std::string
const& root_file) {
531 TFile file(root_file.c_str(),
"RECREATE");
542 void CombineHarvester::FillHistMappings(std::vector<HistMapping> & mappings) {
546 std::map<RooAbsData const*, RooWorkspace*> data_ws_map;
547 std::map<RooAbsReal const*, RooWorkspace*> pdf_ws_map;
548 for (
auto const& iter : wspaces_) {
549 auto dat = iter.second->allData();
551 data_ws_map[d] = iter.second.get();
553 RooArgSet vars = iter.second->allPdfs();
554 auto v = vars.createIterator();
556 RooAbsReal *y = dynamic_cast<RooAbsReal*>(**v);
557 if (y) pdf_ws_map[iter.second->pdf(y->GetName())] = iter.second.get();
559 RooArgSet fvars = iter.second->allFunctions();
560 auto fv = fvars.createIterator();
562 RooAbsReal *y = dynamic_cast<RooAbsReal*>(**fv);
563 if (y) pdf_ws_map[iter.second->function(y->GetName())] = iter.second.get();
564 }
while (fv->Next());
570 std::set<std::string> hist_bins;
572 for (
auto bin : bins) {
573 unsigned shape_count = std::count_if(procs_.begin(), procs_.end(),
574 [&](std::shared_ptr<ch::Process> p) {
575 return (p->bin() ==
bin && p->shape() && (!p->signal()));
577 shape_count += std::count_if(obs_.begin(), obs_.end(),
578 [&](std::shared_ptr<ch::Observation> p) {
579 return (p->bin() ==
bin && p->shape());
581 unsigned counting = std::count_if(
582 procs_.begin(), procs_.end(), [&](std::shared_ptr<ch::Process> p) {
583 return (p->bin() ==
bin && p->shape() ==
nullptr &&
584 p->pdf() ==
nullptr && p->
data() ==
nullptr);
586 counting += std::count_if(
587 obs_.begin(), obs_.end(), [&](std::shared_ptr<ch::Observation> p) {
588 return (p->bin() ==
bin && p->shape() ==
nullptr &&
589 p->
data() ==
nullptr);
592 if (shape_count > 0) {
593 mappings.emplace_back(
"*",
bin,
bin +
"/$PROCESS",
594 bin +
"/$PROCESS_$SYSTEMATIC");
595 hist_bins.insert(
bin);
596 }
else if (counting > 0) {
597 mappings.emplace_back(
"*",
bin,
"",
"");
598 mappings.back().is_fake =
true;
605 for (
auto sig_proc : sig_proc_set) {
608 auto masses =
Set2Vec(ch_signals.cp()
611 if (masses.size() != 1) {
612 throw std::runtime_error(
FNERROR(
"Process " + sig_proc +
" in bin " +
614 " has multiple entries with multiple "
615 "mass values, this is not supported"));
619 mappings.emplace_back(sig_proc,
bin,
bin +
"/" + sig_proc +
"$MASS",
620 bin +
"/" + sig_proc +
"$MASS_$SYSTEMATIC");
626 for (
auto bin : bins) {
628 for (
auto obs : ch_bin.obs_) {
629 if (!obs->data())
continue;
630 std::string obj_name = std::string(data_ws_map[obs->data()]->GetName()) +
631 ":" + std::string(obs->data()->GetName());
632 mappings.emplace_back(
"data_obs", obs->bin(), obj_name,
"");
635 bool prototype_ok =
false;
636 HistMapping prototype;
637 std::vector<HistMapping> full_list;
638 auto pmap = ch_bin.GenerateProcSystMap();
639 for (
unsigned i = 0; i < ch_bin.procs_.size(); ++i) {
641 if (!proc->
data() && !proc->
pdf())
continue;
642 std::string obj_name;
643 std::string obj_sys_name;
645 obj_name = std::string(proc->
data()->GetName());
646 boost::replace_all(obj_name, proc->
process(),
"$PROCESS");
647 obj_name = std::string(data_ws_map[proc->
data()]->GetName()) +
":" +
651 obj_name = std::string(proc->
pdf()->GetName());
652 boost::replace_all(obj_name, proc->
process(),
"$PROCESS");
653 obj_name = std::string(pdf_ws_map[proc->
pdf()]->GetName()) +
656 for (
unsigned j = 0; j < pmap[i].size(); ++j) {
659 obj_sys_name = std::string(sys->
data_u()->GetName());
660 boost::replace_all(obj_sys_name, sys->
name() +
"Up",
"$SYSTEMATIC");
661 boost::replace_all(obj_sys_name, sys->
process(),
"$PROCESS");
662 obj_sys_name = std::string(data_ws_map[sys->
data_u()]->GetName()) +
667 obj_sys_name = std::string(sys->
pdf_u()->GetName());
668 boost::replace_all(obj_sys_name, sys->
name() +
"Up",
"$SYSTEMATIC");
669 boost::replace_all(obj_sys_name, sys->
process(),
"$PROCESS");
670 obj_sys_name = std::string(pdf_ws_map[sys->
pdf_u()]->GetName()) +
678 if (prototype.pattern.size() && prototype.pattern != obj_name) {
679 prototype_ok =
false;
682 if (prototype.syst_pattern.size() && obj_sys_name.size() &&
683 prototype.syst_pattern != obj_sys_name) {
684 prototype_ok =
false;
687 if (!prototype.pattern.size()) {
690 prototype.category =
bin;
691 prototype.pattern = obj_name;
693 if (!prototype.syst_pattern.size()) {
694 prototype.syst_pattern = obj_sys_name;
697 full_list.emplace_back(proc->
process(), proc->
bin(), obj_name,
704 if (!prototype_ok || hist_bins.count(
bin)) {
705 for (
auto m : full_list) {
706 mappings.push_back(m);
709 mappings.push_back(prototype);
720 bool is_counting =
true;
722 if (obs->
shape() !=
nullptr || obs->
data() !=
nullptr) {
728 if (proc->
shape() !=
nullptr || proc->
data() !=
nullptr ||
729 proc->
pdf() !=
nullptr) {
736 if (!root_file.IsOpen() && !is_counting) {
737 throw std::runtime_error(
FNERROR(
738 std::string(
"Output ROOT file is not open: ") + root_file.GetName()));
741 std::unique_ptr<std::ostream> txt_file_ptr =
nullptr;
744 std::string zip_ext =
".gz";
745 bool has_zip_ext = (name.length() >= zip_ext.length() && name.compare(name.length() - zip_ext.length(), zip_ext.length(), zip_ext) == 0);
748 txt_file_ptr = std::make_unique<zstr::ofstream>(name);
750 txt_file_ptr = std::make_unique<std::ofstream>(name);
752 if (txt_file_ptr->fail()) {
753 throw std::runtime_error(
FNERROR(
"Unable to create file: " + name));
756 std::ostream & txt_file = *txt_file_ptr;
761 std::string dashes(80,
'-');
765 std::set<std::string> sys_set;
766 std::set<std::string> rateparam_set;
768 if (sys->
type() ==
"rateParam") {
769 rateparam_set.insert(sys->
name());
771 sys_set.insert(sys->
name());
774 txt_file <<
"imax " <<
bin_set.size()
775 <<
" number of bins\n";
776 txt_file <<
"jmax " << proc_set.size() - 1
777 <<
" number of processes minus 1\n";
778 txt_file <<
"kmax " <<
"*" <<
" number of nuisance parameters\n";
779 txt_file << dashes <<
"\n";
782 std::vector<HistMapping> mappings;
783 FillHistMappings(mappings);
787 auto proc_sys_map = this->GenerateProcSystMap();
789 std::set<std::string> all_dependents_pars;
790 std::set<std::string> multipdf_cats;
791 for (
auto proc : procs_) {
792 if (!proc->
pdf())
continue;
797 RooAbsData
const* data_obj = FindMatchingData(proc.get());
799 RooArgSet norm_par_list;
807 RooRealVar mx(
"CMS_th1x" ,
"CMS_th1x", 0, 1);
808 RooArgSet tmp_set(mx);
814 RooFIter par_list_it = par_list.fwdIterator();
815 RooAbsArg *par_list_var =
nullptr;
816 while ((par_list_var = par_list_it.next())) {
817 if (dynamic_cast<RooRealVar*>(par_list_var)) {
818 all_dependents_pars.insert(par_list_var->GetName());
825 if (dynamic_cast<RooCategory*>(par_list_var) &&
826 proc->
pdf()->InheritsFrom(
"RooMultiPdf")) {
827 multipdf_cats.insert(par_list_var->GetName());
831 RooFIter nm_list_it = norm_par_list.fwdIterator();
832 RooAbsArg *nm_list_var =
nullptr;
833 while ((nm_list_var = nm_list_it.next())) {
834 if (dynamic_cast<RooRealVar*>(nm_list_var)) {
835 all_dependents_pars.insert(nm_list_var->GetName());
842 std::string file_name = root_file.GetName();
846 boost::filesystem::path root_file_path =
847 boost::filesystem::absolute(file_name);
849 boost::filesystem::path txt_file_path =
850 boost::filesystem::absolute(name).parent_path();
852 file_name =
make_relative(txt_file_path, root_file_path).string();
856 std::set<std::string> used_wsps;
858 for (
auto const& mapping : mappings) {
859 if (!mapping.is_fake) {
860 txt_file << format(
"shapes %s %s %s %s %s\n")
865 % mapping.syst_pattern;
866 if (mapping.IsPdf() || mapping.IsData()) {
867 used_wsps.insert(mapping.WorkspaceName());
868 if (mapping.syst_pattern !=
"") {
869 used_wsps.insert(mapping.SystWorkspaceName());
873 txt_file << format(
"shapes %s %s %s\n")
879 txt_file << dashes <<
"\n";
882 for (
auto ws_it : wspaces_) {
883 if (ws_it.first ==
"_rateParams")
continue;
885 if (!used_wsps.count(ws_it.second->GetName()))
continue;
891 if (obs_.size() > 0) {
893 for (
auto const& obs : obs_) {
894 txt_file << format(
"%-15s ") % obs->
bin();
896 bool add_dir = TH1::AddDirectoryStatus();
897 TH1::AddDirectory(
false);
898 std::unique_ptr<TH1> h((TH1*)(obs->
shape()->Clone()));
899 h->Scale(obs->
rate());
900 WriteHistToFile(h.get(), &root_file, mappings, obs->
bin(),
"data_obs",
902 TH1::AddDirectory(add_dir);
906 txt_file <<
"observation ";
911 std::string obs_fmt_int =
"%-15.1f ";
912 std::string obs_fmt_flt =
"%-15.4f ";
913 for (
auto const& obs : obs_) {
915 std::fabs(obs->
rate() - std::round(obs->
rate())) > 1E-4;
916 txt_file << format(
is_float ? obs_fmt_flt : obs_fmt_int)
920 txt_file << dashes <<
"\n";
923 unsigned sys_str_len = 14;
924 for (
auto const& sys : sys_set) {
925 if (sys.length() > sys_str_len) sys_str_len = sys.length();
927 for (
auto const& sys : all_dependents_pars) {
928 if (sys.length() > sys_str_len) sys_str_len = sys.length();
930 std::string sys_str_short = boost::lexical_cast<std::string>(sys_str_len);
931 std::string sys_str_long = boost::lexical_cast<std::string>(sys_str_len+9);
933 auto getProcLen = [](
auto const& proc) -> std::string {
934 std::size_t proc_len = 15;
935 proc_len = std::max(proc_len, proc->
process().size());
936 std::string proc_len_str = boost::lexical_cast<std::string>(proc_len);
940 txt_file << format(
"%-"+sys_str_long+
"s") %
"bin";
941 for (
auto const& proc : procs_) {
943 bool add_dir = TH1::AddDirectoryStatus();
944 TH1::AddDirectory(
false);
946 WriteHistToFile(h.get(), &root_file, mappings, proc->
bin(),
948 TH1::AddDirectory(add_dir);
950 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % proc->
bin();
954 txt_file << format(
"%-"+sys_str_long+
"s") %
"process";
956 for (
auto const& proc : procs_) {
957 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % proc->
process();
961 txt_file << format(
"%-"+sys_str_long+
"s") %
"process";
964 std::map<std::string, int> p_ids;
967 for (
unsigned p = 0; p < procs_.size(); ++p) {
968 if (!procs_[p]->signal()) {
969 if (p_ids.count(procs_[p]->process()) == 0) {
970 p_ids[procs_[p]->process()] = bkg_id;
975 if (p_ids[procs_[p]->
process()] <= 0)
throw std::runtime_error(
FNERROR(
"Ambiguous definition of process (" + procs_[p]->
process() +
") as both signal and background"));
978 unsigned q = procs_.size() - (p + 1);
979 if (procs_[q]->signal()) {
980 if (p_ids.count(procs_[q]->process()) == 0) {
981 p_ids[procs_[q]->process()] = sig_id;
986 if (p_ids[procs_[q]->
process()] > 0)
throw std::runtime_error(
FNERROR(
"Ambiguous definition of process (" + procs_[q]->
process() +
") as both signal and background"));
990 for (
auto const& proc : procs_) {
991 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % p_ids[proc->
process()];
996 txt_file << format(
"%-"+sys_str_long+
"s") %
"rate";
997 for (
auto const& proc : procs_) {
998 txt_file << format(
"%-"+getProcLen(proc)+
".6g ") % proc->
no_norm_rate();
1001 txt_file << dashes <<
"\n";
1005 for (
auto par : params_) {
1007 if (p->
err_d() != 0.0 && p->
err_u() != 0.0 &&
1008 all_dependents_pars.count(p->
name()) && sys_set.count(p->
name())) {
1009 txt_file << format((format(
"%%-%is param %%g %%g") % sys_str_len).str()) %
1011 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1012 p->
range_u() != std::numeric_limits<double>::max()) {
1013 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1019 for (
auto const& sys : sys_set) {
1020 std::vector<std::string> line(procs_.size() + 2);
1022 bool seen_lnN =
false;
1023 bool seen_lnU =
false;
1024 bool seen_shape =
false;
1025 bool seen_shapeN2 =
false;
1026 bool seen_shapeU =
false;
1027 for (
unsigned p = 0; p < procs_.size(); ++p) {
1029 for (
unsigned n = 0; n < proc_sys_map[p].size(); ++n) {
1032 if (ptr->
name() != sys)
continue;
1033 std::string tp = ptr->
type();
1034 if (tp ==
"lnN" || tp ==
"lnU") {
1035 if (tp ==
"lnN") seen_lnN =
true;
1036 if (tp ==
"lnU") seen_lnU =
true;
1040 : (format(
"%g") % ptr->
value_u()).str();
1043 if (tp ==
"shape" || tp ==
"shapeN2" || tp ==
"shapeU") {
1044 if (tp ==
"shape") seen_shape =
true;
1045 if (tp ==
"shapeN2") seen_shapeN2 =
true;
1046 if (tp ==
"shapeU") seen_shapeU =
true;
1047 line[p + 2] = (format(
"%g") % ptr->
scale()).str();
1049 bool add_dir = TH1::AddDirectoryStatus();
1050 TH1::AddDirectory(
false);
1052 h_d->Scale(procs_[p]->rate() * ptr->
value_d());
1053 WriteHistToFile(h_d.get(), &root_file, mappings, ptr->
bin(),
1056 h_u->Scale(procs_[p]->rate() * ptr->
value_u());
1057 WriteHistToFile(h_u.get(), &root_file, mappings, ptr->
bin(),
1059 TH1::AddDirectory(add_dir);
1063 if (!flags_.at(
"allow-missing-shapes")) {
1064 std::stringstream err;
1065 err <<
"Trying to write shape uncertainty with missing "
1068 throw std::runtime_error(
FNERROR(err.str()));
1075 line[1] =
"shapeN2";
1076 }
else if (seen_shapeU) {
1078 }
else if (seen_lnU) {
1080 }
else if (seen_lnN && !seen_shape) {
1082 }
else if (!seen_lnN && seen_shape) {
1084 }
else if (seen_lnN && seen_shape) {
1087 throw std::runtime_error(
FNERROR(
"Systematic type could not be deduced"));
1089 txt_file << format(
"%-" + sys_str_short +
"s %-7s ") % line[0] % line[1];
1090 for (
unsigned p = 0; p < procs_.size(); ++p) {
1091 txt_file << format(
"%-"+getProcLen(procs_[p])+
"s ") % line[p + 2];
1100 std::vector<std::vector<std::string> > floating_params;
1101 for (
auto const& sys : rateparam_set) {
1102 FNLOGC(log(), verbosity_ > 1) <<
"Doing rateParam: " << sys <<
"\n";
1105 FNLOGC(log(), verbosity_ > 1) <<
"Procs for this rateParam: \n";
1106 for (
auto const& proc : procs_rp) {
1107 FNLOGC(log(), verbosity_ > 1) <<
" - " << proc <<
"\n";
1112 if (bins_rp.size() > 0 && bins_rp.size() == bins_all.size()) {
1113 floating_params.push_back({sys,
"*", proc});
1115 for (
auto const&
bin : bins_rp) {
1116 floating_params.push_back({sys,
bin, proc});
1119 FNLOGC(log(), verbosity_ > 1)
1120 << bins_rp.size() <<
"/" << bins_all.size()
1121 <<
" bins with this process have a rateParam\n";
1124 std::set<std::string> frozen_params;
1125 for (
auto const& rp : floating_params) {
1126 if (params_.count(rp[0])) {
1128 txt_file << format(
"%-" + sys_str_short +
"s %-10s %-10s %-10s %g") %
1129 rp[0] %
"rateParam" % rp[1] % rp[2] % par->
val();
1132 if (par->
range_d() != std::numeric_limits<double>::lowest() &&
1133 par->
range_u() != std::numeric_limits<double>::max()) {
1134 txt_file << format(
" [%.4g,%.4g]") % par->
range_d() % par->
range_u();
1138 frozen_params.insert(rp[0]);
1142 for (
auto const& par : frozen_params) {
1143 txt_file <<
"nuisance edit freeze " << par <<
"\n";
1145 std::set<std::string> all_fn_param_args;
1146 for (
auto const& rp : floating_params) {
1147 if (!params_.count(rp[0])) {
1150 RooWorkspace *rp_ws = wspaces_.at(
"_rateParams").get();
1151 RooAbsArg *arg = rp_ws->arg(rp[0].c_str());
1152 if (arg->getStringAttribute(
"wspSource")) {
1153 txt_file << format(
"%-" + sys_str_short +
1154 "s %-10s %-10s %-10s %-20s\n") %
1155 rp[0] %
"rateParam" % rp[1] % rp[2] % std::string(arg->getStringAttribute(
"wspSource"));
1158 RooFormulaVar* form =
1159 dynamic_cast<RooFormulaVar*>(arg);
1162 std::stringstream ss;
1163 form->printMetaArgs(ss);
1164 std::string form_str = ss.str();
1167 std::size_t first = form_str.find_first_of(
'"');
1168 std::size_t last = form_str.find_last_of(
'"');
1169 form_str = form_str.substr(first + 1, last - first - 1);
1172 std::unique_ptr<RooArgSet>(form->getParameters(RooArgList()))->getSize();
1173 std::string args =
"";
1174 for (
unsigned i = 0; i < npars; ++i) {
1175 all_fn_param_args.insert(std::string(form->getParameter(i)->GetName()));
1176 args += std::string(form->getParameter(i)->GetName());
1177 if (i < (npars-1)) {
1181 txt_file << format(
"%-" + sys_str_short +
1182 "s %-10s %-10s %-10s %-20s %s\n") %
1183 rp[0] %
"rateParam" % rp[1] % rp[2] % form_str % args;
1187 if (wspaces_.count(
"_rateParams")) {
1188 RooWorkspace *rp_ws = wspaces_.at(
"_rateParams").get();
1189 RooArgSet vars = rp_ws->allVars();
1190 auto v = vars.createIterator();
1192 RooRealVar *y = dynamic_cast<RooRealVar*>(**v);
1193 if (y && y->getAttribute(
"extArg") && all_fn_param_args.count(std::string(y->GetName()))) {
1194 Parameter const* p = params_.at(y->GetName()).get();
1195 txt_file << format(
"%-" + sys_str_short +
"s %-10s %g") %
1196 y->GetName() %
"extArg" % p->
val();
1197 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1198 p->
range_u() != std::numeric_limits<double>::max()) {
1199 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1204 }
while (v->Next());
1205 RooArgSet funcs = rp_ws->allFunctions();
1206 v = funcs.createIterator();
1208 RooAbsReal *y = dynamic_cast<RooAbsReal*>(**v);
1209 if (y && y->getAttribute(
"extArg") && y->getStringAttribute(
"wspSource") && all_fn_param_args.count(std::string(y->GetName()))) {
1210 txt_file << format(
"%-" + sys_str_short +
1211 "s %-10s %-20s\n") %
1212 y->GetName() %
"extArg" % std::string(y->getStringAttribute(
"wspSource"));
1216 }
while (v->Next());
1219 std::set<std::string> ws_vars;
1220 for (
auto iter : wspaces_) {
1221 RooArgSet vars = iter.second->allVars();
1222 auto v = vars.createIterator();
1224 RooRealVar *y = dynamic_cast<RooRealVar*>(**v);
1226 ws_vars.insert(y->GetName());
1228 }
while (v->Next());
1238 for (
auto par : params_) {
1240 if (p->
err_d() != 0.0 && p->
err_u() != 0.0 &&
1241 all_dependents_pars.count(p->
name()) && !sys_set.count(p->
name())) {
1242 txt_file << format((format(
"%%-%is param %%g %%g") % sys_str_len).str()) %
1244 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1245 p->
range_u() != std::numeric_limits<double>::max()) {
1246 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1252 for (
auto par : multipdf_cats) {
1253 txt_file << format((format(
"%%-%is discrete\n") % sys_str_len).str()) % par;
1256 for (
auto stat_settings : auto_stats_settings_) {
1257 if (!
bin_set.count(stat_settings.first))
continue;
1258 txt_file << format(
"%s autoMCStats %g %i %i\n") % stat_settings.first %
1259 stat_settings.second.event_threshold %
1260 stat_settings.second.include_signal %
1261 stat_settings.second.hist_mode;
1268 std::map<std::string, std::set<std::string>> groups;
1269 for (
auto const& par : params_) {
1271 if (!all_dependents_pars.count(p->
name()) && !sys_set.count(p->
name())) {
1274 if (p->
err_d() == 0.0 && p->
err_u() == 0.0)
continue;
1275 for (
auto const& str : p->
groups()) {
1276 if (!groups.count(str)) {
1277 groups[str] = std::set<std::string>();
1279 groups[str].insert(p->
name());
1282 for (
auto const& gr : groups) {
1283 txt_file << format(
"%s group =") % gr.first;
1284 for (
auto const& np : gr.second) {
1285 txt_file <<
" " << np;
1290 for (
auto const& postl : post_lines_) {
1291 txt_file << postl <<
"\n";
1295 void CombineHarvester::WriteHistToFile(
1298 std::vector<HistMapping>
const& mappings,
1299 std::string
const& bin,
1301 std::string
const& mass,
1302 std::string
const& nuisance,
1304 StrPairVec attempts = this->GenerateShapeMapAttempts(
process,
bin);
1305 for (
unsigned a = 0; a < attempts.size(); ++a) {
1306 for (
unsigned m = 0; m < mappings.size(); ++m) {
1307 if ((attempts[a].first == mappings[m].
process) &&
1308 (attempts[a].second == mappings[m].category)) {
1309 std::string p = (type == 0 ?
1310 mappings[m].pattern : mappings[m].syst_pattern);
1311 boost::replace_all(p,
"$CHANNEL",
bin);
1312 boost::replace_all(p,
"$PROCESS",
process);
1313 boost::replace_all(p,
"$MASS",
mass);
1314 if (type == 1) boost::replace_all(p,
"$SYSTEMATIC", nuisance+
"Down");
1315 if (type == 2) boost::replace_all(p,
"$SYSTEMATIC", nuisance+
"Up");