12 #include "boost/lexical_cast.hpp"
13 #include "boost/algorithm/string.hpp"
14 #include "boost/format.hpp"
15 #include "boost/regex.hpp"
16 #include "boost/filesystem.hpp"
17 #include "TDirectory.h"
19 #include "RooRealVar.h"
20 #include "RooFormulaVar.h"
21 #include "RooCategory.h"
22 #include "RooConstVar.h"
32 #include "CombineHarvester/CombineTools/interface/zstr.hpp"
36 RooWorkspace* CombineHarvester::fixRooConstVar(RooWorkspace *win,
bool useRooRealVar,
bool clean)
38 if ( !
GetFlag(
"fix-rooconstvar"))
return win;
39 std::cout<<
"[INFO]"<<
"Running fixRooConstVar on "<<win->GetName()<<std::endl;
42 std::string name = win->GetName();
43 RooWorkspace *wout =
new RooWorkspace(name.c_str(), (std::string( win->GetTitle() ) +
"_RCVfix").c_str() ) ;
44 for (
auto cobj : win->components()) {
45 if (cobj->IsA() == RooConstVar::Class() )
47 RooConstVar* cvar =
dynamic_cast<RooConstVar*
> (cobj);
48 std::cout<<
"[INFO] Found RooConstVar '"<<cvar->GetName()<<
"' with value "<< cvar->getVal()<<
" in workspace "<<name<<std::endl;
51 RooRealVar rrv = RooRealVar(cvar->GetName(),cvar->GetTitle(),cvar->getVal());
54 wout ->
import ( rrv, RooFit::RecycleConflictNodes());
57 RooConstVar rcv = RooConstVar(cvar->GetName(),cvar->GetTitle(),cvar->getVal());
58 wout ->
import ( rcv, RooFit::RecycleConflictNodes());
61 std::cout<<
" -> imported in workspace"<<std::endl;
62 wout -> obj( cvar->GetName() ) -> Print(
"T");
68 wout ->
import(win->components(), RooFit::RecycleConflictNodes());
69 for (
auto d : win->allData() ) wout ->
import(*d);
70 if (clean)
delete win;
77 std::string parse_rules) {
78 boost::replace_all(parse_rules,
"$ANALYSIS",
"(?<ANALYSIS>[\\w\\.]+)");
79 boost::replace_all(parse_rules,
"$ERA",
"(?<ERA>[\\w\\.]+)");
80 boost::replace_all(parse_rules,
"$CHANNEL",
"(?<CHANNEL>[\\w\\.]+)");
81 boost::replace_all(parse_rules,
"$BINID",
"(?<BINID>[\\w\\.]+)");
82 boost::replace_all(parse_rules,
"$MASS",
"(?<MASS>[\\w\\.]+)");
83 boost::regex rgx(parse_rules);
84 boost::smatch matches;
85 boost::regex_search(filename, matches, rgx);
87 matches.str(
"ANALYSIS"),
89 matches.str(
"CHANNEL"),
90 matches.str(
"BINID").length() ?
91 boost::lexical_cast<int>(matches.str(
"BINID")) : 0,
97 std::string
const& analysis,
98 std::string
const&
era,
101 std::string
const& mass) {
102 TH1::AddDirectory(kFALSE);
109 std::vector<std::vector<std::string>> words;
110 for (
unsigned i = 0; i < lines.size(); ++i) {
111 boost::trim(lines[i]);
112 if (lines[i].size() == 0)
continue;
113 if (lines[i].at(0) ==
'#' || lines[i].at(0) ==
'-')
continue;
114 words.push_back(std::vector<std::string>());
115 boost::split(words.back(), lines[i], boost::is_any_of(
"\t "),
116 boost::token_compress_on);
119 std::vector<HistMapping> hist_mapping;
121 std::map<std::string, std::shared_ptr<TFile>> file_store;
122 std::map<std::string, std::shared_ptr<RooWorkspace>> ws_store;
124 bool start_nuisance_scan =
false;
130 std::shared_ptr<ch::Observation> single_obs =
nullptr;
131 std::set<std::string> bin_names;
134 std::map<std::string, std::set<std::string>> groups;
138 for (
unsigned i = 0; i < words.size(); ++i) {
140 if (words[i].size() <= 1)
continue;
144 if (boost::iequals(words[i][0],
"shapes") && words[i].size() >= 5) {
152 std::size_t slash = filename.find_last_of(
'/');
153 if (slash != filename.npos) {
154 dc_path = filename.substr(0, slash) +
"/" + words[i][3];
156 dc_path = words[i][3];
158 if (!file_store.count(dc_path))
159 file_store[dc_path] = std::make_shared<TFile>(dc_path.c_str());
160 mapping.
file = file_store.at(dc_path);
162 if (words[i].size() > 5) mapping.
syst_pattern = words[i][5];
164 if (mapping.
IsPdf()) {
165 std::string store_key =
167 if (!ws_store.count(store_key)) {
169 std::shared_ptr<RooWorkspace> ptr(
171 dynamic_cast<RooWorkspace*
>(gDirectory->Get(mapping.
WorkspaceName().c_str()))
175 throw std::runtime_error(
FNERROR(
"Workspace not found in file"));
177 ws_store[store_key] = ptr;
179 mapping.
ws = SetupWorkspace(*(ws_store[store_key]),
true);
182 std::string store_key =
184 if (!ws_store.count(store_key)) {
186 std::shared_ptr<RooWorkspace> ptr(
188 dynamic_cast<RooWorkspace*
>(gDirectory->Get(mapping.
SystWorkspaceName().c_str()))
192 throw std::runtime_error(
FNERROR(
"Workspace not found in file"));
194 ws_store[store_key] = ptr;
196 mapping.
sys_ws = SetupWorkspace(*(ws_store[store_key]),
true);
202 if (boost::iequals(words[i][0],
"shapes") && words[i].size() == 4 &&
203 boost::iequals(words[i][3],
"FAKE")) {
212 for (
unsigned i = 0; i < words.size(); ++i) {
213 if (words[i].size() >= 3 &&
214 boost::iequals(words[i][1],
"extArg")) {
215 if (verbosity_ > 1) {
216 FNLOG(log()) <<
"Processing extArg line:\n";
217 for (
auto const& str : words[i]) {
218 log() << str <<
"\t";
223 bool has_range = words[i].size() == 4 && words[i][3][0] ==
'[';
224 std::string param_name = words[i][0];
225 bool is_wsp_rateparam =
false;
227 boost::lexical_cast<double>(words[i][2]);
228 }
catch (boost::bad_lexical_cast &) {
229 is_wsp_rateparam =
true;
231 if ((!is_wsp_rateparam) && (words[i].size() == 3 || has_range)) {
233 param_name, boost::lexical_cast<double>(words[i][2]),
true);
237 std::vector<std::string> tokens;
238 boost::split(tokens, words[i][3], boost::is_any_of(
"[],"));
239 if (tokens.size() == 4) {
240 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
241 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
242 FNLOGC(log(), verbosity_ > 1) <<
"Setting parameter range to " << words[i][2];
245 }
else if (words[i].size() == 3 && is_wsp_rateparam) {
246 if (!SetupRateParamWspObjFromWsStore(param_name, words[i][2], ws_store)) {
247 SetupRateParamWspObj(param_name, words[i][2],
true);
254 for (
unsigned i = 0; i < words.size(); ++i) {
256 if (words[i].size() <= 1)
continue;
263 if ( boost::iequals(words[i][0],
"observation") &&
264 boost::iequals(words[i-1][0],
"bin") &&
265 words[i].size() == words[i-1].size()) {
266 for (
unsigned p = 1; p < words[i].size(); ++p) {
267 auto obs = std::make_shared<Observation>();
268 obs->set_bin(words[i-1][p]);
269 obs->set_rate(boost::lexical_cast<double>(words[i][p]));
276 LoadShapes(obs.get(), hist_mapping);
283 if (boost::iequals(words[i][0],
"observation") &&
284 !boost::iequals(words[i-1][0],
"bin") &&
285 words[i].size() == 2 &&
286 single_obs.get() ==
nullptr) {
287 for (
unsigned p = 1; p < words[i].size(); ++p) {
288 single_obs = std::make_shared<Observation>();
289 single_obs->set_bin(
"");
290 single_obs->set_rate(boost::lexical_cast<double>(words[i][p]));
292 single_obs->set_era(
era);
293 single_obs->set_channel(
channel);
294 single_obs->set_bin_id(
bin_id);
295 single_obs->set_mass(
mass);
306 if ( boost::iequals(words[i][0],
"rate") &&
307 boost::iequals(words[i-1][0],
"process") &&
308 boost::iequals(words[i-2][0],
"process") &&
309 boost::iequals(words[i-3][0],
"bin") &&
310 words[i].size() == words[i-1].size() &&
311 words[i].size() == words[i-2].size() &&
312 words[i].size() == words[i-3].size()) {
313 for (
unsigned p = 1; p < words[i].size(); ++p) {
314 auto proc = std::make_shared<Process>();
315 proc->set_bin(words[i-3][p]);
316 bin_names.insert(words[i-3][p]);
318 int process_id = boost::lexical_cast<int>(words[i-2][p]);
319 proc->set_signal(process_id <= 0);
320 proc->set_process(words[i-1][p]);
321 }
catch(boost::bad_lexical_cast &) {
322 int process_id = boost::lexical_cast<int>(words[i-1][p]);
323 proc->set_signal(process_id <= 0);
324 proc->set_process(words[i-2][p]);
326 proc->set_rate(boost::lexical_cast<double>(words[i][p]));
331 proc->set_mass(
mass);
333 LoadShapes(proc.get(), hist_mapping);
335 procs_.push_back(proc);
338 start_nuisance_scan =
true;
341 if (start_nuisance_scan && words[i].size() >= 4) {
342 if (boost::iequals(words[i][1],
"param")) {
343 std::string param_name = words[i][0];
344 Parameter * param = SetupRateParamVar(param_name, boost::lexical_cast<double>(words[i][2]));
345 param->
set_val(boost::lexical_cast<double>(words[i][2]));
346 std::size_t slash_pos = words[i][3].find(
"/");
347 if (slash_pos != words[i][3].npos) {
349 boost::lexical_cast<double>(words[i][3].substr(0, slash_pos)));
351 boost::lexical_cast<double>(words[i][3].substr(slash_pos+1)));
353 param->
set_err_u(+1.0 * boost::lexical_cast<double>(words[i][3]));
354 param->
set_err_d(-1.0 * boost::lexical_cast<double>(words[i][3]));
356 if (words[i].size() >= 5) {
358 std::vector<std::string> tokens;
359 boost::split(tokens, words[i][4], boost::is_any_of(
"[],"));
360 if (tokens.size() == 4) {
361 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
362 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
365 auto sys = std::make_shared<Systematic>();
366 sys->set_name(param_name);
367 sys->set_type(
"param");
374 std::string syst_str_ext = (boost::format(
"%g %g") % param->
val() % ((param->
err_u() - param->
err_d()) / 2.0)).str();
375 if (param->
range_d() != std::numeric_limits<double>::lowest() &&
376 param->
range_u() != std::numeric_limits<double>::max()) {
377 syst_str_ext += (boost::format(
" [%.4g,%.4g]") % param->
range_d() % param->
range_u()).str();
379 sys->set_param_str_ext(syst_str_ext);
380 systs_.push_back(sys);
385 if (start_nuisance_scan && words[i].size() >= 5 &&
386 boost::iequals(words[i][1],
"rateParam")) {
387 if (verbosity_ > 1) {
388 FNLOG(log()) <<
"Processing rateParam line:\n";
389 for (
auto const& str : words[i]) {
390 log() << str <<
"\t";
395 bool has_range = words[i].size() == 6 && words[i][5][0] ==
'[';
396 std::string param_name = words[i][0];
401 bool is_wsp_rateparam =
false;
403 boost::lexical_cast<double>(words[i][4]);
404 }
catch (boost::bad_lexical_cast &) {
405 is_wsp_rateparam =
true;
407 if ((!is_wsp_rateparam) && (words[i].size() == 5 || has_range)) {
409 param_name, boost::lexical_cast<double>(words[i][4]));
413 std::vector<std::string> tokens;
414 boost::split(tokens, words[i][5], boost::is_any_of(
"[],"));
415 if (tokens.size() == 4) {
416 param->
set_range_d(boost::lexical_cast<double>(tokens[1]));
417 param->
set_range_u(boost::lexical_cast<double>(tokens[2]));
418 FNLOGC(log(), verbosity_ > 1) <<
"Setting parameter range to " << words[i][5];
421 }
else if (words[i].size() == 6 && !has_range) {
422 SetupRateParamFunc(param_name, words[i][4], words[i][5]);
423 }
else if (words[i].size() == 5 && is_wsp_rateparam) {
424 SetupRateParamWspObj(param_name, words[i][4]);
426 for (
unsigned p = 1; p < words[r].size(); ++p) {
427 bool matches_bin =
false;
428 bool matches_proc =
false;
431 std::string
bin = words[r-3][p];
434 process_id = boost::lexical_cast<int>(words[r-2][p]);
436 }
catch(boost::bad_lexical_cast &) {
437 process_id = boost::lexical_cast<int>(words[r-1][p]);
441 if (words[i][2] ==
"*" || fnmatch(words[i][2].c_str(),
bin.c_str(), 0) == 0) {
445 if (words[i][3] ==
"*" || fnmatch(words[i][3].c_str(),
process.c_str(), 0) == 0) {
448 if (!matches_bin || !matches_proc)
continue;
449 auto sys = std::make_shared<Systematic>();
451 sys->set_signal(process_id <= 0);
453 sys->set_name(param_name);
454 sys->set_type(
"rateParam");
460 systs_.push_back(sys);
465 if (start_nuisance_scan && words[i].size() >= 4 &&
466 boost::iequals(words[i][1],
"group")) {
467 if (!groups.count(words[i][0])) {
468 groups[words[i][0]] = std::set<std::string>();
470 for (
unsigned ig = 3; ig < words[i].size(); ++ig) {
471 groups[words[i][0]].insert(words[i][ig]);
476 if (start_nuisance_scan && words[i].size() >= 3 &&
477 boost::iequals(words[i][1],
"autoMCStats")) {
478 std::vector<std::string> for_bins;
479 if (words[i][0] ==
"*") {
482 for_bins.push_back(words[i][0]);
484 for (
auto const&
bin : for_bins) {
485 double thresh = boost::lexical_cast<double>(words[i][2]);
486 if (words[i].size() == 3) {
487 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh);
488 }
else if (words[i].size() == 4) {
489 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]));
491 auto_stats_settings_[
bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]), boost::lexical_cast<int>(words[i][4]));
496 if (start_nuisance_scan && words[i].size()-1 == words[r].size() && !boost::iequals(words[i][1],
"autoMCStats")) {
497 for (
unsigned p = 2; p < words[i].size(); ++p) {
498 if (words[i][p] ==
"-")
continue;
499 auto sys = std::make_shared<Systematic>();
500 sys->set_bin(words[r-3][p-1]);
502 int process_id = boost::lexical_cast<int>(words[r-2][p-1]);
503 sys->set_signal(process_id <= 0);
504 sys->set_process(words[r-1][p-1]);
505 }
catch(boost::bad_lexical_cast &) {
506 int process_id = boost::lexical_cast<int>(words[r-1][p-1]);
507 sys->set_signal(process_id <= 0);
508 sys->set_process(words[r-2][p-1]);
510 sys->set_name(words[i][0]);
511 std::string type = words[i][1];
512 if (!
contains(std::vector<std::string>{
"shape",
"shape?",
"shapeN2",
"shapeU",
"lnN",
"lnU"},
514 throw std::runtime_error(
515 FNERROR(
"Systematic type " + type +
" not supported"));
517 sys->set_type(words[i][1]);
524 std::size_t slash_pos = words[i][p].find(
"/");
525 if (slash_pos != words[i][p].npos) {
528 boost::lexical_cast<double>(words[i][p].substr(0, slash_pos)));
530 boost::lexical_cast<double>(words[i][p].substr(slash_pos+1)));
531 sys->set_asymm(
true);
533 sys->set_value_u(boost::lexical_cast<double>(words[i][p]));
534 sys->set_asymm(
false);
536 if (sys->type() ==
"shape" || sys->type() ==
"shapeN2" ||
537 sys->type() ==
"shapeU") {
538 sys->set_scale(boost::lexical_cast<double>(words[i][p]));
539 LoadShapes(sys.get(), hist_mapping);
540 }
else if (sys->type() ==
"shape?") {
543 LoadShapes(sys.get(), hist_mapping);
544 }
catch (std::exception & e) {
545 if (verbosity_ > 0) {
546 LOGLINE(log(),
"Systematic with shape? did not resolve to a shape");
550 if (!sys->shape_u() || !sys->shape_d()) {
551 sys->set_type(
"lnN");
553 sys->set_type(
"shape");
554 sys->set_scale(boost::lexical_cast<double>(words[i][p]));
557 if (sys->type() ==
"shape" || sys->type() ==
"shapeN2" ||
558 sys->type() ==
"shapeU")
559 sys->set_asymm(
true);
562 if (sys->type() ==
"lnU" || sys->type() ==
"shapeU") {
563 params_.at(sys->name())->set_err_d(0.);
564 params_.at(sys->name())->set_err_u(0.);
566 systs_.push_back(sys);
571 if (bin_names.size() == 1) {
572 single_obs->set_bin(*(bin_names.begin()));
573 LoadShapes(single_obs.get(), hist_mapping);
574 obs_.push_back(single_obs);
576 throw std::runtime_error(
FNERROR(
577 "Input card specifies a single observation entry without a bin, but "
578 "multiple bins defined elsewhere"));
583 for (
auto const& grp : groups) {
590 std::string
const& root_file) {
591 TFile file(root_file.c_str(),
"RECREATE");
602 void CombineHarvester::FillHistMappings(std::vector<HistMapping> & mappings) {
606 std::map<RooAbsData const*, RooWorkspace*> data_ws_map;
607 std::map<RooAbsReal const*, RooWorkspace*> pdf_ws_map;
608 for (
auto const& iter : wspaces_) {
609 auto dat = iter.second->allData();
611 data_ws_map[d] = iter.second.get();
613 RooArgSet vars = iter.second->allPdfs();
614 for (RooAbsArg *v : vars) {
615 RooAbsPdf *y =
dynamic_cast<RooAbsPdf*
>(v);
617 if (y) pdf_ws_map[iter.second->pdf(y->GetName())] = iter.second.get();
619 RooArgSet fvars = iter.second->allFunctions();
620 for (RooAbsArg *fv : fvars) {
621 RooAbsReal *y =
dynamic_cast<RooAbsReal*
>(fv);
622 if (y) pdf_ws_map[iter.second->function(y->GetName())] = iter.second.get();
629 std::set<std::string> hist_bins;
631 for (
auto bin : bins) {
632 unsigned shape_count = std::count_if(procs_.begin(), procs_.end(),
633 [&](std::shared_ptr<ch::Process> p) {
634 return (p->bin() == bin && p->shape() && (!p->signal()));
636 shape_count += std::count_if(obs_.begin(), obs_.end(),
637 [&](std::shared_ptr<ch::Observation> p) {
638 return (p->bin() == bin && p->shape());
640 unsigned counting = std::count_if(
641 procs_.begin(), procs_.end(), [&](std::shared_ptr<ch::Process> p) {
642 return (p->bin() == bin && p->shape() == nullptr &&
643 p->pdf() == nullptr && p->data() == nullptr);
645 counting += std::count_if(
646 obs_.begin(), obs_.end(), [&](std::shared_ptr<ch::Observation> p) {
647 return (p->bin() == bin && p->shape() == nullptr &&
648 p->data() == nullptr);
651 if (shape_count > 0) {
652 mappings.emplace_back(
"*",
bin,
bin +
"/$PROCESS",
653 bin +
"/$PROCESS_$SYSTEMATIC");
654 hist_bins.insert(
bin);
655 }
else if (counting > 0) {
656 mappings.emplace_back(
"*",
bin,
"",
"");
657 mappings.back().is_fake =
true;
664 for (
auto sig_proc : sig_proc_set) {
667 auto masses =
Set2Vec(ch_signals.cp()
670 if (masses.size() != 1) {
671 throw std::runtime_error(
FNERROR(
"Process " + sig_proc +
" in bin " +
673 " has multiple entries with multiple "
674 "mass values, this is not supported"));
678 mappings.emplace_back(sig_proc,
bin,
bin +
"/" + sig_proc +
"$MASS",
679 bin +
"/" + sig_proc +
"$MASS_$SYSTEMATIC");
685 for (
auto bin : bins) {
687 for (
auto obs : ch_bin.obs_) {
688 if (!obs->data())
continue;
689 std::string obj_name = std::string(data_ws_map[obs->data()]->GetName()) +
690 ":" + std::string(obs->data()->GetName());
691 mappings.emplace_back(
"data_obs", obs->bin(), obj_name,
"");
694 bool prototype_ok =
false;
695 HistMapping prototype;
696 std::vector<HistMapping> full_list;
697 auto pmap = ch_bin.GenerateProcSystMap();
698 for (
unsigned i = 0; i < ch_bin.procs_.size(); ++i) {
700 if (!proc->
data() && !proc->
pdf())
continue;
701 std::string obj_name;
702 std::string obj_sys_name;
704 obj_name = std::string(proc->
data()->GetName());
705 boost::replace_all(obj_name, proc->
process(),
"$PROCESS");
706 obj_name = std::string(data_ws_map[proc->
data()]->GetName()) +
":" +
710 obj_name = std::string(proc->
pdf()->GetName());
711 boost::replace_all(obj_name, proc->
process(),
"$PROCESS");
712 obj_name = std::string(pdf_ws_map[proc->
pdf()]->GetName()) +
715 for (
unsigned j = 0; j < pmap[i].size(); ++j) {
718 obj_sys_name = std::string(sys->
data_u()->GetName());
719 boost::replace_all(obj_sys_name, sys->
name() +
"Up",
"$SYSTEMATIC");
720 boost::replace_all(obj_sys_name, sys->
process(),
"$PROCESS");
721 obj_sys_name = std::string(data_ws_map[sys->
data_u()]->GetName()) +
726 obj_sys_name = std::string(sys->
pdf_u()->GetName());
727 boost::replace_all(obj_sys_name, sys->
name() +
"Up",
"$SYSTEMATIC");
728 boost::replace_all(obj_sys_name, sys->
process(),
"$PROCESS");
729 obj_sys_name = std::string(pdf_ws_map[sys->
pdf_u()]->GetName()) +
737 if (prototype.pattern.size() && prototype.pattern != obj_name) {
738 prototype_ok =
false;
741 if (prototype.syst_pattern.size() && obj_sys_name.size() &&
742 prototype.syst_pattern != obj_sys_name) {
743 prototype_ok =
false;
746 if (!prototype.pattern.size()) {
748 prototype.process =
"*";
749 prototype.category =
bin;
750 prototype.pattern = obj_name;
752 if (!prototype.syst_pattern.size()) {
753 prototype.syst_pattern = obj_sys_name;
756 full_list.emplace_back(proc->
process(), proc->
bin(), obj_name,
763 if (!prototype_ok || hist_bins.count(
bin)) {
764 for (
auto m : full_list) {
765 mappings.push_back(m);
768 mappings.push_back(prototype);
779 bool is_counting =
true;
781 if (obs->
shape() !=
nullptr || obs->
data() !=
nullptr) {
787 if (proc->
shape() !=
nullptr || proc->
data() !=
nullptr ||
788 proc->
pdf() !=
nullptr) {
795 if (!root_file.IsOpen() && !is_counting) {
796 throw std::runtime_error(
FNERROR(
797 std::string(
"Output ROOT file is not open: ") + root_file.GetName()));
800 std::unique_ptr<std::ostream> txt_file_ptr =
nullptr;
803 std::string zip_ext =
".gz";
804 bool has_zip_ext = (name.length() >= zip_ext.length() && name.compare(name.length() - zip_ext.length(), zip_ext.length(), zip_ext) == 0);
807 txt_file_ptr = std::make_unique<zstr::ofstream>(name);
809 txt_file_ptr = std::make_unique<std::ofstream>(name);
811 if (txt_file_ptr->fail()) {
812 throw std::runtime_error(
FNERROR(
"Unable to create file: " + name));
815 std::ostream & txt_file = *txt_file_ptr;
820 std::string dashes(80,
'-');
824 std::set<std::string> sys_set;
825 std::set<std::string> param_set;
826 std::set<std::string> rateparam_set;
828 if (sys->
type() ==
"rateParam") {
829 rateparam_set.insert(sys->name());
831 else if (sys->
type() ==
"param"){
832 param_set.insert(sys->name());
834 else sys_set.insert(sys->
name());
836 txt_file <<
"imax " <<
bin_set.size()
837 <<
" number of bins\n";
838 txt_file <<
"jmax " << proc_set.size() - 1
839 <<
" number of processes minus 1\n";
840 txt_file <<
"kmax " <<
"*" <<
" number of nuisance parameters\n";
841 txt_file << dashes <<
"\n";
844 std::vector<HistMapping> mappings;
845 FillHistMappings(mappings);
849 auto proc_sys_map = this->GenerateProcSystMap();
851 std::set<std::string> all_dependents_pars;
852 std::set<std::string> multipdf_cats;
853 for (
auto proc : procs_) {
854 if (!proc->
pdf())
continue;
859 RooAbsData
const* data_obj = FindMatchingData(proc.get());
861 RooArgSet norm_par_list;
869 RooRealVar mx(
"CMS_th1x" ,
"CMS_th1x", 0, 1);
870 RooArgSet tmp_set(mx);
876 for (RooAbsArg *par_list_var : par_list) {
877 if (
dynamic_cast<RooRealVar*
>(par_list_var)) {
878 all_dependents_pars.insert(par_list_var->GetName());
885 if (
dynamic_cast<RooCategory*
>(par_list_var) &&
886 proc->
pdf()->InheritsFrom(
"RooMultiPdf")) {
887 multipdf_cats.insert(par_list_var->GetName());
892 for (RooAbsArg *nm_list_var: norm_par_list) {
893 if (
dynamic_cast<RooRealVar*
>(nm_list_var)) {
894 all_dependents_pars.insert(nm_list_var->GetName());
901 std::string file_name = root_file.GetName();
905 boost::filesystem::path root_file_path =
906 boost::filesystem::absolute(file_name);
908 boost::filesystem::path txt_file_path =
909 boost::filesystem::absolute(name).parent_path();
911 file_name =
make_relative(txt_file_path, root_file_path).string();
915 std::set<std::string> used_wsps;
917 for (
auto const& mapping : mappings) {
918 if (!mapping.is_fake) {
919 txt_file << format(
"shapes %s %s %s %s %s\n")
924 % mapping.syst_pattern;
925 if (mapping.IsPdf() || mapping.IsData()) {
926 used_wsps.insert(mapping.WorkspaceName());
927 if (mapping.syst_pattern !=
"") {
928 used_wsps.insert(mapping.SystWorkspaceName());
932 txt_file << format(
"shapes %s %s %s\n")
938 txt_file << dashes <<
"\n";
941 for (
auto ws_it : wspaces_) {
942 if (ws_it.first ==
"_rateParams")
continue;
944 if (!used_wsps.count(ws_it.second->GetName()))
continue;
950 if (obs_.size() > 0) {
952 for (
auto const& obs : obs_) {
953 txt_file << format(
"%-15s ") % obs->
bin();
955 bool add_dir = TH1::AddDirectoryStatus();
956 TH1::AddDirectory(
false);
957 std::unique_ptr<TH1> h((TH1*)(obs->
shape()->Clone()));
958 h->Scale(obs->
rate());
959 WriteHistToFile(h.get(), &root_file, mappings, obs->
bin(),
"data_obs",
961 TH1::AddDirectory(add_dir);
965 txt_file <<
"observation ";
970 std::string obs_fmt_int =
"%-15.1f ";
971 std::string obs_fmt_flt =
"%-15.4f ";
972 for (
auto const& obs : obs_) {
974 std::fabs(obs->
rate() - std::round(obs->
rate())) > 1E-4;
975 txt_file << format(
is_float ? obs_fmt_flt : obs_fmt_int)
979 txt_file << dashes <<
"\n";
982 unsigned sys_str_len = 14;
983 for (
auto const& sys : sys_set) {
984 if (sys.length() > sys_str_len) sys_str_len = sys.length();
986 for (
auto const& sys : all_dependents_pars) {
987 if (sys.length() > sys_str_len) sys_str_len = sys.length();
989 std::string sys_str_short = boost::lexical_cast<std::string>(sys_str_len);
990 std::string sys_str_long = boost::lexical_cast<std::string>(sys_str_len+9);
992 auto getProcLen = [](
auto const& proc) -> std::string {
993 std::size_t proc_len = 15;
994 proc_len = std::max(proc_len, proc->
process().size());
995 std::string proc_len_str = boost::lexical_cast<std::string>(proc_len);
999 txt_file << format(
"%-"+sys_str_long+
"s") %
"bin";
1000 for (
auto const& proc : procs_) {
1001 if (proc->
shape()) {
1002 bool add_dir = TH1::AddDirectoryStatus();
1003 TH1::AddDirectory(
false);
1005 WriteHistToFile(h.get(), &root_file, mappings, proc->
bin(),
1007 TH1::AddDirectory(add_dir);
1009 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % proc->
bin();
1013 txt_file << format(
"%-"+sys_str_long+
"s") %
"process";
1015 for (
auto const& proc : procs_) {
1016 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % proc->
process();
1020 txt_file << format(
"%-"+sys_str_long+
"s") %
"process";
1023 std::map<std::string, int> p_ids;
1024 unsigned sig_id = 0;
1025 unsigned bkg_id = 1;
1026 for (
unsigned p = 0; p < procs_.size(); ++p) {
1027 if (!procs_[p]->signal()) {
1028 if (p_ids.count(procs_[p]->process()) == 0) {
1029 p_ids[procs_[p]->process()] = bkg_id;
1034 if (p_ids[procs_[p]->
process()] <= 0)
throw std::runtime_error(
FNERROR(
"Ambiguous definition of process (" + procs_[p]->
process() +
") as both signal and background"));
1037 unsigned q = procs_.size() - (p + 1);
1038 if (procs_[q]->signal()) {
1039 if (p_ids.count(procs_[q]->process()) == 0) {
1040 p_ids[procs_[q]->process()] = sig_id;
1045 if (p_ids[procs_[q]->
process()] > 0)
throw std::runtime_error(
FNERROR(
"Ambiguous definition of process (" + procs_[q]->
process() +
") as both signal and background"));
1049 for (
auto const& proc : procs_) {
1050 txt_file << format(
"%-"+getProcLen(proc)+
"s ") % p_ids[proc->
process()];
1055 txt_file << format(
"%-"+sys_str_long+
"s") %
"rate";
1056 for (
auto const& proc : procs_) {
1057 txt_file << format(
"%-"+getProcLen(proc)+
".6g ") % proc->
no_norm_rate();
1060 txt_file << dashes <<
"\n";
1064 for (
auto par : params_) {
1066 if (p->
err_d() != 0.0 && p->
err_u() != 0.0 &&
1067 all_dependents_pars.count(p->
name()) && sys_set.count(p->
name())) {
1068 txt_file << format((format(
"%%-%is param %%g %%g") % sys_str_len).str()) %
1070 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1071 p->
range_u() != std::numeric_limits<double>::max()) {
1072 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1079 for (
auto par : params_) {
1081 if (p->
err_d() != 0.0 && p->
err_u() != 0.0 &&
1082 param_set.count(p->
name()) && !sys_set.count(p->
name())) {
1083 txt_file << format((format(
"%%-%is param %%g %%g") % sys_str_len).str()) %
1085 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1086 p->
range_u() != std::numeric_limits<double>::max()) {
1087 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1094 for (
auto syst : systs_) {
1095 if (syst->type() ==
"param" && param_set.count(syst->name()) && rateparam_set.count(syst->name())) {
1096 txt_file << format((format(
"%%-%is param %%s") % sys_str_len).str()) %
1097 syst->name() % syst->param_str_ext();
1102 for (
auto const& sys : sys_set) {
1103 std::vector<std::string> line(procs_.size() + 2);
1105 bool seen_lnN =
false;
1106 bool seen_lnU =
false;
1107 bool seen_shape =
false;
1108 bool seen_shapeN2 =
false;
1109 bool seen_shapeU =
false;
1110 if (param_set.count(sys))
continue;
1111 for (
unsigned p = 0; p < procs_.size(); ++p) {
1113 for (
unsigned n = 0; n < proc_sys_map[p].size(); ++n) {
1116 if (ptr->
name() != sys)
continue;
1117 std::string tp = ptr->
type();
1118 if (tp ==
"lnN" || tp ==
"lnU") {
1119 if (tp ==
"lnN") seen_lnN =
true;
1120 if (tp ==
"lnU") seen_lnU =
true;
1124 : (format(
"%g") % ptr->
value_u()).str();
1127 if (tp ==
"shape" || tp ==
"shapeN2" || tp ==
"shapeU") {
1128 if (tp ==
"shape") seen_shape =
true;
1129 if (tp ==
"shapeN2") seen_shapeN2 =
true;
1130 if (tp ==
"shapeU") seen_shapeU =
true;
1131 line[p + 2] = (format(
"%g") % ptr->
scale()).str();
1133 bool add_dir = TH1::AddDirectoryStatus();
1134 TH1::AddDirectory(
false);
1136 h_d->Scale(procs_[p]->rate() * ptr->
value_d());
1137 WriteHistToFile(h_d.get(), &root_file, mappings, ptr->
bin(),
1140 h_u->Scale(procs_[p]->rate() * ptr->
value_u());
1141 WriteHistToFile(h_u.get(), &root_file, mappings, ptr->
bin(),
1143 TH1::AddDirectory(add_dir);
1147 if (!flags_.at(
"allow-missing-shapes")) {
1148 std::stringstream err;
1149 err <<
"Trying to write shape uncertainty with missing "
1152 throw std::runtime_error(
FNERROR(err.str()));
1159 line[1] =
"shapeN2";
1160 }
else if (seen_shapeU) {
1162 }
else if (seen_lnU) {
1164 }
else if (seen_lnN && !seen_shape) {
1166 }
else if (!seen_lnN && seen_shape) {
1168 }
else if (seen_lnN && seen_shape) {
1171 throw std::runtime_error(
FNERROR(
"Systematic type could not be deduced"));
1173 txt_file << format(
"%-" + sys_str_short +
"s %-7s ") % line[0] % line[1];
1174 for (
unsigned p = 0; p < procs_.size(); ++p) {
1175 txt_file << format(
"%-"+getProcLen(procs_[p])+
"s ") % line[p + 2];
1184 std::vector<std::vector<std::string> > floating_params;
1185 for (
auto const& sys : rateparam_set) {
1186 FNLOGC(log(), verbosity_ > 1) <<
"Doing rateParam: " << sys <<
"\n";
1189 FNLOGC(log(), verbosity_ > 1) <<
"Procs for this rateParam: \n";
1190 for (
auto const& proc : procs_rp) {
1191 FNLOGC(log(), verbosity_ > 1) <<
" - " << proc <<
"\n";
1196 if (bins_rp.size() > 0 && bins_rp.size() == bins_all.size()) {
1197 floating_params.push_back({sys,
"*", proc});
1199 for (
auto const&
bin : bins_rp) {
1200 floating_params.push_back({sys,
bin, proc});
1203 FNLOGC(log(), verbosity_ > 1)
1204 << bins_rp.size() <<
"/" << bins_all.size()
1205 <<
" bins with this process have a rateParam\n";
1208 std::set<std::string> frozen_params;
1209 for (
auto const& rp : floating_params) {
1210 if (params_.count(rp[0])) {
1212 txt_file << format(
"%-" + sys_str_short +
"s %-10s %-10s %-10s %g") %
1213 rp[0] %
"rateParam" % rp[1] % rp[2] % par->
val();
1216 if (par->
range_d() != std::numeric_limits<double>::lowest() &&
1217 par->
range_u() != std::numeric_limits<double>::max()) {
1218 txt_file << format(
" [%.4g,%.4g]") % par->
range_d() % par->
range_u();
1222 frozen_params.insert(rp[0]);
1226 for (
auto const& par : frozen_params) {
1227 txt_file <<
"nuisance edit freeze " << par <<
"\n";
1229 std::set<std::string> all_fn_param_args;
1230 for (
auto const& rp : floating_params) {
1231 if (!params_.count(rp[0])) {
1234 RooWorkspace *rp_ws = wspaces_.at(
"_rateParams").get();
1235 RooAbsArg *arg = rp_ws->arg(rp[0].c_str());
1236 if (arg->getStringAttribute(
"wspSource")) {
1237 txt_file << format(
"%-" + sys_str_short +
1238 "s %-10s %-10s %-10s %-20s\n") %
1239 rp[0] %
"rateParam" % rp[1] % rp[2] % std::string(arg->getStringAttribute(
"wspSource"));
1242 RooFormulaVar* form =
1243 dynamic_cast<RooFormulaVar*
>(arg);
1246 std::stringstream ss;
1247 form->printMetaArgs(ss);
1248 std::string form_str = ss.str();
1251 std::size_t first = form_str.find_first_of(
'"');
1252 std::size_t last = form_str.find_last_of(
'"');
1253 form_str = form_str.substr(first + 1, last - first - 1);
1256 std::unique_ptr<RooArgSet>(form->getParameters(RooArgList()))->getSize();
1257 std::string args =
"";
1258 for (
unsigned i = 0; i < npars; ++i) {
1259 all_fn_param_args.insert(std::string(form->getParameter(i)->GetName()));
1260 args += std::string(form->getParameter(i)->GetName());
1261 if (i < (npars-1)) {
1265 txt_file << format(
"%-" + sys_str_short +
1266 "s %-10s %-10s %-10s %-20s %s\n") %
1267 rp[0] %
"rateParam" % rp[1] % rp[2] % form_str % args;
1271 if (wspaces_.count(
"_rateParams")) {
1272 RooWorkspace *rp_ws = wspaces_.at(
"_rateParams").get();
1273 RooArgSet vars = rp_ws->allVars();
1274 for (RooAbsArg *v : vars) {
1275 RooRealVar *y =
dynamic_cast<RooRealVar*
>(v);
1276 if (y && y->getAttribute(
"extArg") && all_fn_param_args.count(std::string(y->GetName()))) {
1277 if (!params_.count(y->GetName()) && y->getStringAttribute(
"wspSource")) {
1278 std::vector<std::string> tokens;
1279 boost::split(tokens, y->getStringAttribute(
"wspSource"), boost::is_any_of(
":"));
1280 std::string wsp_name_extArg = tokens[1];
1281 y->setStringAttribute(
"wspSource", (std::string(root_file.GetName()) +
":" + tokens[1]).c_str());
1282 txt_file << format(
"%-" + sys_str_short +
1283 "s %-10s %-20s\n") %
1284 y->GetName() %
"extArg" % std::string(y->getStringAttribute(
"wspSource"));
1288 Parameter const* p = params_.at(y->GetName()).get();
1289 txt_file << format(
"%-" + sys_str_short +
"s %-10s %g") %
1290 y->GetName() %
"extArg" % p->
val();
1291 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1292 p->
range_u() != std::numeric_limits<double>::max()) {
1293 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1301 RooArgSet funcs = rp_ws->allFunctions();
1302 for (RooAbsArg *v : funcs){
1303 RooAbsReal *y =
dynamic_cast<RooAbsReal*
>(v);
1304 if (y && y->getAttribute(
"extArg") && y->getStringAttribute(
"wspSource") && all_fn_param_args.count(std::string(y->GetName()))) {
1305 txt_file << format(
"%-" + sys_str_short +
1306 "s %-10s %-20s\n") %
1307 y->GetName() %
"extArg" % std::string(y->getStringAttribute(
"wspSource"));
1314 std::set<std::string> ws_vars;
1315 for (
auto iter : wspaces_) {
1316 RooArgSet vars = iter.second->allVars();
1317 for (RooAbsArg *v : vars) {
1318 RooRealVar *y =
dynamic_cast<RooRealVar*
>(v);
1319 if (y) ws_vars.insert(y->GetName());
1331 for (
auto par : params_) {
1333 if (p->
err_d() != 0.0 && p->
err_u() != 0.0 &&
1334 all_dependents_pars.count(p->
name()) && !sys_set.count(p->
name()) && !param_set.count(p->
name())) {
1335 txt_file << format((format(
"%%-%is param %%g %%g") % sys_str_len).str()) %
1337 if (p->
range_d() != std::numeric_limits<double>::lowest() &&
1338 p->
range_u() != std::numeric_limits<double>::max()) {
1339 txt_file << format(
" [%.4g,%.4g]") % p->
range_d() % p->
range_u();
1345 for (
auto par : multipdf_cats) {
1346 txt_file << format((format(
"%%-%is discrete\n") % sys_str_len).str()) % par;
1349 for (
auto stat_settings : auto_stats_settings_) {
1350 if (!
bin_set.count(stat_settings.first))
continue;
1351 txt_file << format(
"%s autoMCStats %g %i %i\n") % stat_settings.first %
1352 stat_settings.second.event_threshold %
1353 stat_settings.second.include_signal %
1354 stat_settings.second.hist_mode;
1361 std::map<std::string, std::set<std::string>> groups;
1362 for (
auto const& par : params_) {
1364 if (!all_dependents_pars.count(p->
name()) && !sys_set.count(p->
name())) {
1367 if (p->
err_d() == 0.0 && p->
err_u() == 0.0)
continue;
1368 for (
auto const& str : p->
groups()) {
1369 if (!groups.count(str)) {
1370 groups[str] = std::set<std::string>();
1372 groups[str].insert(p->
name());
1375 for (
auto const& gr : groups) {
1376 txt_file << format(
"%s group =") % gr.first;
1377 for (
auto const& np : gr.second) {
1378 txt_file <<
" " << np;
1383 for (
auto const& postl : post_lines_) {
1384 txt_file << postl <<
"\n";
1388 void CombineHarvester::WriteHistToFile(
1391 std::vector<HistMapping>
const& mappings,
1392 std::string
const& bin,
1394 std::string
const& mass,
1395 std::string
const& nuisance,
1397 StrPairVec attempts = this->GenerateShapeMapAttempts(
process,
bin);
1398 for (
unsigned a = 0; a < attempts.size(); ++a) {
1399 for (
unsigned m = 0; m < mappings.size(); ++m) {
1400 if ((attempts[a].first == mappings[m].
process) &&
1401 (attempts[a].second == mappings[m].category)) {
1402 std::string p = (type == 0 ?
1403 mappings[m].pattern : mappings[m].syst_pattern);
1404 boost::replace_all(p,
"$CHANNEL",
bin);
1405 boost::replace_all(p,
"$PROCESS",
process);
1406 boost::replace_all(p,
"$MASS",
mass);
1407 if (type == 1) boost::replace_all(p,
"$SYSTEMATIC", nuisance+
"Down");
1408 if (type == 2) boost::replace_all(p,
"$SYSTEMATIC", nuisance+
"Up");
CombineHarvester & signals()
CombineHarvester & mass(std::vector< std::string > const &vec, bool cond=true)
void CreateParameterIfEmpty(std::string const &name)
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void ForEachSyst(Function func)
void ForEachProc(Function func)
std::set< R > SetFromObs(T func)
Fill an std::set using only the Observation entries.
void SetGroup(std::string const &name, std::vector< std::string > const &patterns)
Add parameters to a given group.
void WriteDatacard(std::string const &name, std::string const &root_file)
CombineHarvester & analysis(std::vector< std::string > const &vec, bool cond=true)
std::set< R > SetFromSysts(T func)
Fill an std::set using only the Systematic entries.
std::set< R > SetFromProcs(T func)
Fill an std::set using only the Process entries.
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester & syst_name(std::vector< std::string > const &vec, bool cond=true)
void ForEachObs(Function func)
bool GetFlag(std::string const &flag) const
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
CombineHarvester & histograms()
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
CombineHarvester & syst_type(std::vector< std::string > const &vec, bool cond=true)
virtual std::string const & process() const
virtual std::string const & bin() const
virtual std::string const & mass() const
TH1 const * shape() const
RooAbsData const * data() const
std::set< std::string > & groups()
void set_err_u(double const &err_u)
void set_err_d(double const &err_d)
void set_range_u(double const &range_u)
void set_val(double const &val)
std::string const & name() const
void set_range_d(double const &range_d)
double no_norm_rate() const
Get the process normalisation without multiplying by the RooAbsReal value (in the case that it's pres...
RooAbsReal const * pdf() const
std::unique_ptr< TH1 > ClonedScaledShape() const
RooAbsReal const * norm() const
RooAbsData const * data() const
TH1 const * shape() const
RooDataHist const * data_d() const
std::unique_ptr< TH1 > ClonedShapeU() const
std::string const & type() const
std::unique_ptr< TH1 > ClonedShapeD() const
TH1 const * shape_d() const
TH1 const * shape_u() const
std::string const & name() const
RooAbsReal const * pdf_d() const
RooAbsReal const * pdf_u() const
RooDataHist const * data_u() const
static std::ostream & PrintHeader(std::ostream &out)
void WriteToTFile(T *ptr, TFile *file, std::string const &path)
std::vector< std::string > ParseFileLines(std::string const &file_name)
boost::filesystem::path make_relative(boost::filesystem::path p_from, boost::filesystem::path p_to)
Determine the relative path from one file to another.
std::vector< T > Set2Vec(std::set< T > const &in)
bool contains(const Range &r, T p)
bool is_float(std::string const &str)
RooArgSet ParametersByName(RooAbsReal const *pdf, RooArgSet const *dat_vars)
std::shared_ptr< TFile > file
std::string SystWorkspaceName() const
std::shared_ptr< RooWorkspace > ws
std::shared_ptr< RooWorkspace > sys_ws
std::string WorkspaceName() const