CombineHarvester
CombineHarvester_Datacards.cc
Go to the documentation of this file.
2 #include <cmath>
3 #include <vector>
4 #include <map>
5 #include <string>
6 #include <iostream>
7 #include <utility>
8 #include <set>
9 #include <fstream>
10 #include <sstream>
11 #include <fnmatch.h>
12 #include "boost/lexical_cast.hpp"
13 #include "boost/algorithm/string.hpp"
14 #include "boost/format.hpp"
15 #include "boost/regex.hpp"
16 #include "boost/filesystem.hpp"
17 #include "TDirectory.h"
18 #include "TH1.h"
19 #include "RooRealVar.h"
20 #include "RooFormulaVar.h"
21 #include "RooCategory.h"
22 #include "RooConstVar.h"
23 
32 #include "CombineHarvester/CombineTools/interface/zstr.hpp"
33 namespace ch {
34 
35 // Bug fix for RooConstVar CompatibilityA
36 RooWorkspace* CombineHarvester::fixRooConstVar(RooWorkspace *win, bool useRooRealVar, bool clean)
37 {
38  if ( !GetFlag("fix-rooconstvar")) return win;
39  std::cout<<"[INFO]"<< "Running fixRooConstVar on "<<win->GetName()<<std::endl;
40  // to fix RooConstVar we need to create a new workspace,
41  // import them first and later reimport all the other objects
42  std::string name = win->GetName();
43  RooWorkspace *wout = new RooWorkspace(name.c_str(), (std::string( win->GetTitle() ) + "_RCVfix").c_str() ) ;
44  for (auto cobj : win->components()) {
45  if (cobj->IsA() == RooConstVar::Class() )
46  {
47  RooConstVar* cvar = dynamic_cast<RooConstVar*> (cobj);
48  std::cout<<"[INFO] Found RooConstVar '"<<cvar->GetName()<<"' with value "<< cvar->getVal()<<" in workspace "<<name<<std::endl;
49  if (useRooRealVar)
50  {
51  RooRealVar rrv = RooRealVar(cvar->GetName(),cvar->GetTitle(),cvar->getVal());
52  rrv . removeRange(); // the line above may already do it
53  rrv . setConstant();
54  wout -> import ( rrv, RooFit::RecycleConflictNodes());
55  }
56  else{
57  RooConstVar rcv = RooConstVar(cvar->GetName(),cvar->GetTitle(),cvar->getVal());
58  wout -> import ( rcv, RooFit::RecycleConflictNodes());
59  }
60 
61  std::cout<<" -> imported in workspace"<<std::endl;
62  wout -> obj( cvar->GetName() ) -> Print("T");
63  }
64 
65  }
66 
67  // Import all other stuff
68  wout -> import(win->components(), RooFit::RecycleConflictNodes());
69  for (auto d : win->allData() ) wout -> import(*d);
70  if (clean) delete win; // if chained with clone and other stuff, this is the best option.
71  return wout;
72 }
73 
74 // Extract info from filename using parse rule like:
75 // ".*{MASS}/{ANALYSIS}_{CHANNEL}_{BINID}_{ERA}.txt"
76 int CombineHarvester::ParseDatacard(std::string const& filename,
77  std::string parse_rules) {
78  boost::replace_all(parse_rules, "$ANALYSIS", "(?<ANALYSIS>[\\w\\.]+)");
79  boost::replace_all(parse_rules, "$ERA", "(?<ERA>[\\w\\.]+)");
80  boost::replace_all(parse_rules, "$CHANNEL", "(?<CHANNEL>[\\w\\.]+)");
81  boost::replace_all(parse_rules, "$BINID", "(?<BINID>[\\w\\.]+)");
82  boost::replace_all(parse_rules, "$MASS", "(?<MASS>[\\w\\.]+)");
83  boost::regex rgx(parse_rules);
84  boost::smatch matches;
85  boost::regex_search(filename, matches, rgx);
86  this->ParseDatacard(filename,
87  matches.str("ANALYSIS"),
88  matches.str("ERA"),
89  matches.str("CHANNEL"),
90  matches.str("BINID").length() ?
91  boost::lexical_cast<int>(matches.str("BINID")) : 0,
92  matches.str("MASS"));
93  return 0;
94 }
95 
96 int CombineHarvester::ParseDatacard(std::string const& filename,
97  std::string const& analysis,
98  std::string const& era,
99  std::string const& channel,
100  int bin_id,
101  std::string const& mass) {
102  TH1::AddDirectory(kFALSE);
103  // Load the entire datacard into memory as a vector of strings
104  std::vector<std::string> lines = ch::ParseFileLines(filename);
105  // Loop through lines, trimming whitespace at the beginning or end
106  // then splitting each line into a vector of words (using any amount
107  // of whitespace as the separator). We skip any line of zero length
108  // or which starts with a "#" or "-" character.
109  std::vector<std::vector<std::string>> words;
110  for (unsigned i = 0; i < lines.size(); ++i) {
111  boost::trim(lines[i]);
112  if (lines[i].size() == 0) continue;
113  if (lines[i].at(0) == '#' || lines[i].at(0) == '-') continue;
114  words.push_back(std::vector<std::string>());
115  boost::split(words.back(), lines[i], boost::is_any_of("\t "),
116  boost::token_compress_on);
117  }
118 
119  std::vector<HistMapping> hist_mapping;
120  // std::map<std::string, RooAbsData*> data_map;
121  std::map<std::string, std::shared_ptr<TFile>> file_store;
122  std::map<std::string, std::shared_ptr<RooWorkspace>> ws_store;
123 
124  bool start_nuisance_scan = false;
125  unsigned r = 0;
126 
127  // We will allow cards that describe a single bin to have an "observation"
128  // line without a "bin" line above it. We probably won't know the bin name
129  // when we parse this line, so we'll store it here and fix it later
130  std::shared_ptr<ch::Observation> single_obs = nullptr;
131  std::set<std::string> bin_names;
132 
133  // Store the groups that we encounter
134  std::map<std::string, std::set<std::string>> groups;
135 
136  // Do a first pass just for shapes, as some cards
137  // declare observations / processes before the shapes lines
138  for (unsigned i = 0; i < words.size(); ++i) {
139  // Ignore line if it only has one word
140  if (words[i].size() <= 1) continue;
141 
142  // If the line begins "shapes" then we've
143  // found process --> TH1 mapping information
144  if (boost::iequals(words[i][0], "shapes") && words[i].size() >= 5) {
145  hist_mapping.push_back(HistMapping());
146  HistMapping &mapping = hist_mapping.back();
147  mapping.process = words[i][1];
148  mapping.category = words[i][2];
149  // The root file path given in the datacard is relative to the datacard
150  // path, so we join the path to the datacard with the path to the file
151  std::string dc_path;
152  std::size_t slash = filename.find_last_of('/');
153  if (slash != filename.npos) {
154  dc_path = filename.substr(0, slash) + "/" + words[i][3];
155  } else {
156  dc_path = words[i][3];
157  }
158  if (!file_store.count(dc_path))
159  file_store[dc_path] = std::make_shared<TFile>(dc_path.c_str());
160  mapping.file = file_store.at(dc_path);
161  mapping.pattern = words[i][4];
162  if (words[i].size() > 5) mapping.syst_pattern = words[i][5];
163 
164  if (mapping.IsPdf()) {
165  std::string store_key =
166  mapping.file->GetName() + mapping.WorkspaceName();
167  if (!ws_store.count(store_key)) {
168  mapping.file->cd();
169  std::shared_ptr<RooWorkspace> ptr(
170  fixRooConstVar(
171  dynamic_cast<RooWorkspace*>(gDirectory->Get(mapping.WorkspaceName().c_str()))
172  )
173  );
174  if (!ptr) {
175  throw std::runtime_error(FNERROR("Workspace not found in file"));
176  }
177  ws_store[store_key] = ptr;
178  }
179  mapping.ws = SetupWorkspace(*(ws_store[store_key]), true);
180  }
181  if (mapping.IsPdf() && mapping.syst_pattern != "") {
182  std::string store_key =
183  mapping.file->GetName() + mapping.SystWorkspaceName();
184  if (!ws_store.count(store_key)) {
185  mapping.file->cd();
186  std::shared_ptr<RooWorkspace> ptr(
187  fixRooConstVar(
188  dynamic_cast<RooWorkspace*>(gDirectory->Get(mapping.SystWorkspaceName().c_str()))
189  )
190  );
191  if (!ptr) {
192  throw std::runtime_error(FNERROR("Workspace not found in file"));
193  }
194  ws_store[store_key] = ptr;
195  }
196  mapping.sys_ws = SetupWorkspace(*(ws_store[store_key]), true);
197  }
198  }
199 
200  // We can also have a "FAKE" shape directive
201  // Must be four words long: shapes * * FAKE
202  if (boost::iequals(words[i][0], "shapes") && words[i].size() == 4 &&
203  boost::iequals(words[i][3], "FAKE")) {
204  hist_mapping.push_back(HistMapping());
205  HistMapping &mapping = hist_mapping.back();
206  mapping.process = words[i][1];
207  mapping.category = words[i][2];
208  mapping.is_fake = true;
209  }
210  }
211 
212  for (unsigned i = 0; i < words.size(); ++i) {
213  if (words[i].size() >= 3 &&
214  boost::iequals(words[i][1], "extArg")) {
215  if (verbosity_ > 1) {
216  FNLOG(log()) << "Processing extArg line:\n";
217  for (auto const& str : words[i]) {
218  log() << str << "\t";
219  }
220  log() << "\n";
221  }
222 
223  bool has_range = words[i].size() == 4 && words[i][3][0] == '[';
224  std::string param_name = words[i][0];
225  bool is_wsp_rateparam = false;
226  try {
227  boost::lexical_cast<double>(words[i][2]);
228  } catch (boost::bad_lexical_cast &) {
229  is_wsp_rateparam = true;
230  }
231  if ((!is_wsp_rateparam) && (words[i].size() == 3 || has_range)) {
232  ch::Parameter* param = SetupRateParamVar(
233  param_name, boost::lexical_cast<double>(words[i][2]), true);
234  param->set_err_u(0.);
235  param->set_err_d(0.);
236  if (has_range) {
237  std::vector<std::string> tokens;
238  boost::split(tokens, words[i][3], boost::is_any_of("[],"));
239  if (tokens.size() == 4) {
240  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
241  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
242  FNLOGC(log(), verbosity_ > 1) << "Setting parameter range to " << words[i][2];
243  }
244  }
245  } else if (words[i].size() == 3 && is_wsp_rateparam) {
246  if (!SetupRateParamWspObjFromWsStore(param_name, words[i][2], ws_store)) {
247  SetupRateParamWspObj(param_name, words[i][2], true);
248  }
249  }
250  }
251  }
252 
253  // Loop through the vector of word vectors
254  for (unsigned i = 0; i < words.size(); ++i) {
255  // Ignore line if it only has one word
256  if (words[i].size() <= 1) continue;
257 
258  // Want to check this line and the previous one, so need i >= 1.
259  // If the first word on this line is "observation" and "bin" on
260  // the previous line then we've found the entries for data, and
261  // can add Observation objects
262  if (i >= 1) {
263  if ( boost::iequals(words[i][0], "observation") &&
264  boost::iequals(words[i-1][0], "bin") &&
265  words[i].size() == words[i-1].size()) {
266  for (unsigned p = 1; p < words[i].size(); ++p) {
267  auto obs = std::make_shared<Observation>();
268  obs->set_bin(words[i-1][p]);
269  obs->set_rate(boost::lexical_cast<double>(words[i][p]));
270  obs->set_analysis(analysis);
271  obs->set_era(era);
272  obs->set_channel(channel);
273  obs->set_bin_id(bin_id);
274  obs->set_mass(mass);
275 
276  LoadShapes(obs.get(), hist_mapping);
277 
278  obs_.push_back(obs);
279  }
280  }
281  }
282 
283  if (boost::iequals(words[i][0], "observation") &&
284  !boost::iequals(words[i-1][0], "bin") &&
285  words[i].size() == 2 &&
286  single_obs.get() == nullptr) {
287  for (unsigned p = 1; p < words[i].size(); ++p) {
288  single_obs = std::make_shared<Observation>();
289  single_obs->set_bin("");
290  single_obs->set_rate(boost::lexical_cast<double>(words[i][p]));
291  single_obs->set_analysis(analysis);
292  single_obs->set_era(era);
293  single_obs->set_channel(channel);
294  single_obs->set_bin_id(bin_id);
295  single_obs->set_mass(mass);
296  }
297  }
298 
299  // Similarly look for the lines indicating the different signal
300  // and background processes
301  // Once these are found save in line index for the rate line as r
302  // to we can refer back to these later, then assume that every
303  // line that follows is a nuisance parameter
304 
305  if (i >= 3) {
306  if ( boost::iequals(words[i][0], "rate") &&
307  boost::iequals(words[i-1][0], "process") &&
308  boost::iequals(words[i-2][0], "process") &&
309  boost::iequals(words[i-3][0], "bin") &&
310  words[i].size() == words[i-1].size() &&
311  words[i].size() == words[i-2].size() &&
312  words[i].size() == words[i-3].size()) {
313  for (unsigned p = 1; p < words[i].size(); ++p) {
314  auto proc = std::make_shared<Process>();
315  proc->set_bin(words[i-3][p]);
316  bin_names.insert(words[i-3][p]);
317  try {
318  int process_id = boost::lexical_cast<int>(words[i-2][p]);
319  proc->set_signal(process_id <= 0);
320  proc->set_process(words[i-1][p]);
321  } catch(boost::bad_lexical_cast &) {
322  int process_id = boost::lexical_cast<int>(words[i-1][p]);
323  proc->set_signal(process_id <= 0);
324  proc->set_process(words[i-2][p]);
325  }
326  proc->set_rate(boost::lexical_cast<double>(words[i][p]));
327  proc->set_analysis(analysis);
328  proc->set_era(era);
329  proc->set_channel(channel);
330  proc->set_bin_id(bin_id);
331  proc->set_mass(mass);
332 
333  LoadShapes(proc.get(), hist_mapping);
334 
335  procs_.push_back(proc);
336  }
337  r = i;
338  start_nuisance_scan = true;
339  }
340  }
341  if (start_nuisance_scan && words[i].size() >= 4) {
342  if (boost::iequals(words[i][1], "param")) {
343  std::string param_name = words[i][0];
344  Parameter * param = SetupRateParamVar(param_name, boost::lexical_cast<double>(words[i][2]));
345  param->set_val(boost::lexical_cast<double>(words[i][2]));
346  std::size_t slash_pos = words[i][3].find("/");
347  if (slash_pos != words[i][3].npos) {
348  param->set_err_d(
349  boost::lexical_cast<double>(words[i][3].substr(0, slash_pos)));
350  param->set_err_u(
351  boost::lexical_cast<double>(words[i][3].substr(slash_pos+1)));
352  } else {
353  param->set_err_u(+1.0 * boost::lexical_cast<double>(words[i][3]));
354  param->set_err_d(-1.0 * boost::lexical_cast<double>(words[i][3]));
355  }
356  if (words[i].size() >= 5) {
357  // We have a range
358  std::vector<std::string> tokens;
359  boost::split(tokens, words[i][4], boost::is_any_of("[],"));
360  if (tokens.size() == 4) {
361  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
362  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
363  }
364  }
365  auto sys = std::make_shared<Systematic>();
366  sys->set_name(param_name);
367  sys->set_type("param");
368  sys->set_analysis(analysis);
369  sys->set_era(era);
370  sys->set_channel(channel);
371  sys->set_bin_id(bin_id);
372  sys->set_mass(mass);
373  // Add string extension for edge case where param and rateParam have the same name
374  std::string syst_str_ext = (boost::format("%g %g") % param->val() % ((param->err_u() - param->err_d()) / 2.0)).str();
375  if (param->range_d() != std::numeric_limits<double>::lowest() &&
376  param->range_u() != std::numeric_limits<double>::max()) {
377  syst_str_ext += (boost::format(" [%.4g,%.4g]") % param->range_d() % param->range_u()).str();
378  }
379  sys->set_param_str_ext(syst_str_ext);
380  systs_.push_back(sys);
381  continue; // skip the rest of this now
382  }
383  }
384 
385  if (start_nuisance_scan && words[i].size() >= 5 &&
386  boost::iequals(words[i][1], "rateParam")) {
387  if (verbosity_ > 1) {
388  FNLOG(log()) << "Processing rateParam line:\n";
389  for (auto const& str : words[i]) {
390  log() << str << "\t";
391  }
392  log() << "\n";
393  }
394 
395  bool has_range = words[i].size() == 6 && words[i][5][0] == '[';
396  std::string param_name = words[i][0];
397  // If this is a free param may need to create a Parameter object
398  // If the line has 5 words then it can either be a floating param
399  // or one from a workspace. Otherwise if it has 6 then it's either
400  // a floating param with a range or a formula
401  bool is_wsp_rateparam = false;
402  try {
403  boost::lexical_cast<double>(words[i][4]);
404  } catch (boost::bad_lexical_cast &) {
405  is_wsp_rateparam = true;
406  }
407  if ((!is_wsp_rateparam) && (words[i].size() == 5 || has_range)) {
408  ch::Parameter* param = SetupRateParamVar(
409  param_name, boost::lexical_cast<double>(words[i][4]));
410  param->set_err_u(0.);
411  param->set_err_d(0.);
412  if (has_range) {
413  std::vector<std::string> tokens;
414  boost::split(tokens, words[i][5], boost::is_any_of("[],"));
415  if (tokens.size() == 4) {
416  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
417  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
418  FNLOGC(log(), verbosity_ > 1) << "Setting parameter range to " << words[i][5];
419  }
420  }
421  } else if (words[i].size() == 6 && !has_range) {
422  SetupRateParamFunc(param_name, words[i][4], words[i][5]);
423  } else if (words[i].size() == 5 && is_wsp_rateparam) {
424  SetupRateParamWspObj(param_name, words[i][4]);
425  }
426  for (unsigned p = 1; p < words[r].size(); ++p) {
427  bool matches_bin = false;
428  bool matches_proc = false;
429  int process_id;
430  std::string process;
431  std::string bin = words[r-3][p];
432  // Not great that we repeat this below
433  try {
434  process_id = boost::lexical_cast<int>(words[r-2][p]);
435  process = words[r-1][p];
436  } catch(boost::bad_lexical_cast &) {
437  process_id = boost::lexical_cast<int>(words[r-1][p]);
438  process = words[r-2][p];
439  }
440  // if (words[i][2] == "*" || words[i][2] == bin) {
441  if (words[i][2] == "*" || fnmatch(words[i][2].c_str(), bin.c_str(), 0) == 0) {
442  matches_bin = true;
443  }
444  // if (words[i][3] == "*" || words[i][3] == process) {
445  if (words[i][3] == "*" || fnmatch(words[i][3].c_str(), process.c_str(), 0) == 0) {
446  matches_proc = true;
447  }
448  if (!matches_bin || !matches_proc) continue;
449  auto sys = std::make_shared<Systematic>();
450  sys->set_bin(bin);
451  sys->set_signal(process_id <= 0);
452  sys->set_process(process);
453  sys->set_name(param_name);
454  sys->set_type("rateParam");
455  sys->set_analysis(analysis);
456  sys->set_era(era);
457  sys->set_channel(channel);
458  sys->set_bin_id(bin_id);
459  sys->set_mass(mass);
460  systs_.push_back(sys);
461  }
462  continue;
463  }
464 
465  if (start_nuisance_scan && words[i].size() >= 4 &&
466  boost::iequals(words[i][1], "group")) {
467  if (!groups.count(words[i][0])) {
468  groups[words[i][0]] = std::set<std::string>();
469  }
470  for (unsigned ig = 3; ig < words[i].size(); ++ig) {
471  groups[words[i][0]].insert(words[i][ig]);
472  }
473  continue;
474  }
475 
476  if (start_nuisance_scan && words[i].size() >= 3 &&
477  boost::iequals(words[i][1], "autoMCStats")) {
478  std::vector<std::string> for_bins;
479  if (words[i][0] == "*") {
480  for_bins = Set2Vec(bin_names);
481  } else {
482  for_bins.push_back(words[i][0]);
483  }
484  for (auto const& bin : for_bins) {
485  double thresh = boost::lexical_cast<double>(words[i][2]);
486  if (words[i].size() == 3) {
487  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh);
488  } else if (words[i].size() == 4) {
489  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]));
490  } else {
491  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]), boost::lexical_cast<int>(words[i][4]));
492  }
493  }
494  }
495 
496  if (start_nuisance_scan && words[i].size()-1 == words[r].size() && !boost::iequals(words[i][1], "autoMCStats")) {
497  for (unsigned p = 2; p < words[i].size(); ++p) {
498  if (words[i][p] == "-") continue;
499  auto sys = std::make_shared<Systematic>();
500  sys->set_bin(words[r-3][p-1]);
501  try {
502  int process_id = boost::lexical_cast<int>(words[r-2][p-1]);
503  sys->set_signal(process_id <= 0);
504  sys->set_process(words[r-1][p-1]);
505  } catch(boost::bad_lexical_cast &) {
506  int process_id = boost::lexical_cast<int>(words[r-1][p-1]);
507  sys->set_signal(process_id <= 0);
508  sys->set_process(words[r-2][p-1]);
509  }
510  sys->set_name(words[i][0]);
511  std::string type = words[i][1];
512  if (!contains(std::vector<std::string>{"shape", "shape?", "shapeN2", "shapeU", "lnN", "lnU"},
513  type)) {
514  throw std::runtime_error(
515  FNERROR("Systematic type " + type + " not supported"));
516  }
517  sys->set_type(words[i][1]);
518  sys->set_analysis(analysis);
519  sys->set_era(era);
520  sys->set_channel(channel);
521  sys->set_bin_id(bin_id);
522  sys->set_mass(mass);
523  sys->set_scale(1.0);
524  std::size_t slash_pos = words[i][p].find("/");
525  if (slash_pos != words[i][p].npos) {
526  // Assume asymmetric of form kDown/kUp
527  sys->set_value_d(
528  boost::lexical_cast<double>(words[i][p].substr(0, slash_pos)));
529  sys->set_value_u(
530  boost::lexical_cast<double>(words[i][p].substr(slash_pos+1)));
531  sys->set_asymm(true);
532  } else {
533  sys->set_value_u(boost::lexical_cast<double>(words[i][p]));
534  sys->set_asymm(false);
535  }
536  if (sys->type() == "shape" || sys->type() == "shapeN2" ||
537  sys->type() == "shapeU") {
538  sys->set_scale(boost::lexical_cast<double>(words[i][p]));
539  LoadShapes(sys.get(), hist_mapping);
540  } else if (sys->type() == "shape?") {
541  // This might fail, so we have to "try"
542  try {
543  LoadShapes(sys.get(), hist_mapping);
544  } catch (std::exception & e) {
545  if (verbosity_ > 0) {
546  LOGLINE(log(), "Systematic with shape? did not resolve to a shape");
547  log() << e.what();
548  }
549  }
550  if (!sys->shape_u() || !sys->shape_d()) {
551  sys->set_type("lnN");
552  } else {
553  sys->set_type("shape");
554  sys->set_scale(boost::lexical_cast<double>(words[i][p]));
555  }
556  }
557  if (sys->type() == "shape" || sys->type() == "shapeN2" ||
558  sys->type() == "shapeU")
559  sys->set_asymm(true);
560 
562  if (sys->type() == "lnU" || sys->type() == "shapeU") {
563  params_.at(sys->name())->set_err_d(0.);
564  params_.at(sys->name())->set_err_u(0.);
565  }
566  systs_.push_back(sys);
567  }
568  }
569  }
570  if (single_obs) {
571  if (bin_names.size() == 1) {
572  single_obs->set_bin(*(bin_names.begin()));
573  LoadShapes(single_obs.get(), hist_mapping);
574  obs_.push_back(single_obs);
575  } else {
576  throw std::runtime_error(FNERROR(
577  "Input card specifies a single observation entry without a bin, but "
578  "multiple bins defined elsewhere"));
579  }
580  }
581 
582  // Finally populate the groups
583  for (auto const& grp : groups) {
584  this->SetGroup(grp.first, ch::Set2Vec(grp.second));
585  }
586  return 0;
587 }
588 
589 void CombineHarvester::WriteDatacard(std::string const& name,
590  std::string const& root_file) {
591  TFile file(root_file.c_str(), "RECREATE");
593  file.Close();
594 }
595 
596 void CombineHarvester::WriteDatacard(std::string const& name) {
597  TFile dummy;
598  CombineHarvester::WriteDatacard(name, dummy);
599  dummy.Close();
600 }
601 
602 void CombineHarvester::FillHistMappings(std::vector<HistMapping> & mappings) {
603  // Build maps of
604  // RooAbsData --> RooWorkspace
605  // RooAbsPdf --> RooWorkspace
606  std::map<RooAbsData const*, RooWorkspace*> data_ws_map;
607  std::map<RooAbsReal const*, RooWorkspace*> pdf_ws_map;
608  for (auto const& iter : wspaces_) {
609  auto dat = iter.second->allData();
610  for (auto d : dat) {
611  data_ws_map[d] = iter.second.get();
612  }
613  RooArgSet vars = iter.second->allPdfs();
614  for (RooAbsArg *v : vars) {
615  RooAbsPdf *y = dynamic_cast<RooAbsPdf*>(v);
616 
617  if (y) pdf_ws_map[iter.second->pdf(y->GetName())] = iter.second.get();
618  }
619  RooArgSet fvars = iter.second->allFunctions();
620  for (RooAbsArg *fv : fvars) {
621  RooAbsReal *y = dynamic_cast<RooAbsReal*>(fv);
622  if (y) pdf_ws_map[iter.second->function(y->GetName())] = iter.second.get();
623  }
624  }
625 
626  // For writing TH1s we will hard code a set of patterns for each bin
627  // This assumes that the backgrounds will not depend on "mass" but the
628  // signal will. Will probably want to change this in the future
629  std::set<std::string> hist_bins;
630  auto bins = this->bin_set();
631  for (auto bin : bins) {
632  unsigned shape_count = std::count_if(procs_.begin(), procs_.end(),
633  [&](std::shared_ptr<ch::Process> p) {
634  return (p->bin() == bin && p->shape() && (!p->signal()));
635  });
636  shape_count += std::count_if(obs_.begin(), obs_.end(),
637  [&](std::shared_ptr<ch::Observation> p) {
638  return (p->bin() == bin && p->shape());
639  });
640  unsigned counting = std::count_if(
641  procs_.begin(), procs_.end(), [&](std::shared_ptr<ch::Process> p) {
642  return (p->bin() == bin && p->shape() == nullptr &&
643  p->pdf() == nullptr && p->data() == nullptr);
644  });
645  counting += std::count_if(
646  obs_.begin(), obs_.end(), [&](std::shared_ptr<ch::Observation> p) {
647  return (p->bin() == bin && p->shape() == nullptr &&
648  p->data() == nullptr);
649  });
650 
651  if (shape_count > 0) {
652  mappings.emplace_back("*", bin, bin + "/$PROCESS",
653  bin + "/$PROCESS_$SYSTEMATIC");
654  hist_bins.insert(bin);
655  } else if (counting > 0) {
656  mappings.emplace_back("*", bin, "", "");
657  mappings.back().is_fake = true;
658  }
659 
660  CombineHarvester ch_signals =
661  std::move(this->cp().bin({bin}).signals().histograms());
662  auto sig_proc_set =
663  ch_signals.SetFromProcs(std::mem_fn(&ch::Process::process));
664  for (auto sig_proc : sig_proc_set) {
665  // should only add this mapping if the signal process has a numeric mass
666  // value, otherwise we will write it using the background rule above
667  auto masses = Set2Vec(ch_signals.cp()
668  .process({sig_proc})
669  .SetFromProcs(std::mem_fn(&ch::Process::mass)));
670  if (masses.size() != 1) {
671  throw std::runtime_error(FNERROR("Process " + sig_proc + " in bin " +
672  bin +
673  " has multiple entries with multiple "
674  "mass values, this is not supported"));
675 
676  }
677  if (is_float(masses[0])) {
678  mappings.emplace_back(sig_proc, bin, bin + "/" + sig_proc + "$MASS",
679  bin + "/" + sig_proc + "$MASS_$SYSTEMATIC");
680  }
681  }
682  }
683 
684  // Generate mappings for RooFit objects
685  for (auto bin : bins) {
686  CombineHarvester ch_bin = std::move(this->cp().bin({bin}));
687  for (auto obs : ch_bin.obs_) {
688  if (!obs->data()) continue;
689  std::string obj_name = std::string(data_ws_map[obs->data()]->GetName()) +
690  ":" + std::string(obs->data()->GetName());
691  mappings.emplace_back("data_obs", obs->bin(), obj_name, "");
692  }
693 
694  bool prototype_ok = false;
695  HistMapping prototype;
696  std::vector<HistMapping> full_list;
697  auto pmap = ch_bin.GenerateProcSystMap();
698  for (unsigned i = 0; i < ch_bin.procs_.size(); ++i) {
699  ch::Process * proc = ch_bin.procs_[i].get();
700  if (!proc->data() && !proc->pdf()) continue;
701  std::string obj_name;
702  std::string obj_sys_name;
703  if (proc->data()) {
704  obj_name = std::string(proc->data()->GetName());
705  boost::replace_all(obj_name, proc->process(), "$PROCESS");
706  obj_name = std::string(data_ws_map[proc->data()]->GetName()) + ":" +
707  obj_name;
708  }
709  if (proc->pdf()) {
710  obj_name = std::string(proc->pdf()->GetName());
711  boost::replace_all(obj_name, proc->process(), "$PROCESS");
712  obj_name = std::string(pdf_ws_map[proc->pdf()]->GetName()) +
713  ":" + obj_name;
714  }
715  for (unsigned j = 0; j < pmap[i].size(); ++j) {
716  ch::Systematic const* sys = pmap[i][j];
717  if (sys->data_u()) {
718  obj_sys_name = std::string(sys->data_u()->GetName());
719  boost::replace_all(obj_sys_name, sys->name() + "Up", "$SYSTEMATIC");
720  boost::replace_all(obj_sys_name, sys->process(), "$PROCESS");
721  obj_sys_name = std::string(data_ws_map[sys->data_u()]->GetName()) +
722  ":" + obj_sys_name;
723  break;
724  }
725  if (sys->pdf_u()) {
726  obj_sys_name = std::string(sys->pdf_u()->GetName());
727  boost::replace_all(obj_sys_name, sys->name() + "Up", "$SYSTEMATIC");
728  boost::replace_all(obj_sys_name, sys->process(), "$PROCESS");
729  obj_sys_name = std::string(pdf_ws_map[sys->pdf_u()]->GetName()) +
730  ":" + obj_sys_name;
731  break;
732  }
733  }
734 
735  // If the prototype pattern is already filled, but doesn't equal this
736  // new pattern - then we can't use the prototype
737  if (prototype.pattern.size() && prototype.pattern != obj_name) {
738  prototype_ok = false;
739  }
740 
741  if (prototype.syst_pattern.size() && obj_sys_name.size() &&
742  prototype.syst_pattern != obj_sys_name) {
743  prototype_ok = false;
744  }
745 
746  if (!prototype.pattern.size()) {
747  prototype_ok = true;
748  prototype.process = "*";
749  prototype.category = bin;
750  prototype.pattern = obj_name;
751  }
752  if (!prototype.syst_pattern.size()) {
753  prototype.syst_pattern = obj_sys_name;
754  }
755 
756  full_list.emplace_back(proc->process(), proc->bin(), obj_name,
757  obj_sys_name);
758  }
759  // There are two reasons we won't want to write a generic mapping
760  // for the processes in this bin:
761  // 1) One or more processes is not described by the prototype mapping
762  // 2) We also have a generic histogram mapping for this bin
763  if (!prototype_ok || hist_bins.count(bin)) {
764  for (auto m : full_list) {
765  mappings.push_back(m);
766  }
767  } else {
768  mappings.push_back(prototype);
769  }
770  }
771 }
772 
773 
774 void CombineHarvester::WriteDatacard(std::string const& name,
775  TFile& root_file) {
776  using boost::format;
777 
778  // First figure out if this is a counting-experiment only
779  bool is_counting = true;
780  this->ForEachObs([&](ch::Observation *obs) {
781  if (obs->shape() != nullptr || obs->data() != nullptr) {
782  is_counting = false;
783  }
784  });
785  if (is_counting) {
786  this->ForEachProc([&](ch::Process *proc) {
787  if (proc->shape() != nullptr || proc->data() != nullptr ||
788  proc->pdf() != nullptr) {
789  is_counting = false;
790  }
791  });
792  }
793 
794  // Allow a non-open ROOT file if this is purely a counting experiment
795  if (!root_file.IsOpen() && !is_counting) {
796  throw std::runtime_error(FNERROR(
797  std::string("Output ROOT file is not open: ") + root_file.GetName()));
798  }
799 
800  std::unique_ptr<std::ostream> txt_file_ptr = nullptr;
801 
802  // Figure out if the datacard name ends with ".gz"
803  std::string zip_ext = ".gz";
804  bool has_zip_ext = (name.length() >= zip_ext.length() && name.compare(name.length() - zip_ext.length(), zip_ext.length(), zip_ext) == 0);
805 
806  if (has_zip_ext) {
807  txt_file_ptr = std::make_unique<zstr::ofstream>(name);
808  } else {
809  txt_file_ptr = std::make_unique<std::ofstream>(name);
810  }
811  if (txt_file_ptr->fail()) {
812  throw std::runtime_error(FNERROR("Unable to create file: " + name));
813  }
814 
815  std::ostream & txt_file = *txt_file_ptr;
816 
817  //txt_file << "# Datacard produced by CombineHarvester with git status: "
818  // << ch::GitVersion() << "\n";
819 
820  std::string dashes(80, '-');
821 
822  auto bin_set = this->SetFromProcs(std::mem_fn(&ch::Process::bin));
823  auto proc_set = this->SetFromProcs(std::mem_fn(&ch::Process::process));
824  std::set<std::string> sys_set;
825  std::set<std::string> param_set;
826  std::set<std::string> rateparam_set;
827  this->ForEachSyst([&](ch::Systematic const* sys) {
828  if (sys->type() == "rateParam") {
829  rateparam_set.insert(sys->name());
830  }
831  else if (sys->type() == "param"){
832  param_set.insert(sys->name());
833  }
834  else sys_set.insert(sys->name());
835  });
836  txt_file << "imax " << bin_set.size()
837  << " number of bins\n";
838  txt_file << "jmax " << proc_set.size() - 1
839  << " number of processes minus 1\n";
840  txt_file << "kmax " << "*" << " number of nuisance parameters\n";
841  txt_file << dashes << "\n";
842 
843 
844  std::vector<HistMapping> mappings;
845  FillHistMappings(mappings);
846 
847  auto bins = this->SetFromObs(std::mem_fn(&ch::Observation::bin));
848 
849  auto proc_sys_map = this->GenerateProcSystMap();
850 
851  std::set<std::string> all_dependents_pars;
852  std::set<std::string> multipdf_cats;
853  for (auto proc : procs_) {
854  if (!proc->pdf()) continue;
855  // The rest of this is building the list of dependents
859  RooAbsData const* data_obj = FindMatchingData(proc.get());
860  RooArgSet par_list;
861  RooArgSet norm_par_list;
862 
863  if (data_obj) {
864  par_list.add(ParametersByName(proc->pdf(), data_obj->get()));
865  if (proc->norm()) {
866  norm_par_list.add(ParametersByName(proc->norm(), data_obj->get()));
867  }
868  } else {
869  RooRealVar mx("CMS_th1x" , "CMS_th1x", 0, 1);
870  RooArgSet tmp_set(mx);
871  par_list.add(ParametersByName(proc->pdf(), &tmp_set));
872  if (proc->norm()) {
873  norm_par_list.add(ParametersByName(proc->norm(), &tmp_set));
874  }
875  }
876  for (RooAbsArg *par_list_var : par_list) {
877  if (dynamic_cast<RooRealVar*>(par_list_var)) {
878  all_dependents_pars.insert(par_list_var->GetName());
879  }
880  // If this pdf has a RooCategory parameter then it's probably a
881  // RooMultiPdf. We'll need to write a special line in the datacard to
882  // indicate that this is a discrete pdf index. However it's best to be
883  // sure, so we'll use ROOT's string-based RTTI to check the type without
884  // having to link against HiggsAnalysis/CombinedLimit
885  if (dynamic_cast<RooCategory*>(par_list_var) &&
886  proc->pdf()->InheritsFrom("RooMultiPdf")) {
887  multipdf_cats.insert(par_list_var->GetName());
888  }
889 
890 }
891  if (proc->norm()) {
892  for (RooAbsArg *nm_list_var: norm_par_list) {
893  if (dynamic_cast<RooRealVar*>(nm_list_var)) {
894  all_dependents_pars.insert(nm_list_var->GetName());
895  }
896  }
897  }
898  }
899 
900  // The ROOT file mapping should be given as a relative path
901  std::string file_name = root_file.GetName();
902  // Get the full path to the output root file
903  // NOTE: was using canonical here instead of absolute, but not
904  // supported in boost 1.47
905  boost::filesystem::path root_file_path =
906  boost::filesystem::absolute(file_name);
907  // Get the full path to the directory containing the txt file
908  boost::filesystem::path txt_file_path =
909  boost::filesystem::absolute(name).parent_path();
910  // Compute the relative path from the txt file to the root file
911  file_name = make_relative(txt_file_path, root_file_path).string();
912 
913  // Keep track of the workspaces we actually need to write into the
914  // output file
915  std::set<std::string> used_wsps;
916 
917  for (auto const& mapping : mappings) {
918  if (!mapping.is_fake) {
919  txt_file << format("shapes %s %s %s %s %s\n")
920  % mapping.process
921  % mapping.category
922  % file_name
923  % mapping.pattern
924  % mapping.syst_pattern;
925  if (mapping.IsPdf() || mapping.IsData()) {
926  used_wsps.insert(mapping.WorkspaceName());
927  if (mapping.syst_pattern != "") {
928  used_wsps.insert(mapping.SystWorkspaceName());
929  }
930  }
931  } else {
932  txt_file << format("shapes %s %s %s\n")
933  % mapping.process
934  % mapping.category
935  % "FAKE";
936  }
937  }
938  txt_file << dashes << "\n";
939 
940  if (!is_counting) {
941  for (auto ws_it : wspaces_) {
942  if (ws_it.first == "_rateParams") continue; // don't write this one
943  // Also skip any workspace that isn't needed for this card
944  if (!used_wsps.count(ws_it.second->GetName())) continue;
945  ch::WriteToTFile(ws_it.second.get(), &root_file, ws_it.second->GetName());
946  }
947  }
948 
949  // Writing observations
950  if (obs_.size() > 0) {
951  txt_file << "bin ";
952  for (auto const& obs : obs_) {
953  txt_file << format("%-15s ") % obs->bin();
954  if (obs->shape()) {
955  bool add_dir = TH1::AddDirectoryStatus();
956  TH1::AddDirectory(false);
957  std::unique_ptr<TH1> h((TH1*)(obs->shape()->Clone()));
958  h->Scale(obs->rate());
959  WriteHistToFile(h.get(), &root_file, mappings, obs->bin(), "data_obs",
960  obs->mass(), "", 0);
961  TH1::AddDirectory(add_dir);
962  }
963  }
964  txt_file << "\n";
965  txt_file << "observation ";
966  // On the precision of the observation yields: .1f is not sufficient for
967  // combine to be happy if we have some asimov dataset with non-integer values.
968  // We could just always give .4f but this doesn't look nice for the majority
969  // of cards that have real data. Instead we'll check...
970  std::string obs_fmt_int = "%-15.1f ";
971  std::string obs_fmt_flt = "%-15.4f ";
972  for (auto const& obs : obs_) {
973  bool is_float =
974  std::fabs(obs->rate() - std::round(obs->rate())) > 1E-4;
975  txt_file << format(is_float ? obs_fmt_flt : obs_fmt_int)
976  % obs->rate();
977  }
978  txt_file << "\n";
979  txt_file << dashes << "\n";
980  }
981 
982  unsigned sys_str_len = 14;
983  for (auto const& sys : sys_set) {
984  if (sys.length() > sys_str_len) sys_str_len = sys.length();
985  }
986  for (auto const& sys : all_dependents_pars) {
987  if (sys.length() > sys_str_len) sys_str_len = sys.length();
988  }
989  std::string sys_str_short = boost::lexical_cast<std::string>(sys_str_len);
990  std::string sys_str_long = boost::lexical_cast<std::string>(sys_str_len+9);
991 
992  auto getProcLen = [](auto const& proc) -> std::string {
993  std::size_t proc_len = 15;
994  proc_len = std::max(proc_len, proc->process().size());
995  std::string proc_len_str = boost::lexical_cast<std::string>(proc_len);
996  return proc_len_str;
997  };
998 
999  txt_file << format("%-"+sys_str_long+"s") % "bin";
1000  for (auto const& proc : procs_) {
1001  if (proc->shape()) {
1002  bool add_dir = TH1::AddDirectoryStatus();
1003  TH1::AddDirectory(false);
1004  std::unique_ptr<TH1> h = proc->ClonedScaledShape();
1005  WriteHistToFile(h.get(), &root_file, mappings, proc->bin(),
1006  proc->process(), proc->mass(), "", 0);
1007  TH1::AddDirectory(add_dir);
1008  }
1009  txt_file << format("%-"+getProcLen(proc)+"s ") % proc->bin();
1010  }
1011  txt_file << "\n";
1012 
1013  txt_file << format("%-"+sys_str_long+"s") % "process";
1014 
1015  for (auto const& proc : procs_) {
1016  txt_file << format("%-"+getProcLen(proc)+"s ") % proc->process();
1017  }
1018  txt_file << "\n";
1019 
1020  txt_file << format("%-"+sys_str_long+"s") % "process";
1021 
1022  // Setup process_ids first
1023  std::map<std::string, int> p_ids;
1024  unsigned sig_id = 0;
1025  unsigned bkg_id = 1;
1026  for (unsigned p = 0; p < procs_.size(); ++p) {
1027  if (!procs_[p]->signal()) {
1028  if (p_ids.count(procs_[p]->process()) == 0) {
1029  p_ids[procs_[p]->process()] = bkg_id;
1030  ++bkg_id;
1031  }
1032  else {
1033  // check if process was already picked up as signal
1034  if (p_ids[procs_[p]->process()] <= 0) throw std::runtime_error(FNERROR("Ambiguous definition of process (" + procs_[p]->process() + ") as both signal and background"));
1035  }
1036  }
1037  unsigned q = procs_.size() - (p + 1);
1038  if (procs_[q]->signal()) {
1039  if (p_ids.count(procs_[q]->process()) == 0) {
1040  p_ids[procs_[q]->process()] = sig_id;
1041  --sig_id;
1042  }
1043  else {
1044  // check if process was already picked up as background
1045  if (p_ids[procs_[q]->process()] > 0) throw std::runtime_error(FNERROR("Ambiguous definition of process (" + procs_[q]->process() + ") as both signal and background"));
1046  }
1047  }
1048  }
1049  for (auto const& proc : procs_) {
1050  txt_file << format("%-"+getProcLen(proc)+"s ") % p_ids[proc->process()];
1051  }
1052  txt_file << "\n";
1053 
1054 
1055  txt_file << format("%-"+sys_str_long+"s") % "rate";
1056  for (auto const& proc : procs_) {
1057  txt_file << format("%-"+getProcLen(proc)+".6g ") % proc->no_norm_rate();
1058  }
1059  txt_file << "\n";
1060  txt_file << dashes << "\n";
1061 
1062  // Need to write parameters here that feature both in the list of pdf
1063  // dependents and sys_set.
1064  for (auto par : params_) {
1065  Parameter const* p = par.second.get();
1066  if (p->err_d() != 0.0 && p->err_u() != 0.0 &&
1067  all_dependents_pars.count(p->name()) && sys_set.count(p->name())) {
1068  txt_file << format((format("%%-%is param %%g %%g") % sys_str_len).str()) %
1069  p->name() % p->val() % ((p->err_u() - p->err_d()) / 2.0);
1070  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1071  p->range_u() != std::numeric_limits<double>::max()) {
1072  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1073  }
1074  txt_file << "\n";
1075  }
1076  }
1077 
1078  // Write parameters here that feature in param set but not in the syst set
1079  for (auto par : params_) {
1080  Parameter const* p = par.second.get();
1081  if (p->err_d() != 0.0 && p->err_u() != 0.0 &&
1082  param_set.count(p->name()) && !sys_set.count(p->name())) {
1083  txt_file << format((format("%%-%is param %%g %%g") % sys_str_len).str()) %
1084  p->name() % p->val() % ((p->err_u() - p->err_d()) / 2.0);
1085  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1086  p->range_u() != std::numeric_limits<double>::max()) {
1087  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1088  }
1089  txt_file << "\n";
1090  }
1091  }
1092 
1093  // Write members of syst collection here that feature in both param set and rateParam set (constraint on rateParam term)
1094  for (auto syst : systs_) {
1095  if (syst->type() == "param" && param_set.count(syst->name()) && rateparam_set.count(syst->name())) {
1096  txt_file << format((format("%%-%is param %%s") % sys_str_len).str()) %
1097  syst->name() % syst->param_str_ext();
1098  txt_file << "\n";
1099  }
1100  }
1101 
1102  for (auto const& sys : sys_set) {
1103  std::vector<std::string> line(procs_.size() + 2);
1104  line[0] = sys;
1105  bool seen_lnN = false;
1106  bool seen_lnU = false;
1107  bool seen_shape = false;
1108  bool seen_shapeN2 = false;
1109  bool seen_shapeU = false;
1110  if (param_set.count(sys)) continue;
1111  for (unsigned p = 0; p < procs_.size(); ++p) {
1112  line[p + 2] = "-";
1113  for (unsigned n = 0; n < proc_sys_map[p].size(); ++n) {
1114  ch::Systematic const* ptr = proc_sys_map[p][n];
1115  // Only interested into Systematics with name "sys"
1116  if (ptr->name() != sys) continue;
1117  std::string tp = ptr->type();
1118  if (tp == "lnN" || tp == "lnU") {
1119  if (tp == "lnN") seen_lnN = true;
1120  if (tp == "lnU") seen_lnU = true;
1121  line[p + 2] =
1122  ptr->asymm()
1123  ? (format("%g/%g") % ptr->value_d() % ptr->value_u()).str()
1124  : (format("%g") % ptr->value_u()).str();
1125  break;
1126  }
1127  if (tp == "shape" || tp == "shapeN2" || tp == "shapeU") {
1128  if (tp == "shape") seen_shape = true;
1129  if (tp == "shapeN2") seen_shapeN2 = true;
1130  if (tp == "shapeU") seen_shapeU = true;
1131  line[p + 2] = (format("%g") % ptr->scale()).str();
1132  if (ptr->shape_u() && ptr->shape_d()) {
1133  bool add_dir = TH1::AddDirectoryStatus();
1134  TH1::AddDirectory(false);
1135  std::unique_ptr<TH1> h_d = ptr->ClonedShapeD();
1136  h_d->Scale(procs_[p]->rate() * ptr->value_d());
1137  WriteHistToFile(h_d.get(), &root_file, mappings, ptr->bin(),
1138  ptr->process(), ptr->mass(), ptr->name(), 1);
1139  std::unique_ptr<TH1> h_u = ptr->ClonedShapeU();
1140  h_u->Scale(procs_[p]->rate() * ptr->value_u());
1141  WriteHistToFile(h_u.get(), &root_file, mappings, ptr->bin(),
1142  ptr->process(), ptr->mass(), ptr->name(), 2);
1143  TH1::AddDirectory(add_dir);
1144  break;
1145  } else if ( (ptr->data_u() && ptr->data_d()) || (ptr->pdf_u() && ptr->pdf_d()) ) {
1146  } else {
1147  if (!flags_.at("allow-missing-shapes")) {
1148  std::stringstream err;
1149  err << "Trying to write shape uncertainty with missing "
1150  "shapes:\n";
1151  err << Systematic::PrintHeader << *ptr;
1152  throw std::runtime_error(FNERROR(err.str()));
1153  }
1154  }
1155  }
1156  }
1157  }
1158  if (seen_shapeN2) {
1159  line[1] = "shapeN2";
1160  } else if (seen_shapeU) {
1161  line[1] = "shapeU";
1162  } else if (seen_lnU) {
1163  line[1] = "lnU";
1164  } else if (seen_lnN && !seen_shape) {
1165  line[1] = "lnN";
1166  } else if (!seen_lnN && seen_shape) {
1167  line[1] = "shape";
1168  } else if (seen_lnN && seen_shape) {
1169  line[1] = "shape?";
1170  } else {
1171  throw std::runtime_error(FNERROR("Systematic type could not be deduced"));
1172  }
1173  txt_file << format("%-" + sys_str_short + "s %-7s ") % line[0] % line[1];
1174  for (unsigned p = 0; p < procs_.size(); ++p) {
1175  txt_file << format("%-"+getProcLen(procs_[p])+"s ") % line[p + 2];
1176  }
1177  txt_file << "\n";
1178  }
1179  // write param line for any parameter which has a non-zero error
1180  // and which doesn't appear in list of nuisances
1181  CombineHarvester ch_rp = this->cp().syst_type({"rateParam"});
1182  // These are all the processes with a rateParam. We'll look at each one and
1183  // see if every bin with this process also has this rateParam.
1184  std::vector<std::vector<std::string> > floating_params;
1185  for (auto const& sys : rateparam_set) {
1186  FNLOGC(log(), verbosity_ > 1) << "Doing rateParam: " << sys << "\n";
1187  auto procs_rp = ch_rp.cp().syst_name({sys}).SetFromSysts(
1188  std::mem_fn(&ch::Systematic::process));
1189  FNLOGC(log(), verbosity_ > 1) << "Procs for this rateParam: \n";
1190  for (auto const& proc : procs_rp) {
1191  FNLOGC(log(), verbosity_ > 1) << " - " << proc << "\n";
1192  // Get the set of bins
1193  auto bins_rp = ch_rp.cp().process({proc}).syst_name({sys}).SetFromSysts(
1194  std::mem_fn(&ch::Systematic::bin));
1195  auto bins_all = this->cp().process({proc}).bin_set();
1196  if (bins_rp.size() > 0 && bins_rp.size() == bins_all.size()) {
1197  floating_params.push_back({sys, "*", proc});
1198  } else {
1199  for (auto const& bin : bins_rp) {
1200  floating_params.push_back({sys, bin, proc});
1201  }
1202  }
1203  FNLOGC(log(), verbosity_ > 1)
1204  << bins_rp.size() << "/" << bins_all.size()
1205  << " bins with this process have a rateParam\n";
1206  }
1207  }
1208  std::set<std::string> frozen_params;
1209  for (auto const& rp : floating_params) {
1210  if (params_.count(rp[0])) {
1211  ch::Parameter const* par = params_.at(rp[0]).get();
1212  txt_file << format("%-" + sys_str_short + "s %-10s %-10s %-10s %g") %
1213  rp[0] % "rateParam" % rp[1] % rp[2] % par->val();
1214  // Check if we have to set the range too
1215  // TODO: should package this into some function in ch::Parameter
1216  if (par->range_d() != std::numeric_limits<double>::lowest() &&
1217  par->range_u() != std::numeric_limits<double>::max()) {
1218  txt_file << format(" [%.4g,%.4g]") % par->range_d() % par->range_u();
1219  }
1220  txt_file << "\n";
1221  if (par->frozen()) {
1222  frozen_params.insert(rp[0]);
1223  }
1224  }
1225  }
1226  for (auto const& par : frozen_params) {
1227  txt_file << "nuisance edit freeze " << par << "\n";
1228  }
1229  std::set<std::string> all_fn_param_args;
1230  for (auto const& rp : floating_params) {
1231  if (!params_.count(rp[0])) {
1232  // If we don't have a ch::Parameter with this name, then we'll assume
1233  // this is a function
1234  RooWorkspace *rp_ws = wspaces_.at("_rateParams").get();
1235  RooAbsArg *arg = rp_ws->arg(rp[0].c_str());
1236  if (arg->getStringAttribute("wspSource")) {
1237  txt_file << format("%-" + sys_str_short +
1238  "s %-10s %-10s %-10s %-20s\n") %
1239  rp[0] % "rateParam" % rp[1] % rp[2] % std::string(arg->getStringAttribute("wspSource"));
1240  continue;
1241  }
1242  RooFormulaVar* form =
1243  dynamic_cast<RooFormulaVar*>(arg);
1244  // Want to extract the formula string, apparently "printMetaArgs" is the
1245  // only way to do it
1246  std::stringstream ss;
1247  form->printMetaArgs(ss);
1248  std::string form_str = ss.str();
1249  // The string will be like 'formula="blah"', strip everything on either
1250  // side of the double quotes (including the quotes themselves)
1251  std::size_t first = form_str.find_first_of('"');
1252  std::size_t last = form_str.find_last_of('"');
1253  form_str = form_str.substr(first + 1, last - first - 1);
1254  // unique_ptr because we own the RooArgSet that is generated
1255  unsigned npars =
1256  std::unique_ptr<RooArgSet>(form->getParameters(RooArgList()))->getSize();
1257  std::string args = "";
1258  for (unsigned i = 0; i < npars; ++i) {
1259  all_fn_param_args.insert(std::string(form->getParameter(i)->GetName()));
1260  args += std::string(form->getParameter(i)->GetName());
1261  if (i < (npars-1)) {
1262  args += ",";
1263  }
1264  }
1265  txt_file << format("%-" + sys_str_short +
1266  "s %-10s %-10s %-10s %-20s %s\n") %
1267  rp[0] % "rateParam" % rp[1] % rp[2] % form_str % args;
1268  }
1269  }
1270 
1271  if (wspaces_.count("_rateParams")) {
1272  RooWorkspace *rp_ws = wspaces_.at("_rateParams").get();
1273  RooArgSet vars = rp_ws->allVars();
1274  for (RooAbsArg *v : vars) {
1275  RooRealVar *y = dynamic_cast<RooRealVar*>(v);
1276  if (y && y->getAttribute("extArg") && all_fn_param_args.count(std::string(y->GetName()))) {
1277  if (!params_.count(y->GetName()) && y->getStringAttribute("wspSource")) {
1278  std::vector<std::string> tokens;
1279  boost::split(tokens, y->getStringAttribute("wspSource"), boost::is_any_of(":"));
1280  std::string wsp_name_extArg = tokens[1];
1281  y->setStringAttribute("wspSource", (std::string(root_file.GetName()) + ":" + tokens[1]).c_str());
1282  txt_file << format("%-" + sys_str_short +
1283  "s %-10s %-20s\n") %
1284  y->GetName() % "extArg" % std::string(y->getStringAttribute("wspSource"));
1285 
1286  }
1287  else{
1288  Parameter const* p = params_.at(y->GetName()).get();
1289  txt_file << format("%-" + sys_str_short + "s %-10s %g") %
1290  y->GetName() % "extArg" % p->val();
1291  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1292  p->range_u() != std::numeric_limits<double>::max()) {
1293  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1294  }
1295  txt_file << "\n";
1296  }
1297 
1298  }
1299 
1300  }
1301  RooArgSet funcs = rp_ws->allFunctions();
1302  for (RooAbsArg *v : funcs){
1303  RooAbsReal *y = dynamic_cast<RooAbsReal*>(v);
1304  if (y && y->getAttribute("extArg") && y->getStringAttribute("wspSource") && all_fn_param_args.count(std::string(y->GetName()))) {
1305  txt_file << format("%-" + sys_str_short +
1306  "s %-10s %-20s\n") %
1307  y->GetName() % "extArg" % std::string(y->getStringAttribute("wspSource"));
1308  continue;
1309 
1310  }
1311  }
1312  }
1313 
1314  std::set<std::string> ws_vars;
1315  for (auto iter : wspaces_) {
1316  RooArgSet vars = iter.second->allVars();
1317  for (RooAbsArg *v : vars) {
1318  RooRealVar *y = dynamic_cast<RooRealVar*>(v);
1319  if (y) ws_vars.insert(y->GetName());
1320  }
1321  }
1322 
1323  // How to check for params we need to write:
1324  // Get a list of all pdf dependents for the processes we've just written
1325  // Also need a list of all Systematic entries we've just written
1326  // Then we write param if:
1327  // - it has non-zero errors
1328  // - it appears in the first list
1329  // - it doesn't appear in the second list
1330  // - it doesn't appear in the third list
1331  for (auto par : params_) {
1332  Parameter const* p = par.second.get();
1333  if (p->err_d() != 0.0 && p->err_u() != 0.0 &&
1334  all_dependents_pars.count(p->name()) && !sys_set.count(p->name()) && !param_set.count(p->name())) {
1335  txt_file << format((format("%%-%is param %%g %%g") % sys_str_len).str()) %
1336  p->name() % p->val() % ((p->err_u() - p->err_d()) / 2.0);
1337  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1338  p->range_u() != std::numeric_limits<double>::max()) {
1339  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1340  }
1341  txt_file << "\n";
1342  }
1343  }
1344 
1345  for (auto par : multipdf_cats) {
1346  txt_file << format((format("%%-%is discrete\n") % sys_str_len).str()) % par;
1347  }
1348 
1349  for (auto stat_settings : auto_stats_settings_) {
1350  if (!bin_set.count(stat_settings.first)) continue;
1351  txt_file << format("%s autoMCStats %g %i %i\n") % stat_settings.first %
1352  stat_settings.second.event_threshold %
1353  stat_settings.second.include_signal %
1354  stat_settings.second.hist_mode;
1355  }
1356 
1357  // Finally write the NP groups
1358  // We should only consider parameters are either:
1359  // - in the list of pdf dependents
1360  // - in the list of systematics
1361  std::map<std::string, std::set<std::string>> groups;
1362  for (auto const& par : params_) {
1363  Parameter * p = par.second.get();
1364  if (!all_dependents_pars.count(p->name()) && !sys_set.count(p->name())) {
1365  continue;
1366  }
1367  if (p->err_d() == 0.0 && p->err_u() == 0.0) continue;
1368  for (auto const& str : p->groups()) {
1369  if (!groups.count(str)) {
1370  groups[str] = std::set<std::string>();
1371  }
1372  groups[str].insert(p->name());
1373  }
1374  }
1375  for (auto const& gr : groups) {
1376  txt_file << format("%s group =") % gr.first;
1377  for (auto const& np : gr.second) {
1378  txt_file << " " << np;
1379  }
1380  txt_file << "\n";
1381  }
1382 
1383  for (auto const& postl : post_lines_) {
1384  txt_file << postl << "\n";
1385  }
1386 }
1387 
1388 void CombineHarvester::WriteHistToFile(
1389  TH1 * hist,
1390  TFile * file,
1391  std::vector<HistMapping> const& mappings,
1392  std::string const& bin,
1393  std::string const& process,
1394  std::string const& mass,
1395  std::string const& nuisance,
1396  unsigned type) {
1397  StrPairVec attempts = this->GenerateShapeMapAttempts(process, bin);
1398  for (unsigned a = 0; a < attempts.size(); ++a) {
1399  for (unsigned m = 0; m < mappings.size(); ++m) {
1400  if ((attempts[a].first == mappings[m].process) &&
1401  (attempts[a].second == mappings[m].category)) {
1402  std::string p = (type == 0 ?
1403  mappings[m].pattern : mappings[m].syst_pattern);
1404  boost::replace_all(p, "$CHANNEL", bin);
1405  boost::replace_all(p, "$PROCESS", process);
1406  boost::replace_all(p, "$MASS", mass);
1407  if (type == 1) boost::replace_all(p, "$SYSTEMATIC", nuisance+"Down");
1408  if (type == 2) boost::replace_all(p, "$SYSTEMATIC", nuisance+"Up");
1409  WriteToTFile(hist, file, p);
1410  return;
1411  }
1412  }
1413  }
1414 }
1415 }
#define FNLOGC(x, y)
Definition: Logging.h:14
#define LOGLINE(x, y)
Definition: Logging.h:11
#define FNERROR(x)
Definition: Logging.h:9
#define FNLOG(x)
Definition: Logging.h:13
CombineHarvester & signals()
CombineHarvester & mass(std::vector< std::string > const &vec, bool cond=true)
void CreateParameterIfEmpty(std::string const &name)
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void ForEachSyst(Function func)
void ForEachProc(Function func)
std::set< R > SetFromObs(T func)
Fill an std::set using only the Observation entries.
void SetGroup(std::string const &name, std::vector< std::string > const &patterns)
Add parameters to a given group.
void WriteDatacard(std::string const &name, std::string const &root_file)
CombineHarvester & analysis(std::vector< std::string > const &vec, bool cond=true)
std::set< R > SetFromSysts(T func)
Fill an std::set using only the Systematic entries.
std::set< R > SetFromProcs(T func)
Fill an std::set using only the Process entries.
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester & syst_name(std::vector< std::string > const &vec, bool cond=true)
void ForEachObs(Function func)
bool GetFlag(std::string const &flag) const
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
CombineHarvester & histograms()
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
CombineHarvester & syst_type(std::vector< std::string > const &vec, bool cond=true)
virtual std::string const & process() const
Definition: Object.h:20
virtual std::string const & bin() const
Definition: Object.h:17
virtual std::string const & mass() const
Definition: Object.h:38
TH1 const * shape() const
Definition: Observation.h:27
RooAbsData const * data() const
Definition: Observation.h:35
double rate() const
Definition: Observation.h:21
double err_u() const
Definition: Parameter.h:34
bool frozen() const
Definition: Parameter.h:51
std::set< std::string > & groups()
Definition: Parameter.h:54
void set_err_u(double const &err_u)
Definition: Parameter.h:33
void set_err_d(double const &err_d)
Definition: Parameter.h:36
double err_d() const
Definition: Parameter.h:37
void set_range_u(double const &range_u)
Definition: Parameter.h:39
void set_val(double const &val)
Definition: Parameter.h:23
std::string const & name() const
Definition: Parameter.h:21
void set_range_d(double const &range_d)
Definition: Parameter.h:42
double range_d() const
Definition: Parameter.h:43
double range_u() const
Definition: Parameter.h:40
double val() const
Definition: Parameter.h:31
double no_norm_rate() const
Get the process normalisation without multiplying by the RooAbsReal value (in the case that it's pres...
Definition: Process.h:46
RooAbsReal const * pdf() const
Definition: Process.h:60
std::unique_ptr< TH1 > ClonedScaledShape() const
Definition: Process.cc:117
RooAbsReal const * norm() const
Definition: Process.h:66
RooAbsData const * data() const
Definition: Process.h:63
TH1 const * shape() const
Definition: Process.h:52
double value_u() const
Definition: Systematic.h:28
RooDataHist const * data_d() const
Definition: Systematic.h:51
std::unique_ptr< TH1 > ClonedShapeU() const
Definition: Systematic.cc:200
std::string const & type() const
Definition: Systematic.h:25
std::unique_ptr< TH1 > ClonedShapeD() const
Definition: Systematic.cc:207
bool asymm() const
Definition: Systematic.h:37
TH1 const * shape_d() const
Definition: Systematic.h:47
TH1 const * shape_u() const
Definition: Systematic.h:39
std::string const & name() const
Definition: Systematic.h:22
RooAbsReal const * pdf_d() const
Definition: Systematic.h:55
RooAbsReal const * pdf_u() const
Definition: Systematic.h:53
RooDataHist const * data_u() const
Definition: Systematic.h:49
double value_d() const
Definition: Systematic.h:31
double scale() const
Definition: Systematic.h:34
static std::ostream & PrintHeader(std::ostream &out)
Definition: Systematic.cc:251
Definition: Algorithm.h:10
void WriteToTFile(T *ptr, TFile *file, std::string const &path)
Definition: TFileIO.h:31
std::vector< std::string > ParseFileLines(std::string const &file_name)
Definition: Utilities.cc:224
boost::filesystem::path make_relative(boost::filesystem::path p_from, boost::filesystem::path p_to)
Determine the relative path from one file to another.
Definition: Utilities.cc:313
std::vector< T > Set2Vec(std::set< T > const &in)
Definition: Utilities.h:101
bool contains(const Range &r, T p)
Definition: Algorithm.h:22
bool is_float(std::string const &str)
Definition: Utilities.cc:241
RooArgSet ParametersByName(RooAbsReal const *pdf, RooArgSet const *dat_vars)
Definition: Utilities.cc:18
std::shared_ptr< TFile > file
Definition: HistMapping.h:15
std::string category
Definition: HistMapping.h:12
std::string SystWorkspaceName() const
Definition: HistMapping.cc:53
std::string pattern
Definition: HistMapping.h:13
std::string process
Definition: HistMapping.h:11
std::string syst_pattern
Definition: HistMapping.h:14
bool IsPdf() const
Definition: HistMapping.cc:21
std::shared_ptr< RooWorkspace > ws
Definition: HistMapping.h:16
std::shared_ptr< RooWorkspace > sys_ws
Definition: HistMapping.h:17
std::string WorkspaceName() const
Definition: HistMapping.cc:37