CombineHarvester
CombineHarvester_Datacards.cc
Go to the documentation of this file.
2 #include <cmath>
3 #include <vector>
4 #include <map>
5 #include <string>
6 #include <iostream>
7 #include <utility>
8 #include <set>
9 #include <fstream>
10 #include <sstream>
11 #include <fnmatch.h>
12 #include "boost/lexical_cast.hpp"
13 #include "boost/algorithm/string.hpp"
14 #include "boost/format.hpp"
15 #include "boost/regex.hpp"
16 #include "boost/filesystem.hpp"
17 #include "TDirectory.h"
18 #include "TH1.h"
19 #include "RooRealVar.h"
20 #include "RooFormulaVar.h"
21 #include "RooCategory.h"
22 
31 #include "CombineHarvester/CombineTools/interface/zstr.hpp"
32 namespace ch {
33 
34 // Extract info from filename using parse rule like:
35 // ".*{MASS}/{ANALYSIS}_{CHANNEL}_{BINID}_{ERA}.txt"
36 int CombineHarvester::ParseDatacard(std::string const& filename,
37  std::string parse_rules) {
38  boost::replace_all(parse_rules, "$ANALYSIS", "(?<ANALYSIS>[\\w\\.]+)");
39  boost::replace_all(parse_rules, "$ERA", "(?<ERA>[\\w\\.]+)");
40  boost::replace_all(parse_rules, "$CHANNEL", "(?<CHANNEL>[\\w\\.]+)");
41  boost::replace_all(parse_rules, "$BINID", "(?<BINID>[\\w\\.]+)");
42  boost::replace_all(parse_rules, "$MASS", "(?<MASS>[\\w\\.]+)");
43  boost::regex rgx(parse_rules);
44  boost::smatch matches;
45  boost::regex_search(filename, matches, rgx);
46  this->ParseDatacard(filename,
47  matches.str("ANALYSIS"),
48  matches.str("ERA"),
49  matches.str("CHANNEL"),
50  matches.str("BINID").length() ?
51  boost::lexical_cast<int>(matches.str("BINID")) : 0,
52  matches.str("MASS"));
53  return 0;
54 }
55 
56 int CombineHarvester::ParseDatacard(std::string const& filename,
57  std::string const& analysis,
58  std::string const& era,
59  std::string const& channel,
60  int bin_id,
61  std::string const& mass) {
62  TH1::AddDirectory(kFALSE);
63  // Load the entire datacard into memory as a vector of strings
64  std::vector<std::string> lines = ch::ParseFileLines(filename);
65  // Loop through lines, trimming whitespace at the beginning or end
66  // then splitting each line into a vector of words (using any amount
67  // of whitespace as the separator). We skip any line of zero length
68  // or which starts with a "#" or "-" character.
69  std::vector<std::vector<std::string>> words;
70  for (unsigned i = 0; i < lines.size(); ++i) {
71  boost::trim(lines[i]);
72  if (lines[i].size() == 0) continue;
73  if (lines[i].at(0) == '#' || lines[i].at(0) == '-') continue;
74  words.push_back(std::vector<std::string>());
75  boost::split(words.back(), lines[i], boost::is_any_of("\t "),
76  boost::token_compress_on);
77  }
78 
79  std::vector<HistMapping> hist_mapping;
80  // std::map<std::string, RooAbsData*> data_map;
81  std::map<std::string, std::shared_ptr<TFile>> file_store;
82  std::map<std::string, std::shared_ptr<RooWorkspace>> ws_store;
83 
84  bool start_nuisance_scan = false;
85  unsigned r = 0;
86 
87  // We will allow cards that describe a single bin to have an "observation"
88  // line without a "bin" line above it. We probably won't know the bin name
89  // when we parse this line, so we'll store it here and fix it later
90  std::shared_ptr<ch::Observation> single_obs = nullptr;
91  std::set<std::string> bin_names;
92 
93  // Store the groups that we encounter
94  std::map<std::string, std::set<std::string>> groups;
95 
96  // Do a first pass just for shapes, as some cards
97  // declare observations / processes before the shapes lines
98  for (unsigned i = 0; i < words.size(); ++i) {
99  // Ignore line if it only has one word
100  if (words[i].size() <= 1) continue;
101 
102  // If the line begins "shapes" then we've
103  // found process --> TH1 mapping information
104  if (boost::iequals(words[i][0], "shapes") && words[i].size() >= 5) {
105  hist_mapping.push_back(HistMapping());
106  HistMapping &mapping = hist_mapping.back();
107  mapping.process = words[i][1];
108  mapping.category = words[i][2];
109  // The root file path given in the datacard is relative to the datacard
110  // path, so we join the path to the datacard with the path to the file
111  std::string dc_path;
112  std::size_t slash = filename.find_last_of('/');
113  if (slash != filename.npos) {
114  dc_path = filename.substr(0, slash) + "/" + words[i][3];
115  } else {
116  dc_path = words[i][3];
117  }
118  if (!file_store.count(dc_path))
119  file_store[dc_path] = std::make_shared<TFile>(dc_path.c_str());
120  mapping.file = file_store.at(dc_path);
121  mapping.pattern = words[i][4];
122  if (words[i].size() > 5) mapping.syst_pattern = words[i][5];
123 
124  if (mapping.IsPdf()) {
125  std::string store_key =
126  mapping.file->GetName() + mapping.WorkspaceName();
127  if (!ws_store.count(store_key)) {
128  mapping.file->cd();
129  std::shared_ptr<RooWorkspace> ptr(dynamic_cast<RooWorkspace*>(
130  gDirectory->Get(mapping.WorkspaceName().c_str())));
131  if (!ptr) {
132  throw std::runtime_error(FNERROR("Workspace not found in file"));
133  }
134  ws_store[store_key] = ptr;
135  }
136  mapping.ws = SetupWorkspace(*(ws_store[store_key]), true);
137  }
138  if (mapping.IsPdf() && mapping.syst_pattern != "") {
139  std::string store_key =
140  mapping.file->GetName() + mapping.SystWorkspaceName();
141  if (!ws_store.count(store_key)) {
142  mapping.file->cd();
143  std::shared_ptr<RooWorkspace> ptr(dynamic_cast<RooWorkspace*>(
144  gDirectory->Get(mapping.SystWorkspaceName().c_str())));
145  if (!ptr) {
146  throw std::runtime_error(FNERROR("Workspace not found in file"));
147  }
148  ws_store[store_key] = ptr;
149  }
150  mapping.sys_ws = SetupWorkspace(*(ws_store[store_key]), true);
151  }
152  }
153 
154  // We can also have a "FAKE" shape directive
155  // Must be four words long: shapes * * FAKE
156  if (boost::iequals(words[i][0], "shapes") && words[i].size() == 4 &&
157  boost::iequals(words[i][3], "FAKE")) {
158  hist_mapping.push_back(HistMapping());
159  HistMapping &mapping = hist_mapping.back();
160  mapping.process = words[i][1];
161  mapping.category = words[i][2];
162  mapping.is_fake = true;
163  }
164  }
165 
166  for (unsigned i = 0; i < words.size(); ++i) {
167  if (words[i].size() >= 3 &&
168  boost::iequals(words[i][1], "extArg")) {
169  if (verbosity_ > 1) {
170  FNLOG(log()) << "Processing extArg line:\n";
171  for (auto const& str : words[i]) {
172  log() << str << "\t";
173  }
174  log() << "\n";
175  }
176 
177  bool has_range = words[i].size() == 4 && words[i][3][0] == '[';
178  std::string param_name = words[i][0];
179  bool is_wsp_rateparam = false;
180  try {
181  boost::lexical_cast<double>(words[i][2]);
182  } catch (boost::bad_lexical_cast &) {
183  is_wsp_rateparam = true;
184  }
185  if ((!is_wsp_rateparam) && (words[i].size() == 3 || has_range)) {
186  ch::Parameter* param = SetupRateParamVar(
187  param_name, boost::lexical_cast<double>(words[i][2]), true);
188  param->set_err_u(0.);
189  param->set_err_d(0.);
190  if (has_range) {
191  std::vector<std::string> tokens;
192  boost::split(tokens, words[i][3], boost::is_any_of("[],"));
193  if (tokens.size() == 4) {
194  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
195  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
196  FNLOGC(log(), verbosity_ > 1) << "Setting parameter range to " << words[i][2];
197  }
198  }
199  } else if (words[i].size() == 3 && is_wsp_rateparam) {
200  SetupRateParamWspObj(param_name, words[i][2], true);
201  }
202  }
203  }
204 
205  // Loop through the vector of word vectors
206  for (unsigned i = 0; i < words.size(); ++i) {
207  // Ignore line if it only has one word
208  if (words[i].size() <= 1) continue;
209 
210  // Want to check this line and the previous one, so need i >= 1.
211  // If the first word on this line is "observation" and "bin" on
212  // the previous line then we've found the entries for data, and
213  // can add Observation objects
214  if (i >= 1) {
215  if ( boost::iequals(words[i][0], "observation") &&
216  boost::iequals(words[i-1][0], "bin") &&
217  words[i].size() == words[i-1].size()) {
218  for (unsigned p = 1; p < words[i].size(); ++p) {
219  auto obs = std::make_shared<Observation>();
220  obs->set_bin(words[i-1][p]);
221  obs->set_rate(boost::lexical_cast<double>(words[i][p]));
222  obs->set_analysis(analysis);
223  obs->set_era(era);
224  obs->set_channel(channel);
225  obs->set_bin_id(bin_id);
226  obs->set_mass(mass);
227 
228  LoadShapes(obs.get(), hist_mapping);
229 
230  obs_.push_back(obs);
231  }
232  }
233  }
234 
235  if (boost::iequals(words[i][0], "observation") &&
236  !boost::iequals(words[i-1][0], "bin") &&
237  words[i].size() == 2 &&
238  single_obs.get() == nullptr) {
239  for (unsigned p = 1; p < words[i].size(); ++p) {
240  single_obs = std::make_shared<Observation>();
241  single_obs->set_bin("");
242  single_obs->set_rate(boost::lexical_cast<double>(words[i][p]));
243  single_obs->set_analysis(analysis);
244  single_obs->set_era(era);
245  single_obs->set_channel(channel);
246  single_obs->set_bin_id(bin_id);
247  single_obs->set_mass(mass);
248  }
249  }
250 
251  // Similarly look for the lines indicating the different signal
252  // and background processes
253  // Once these are found save in line index for the rate line as r
254  // to we can refer back to these later, then assume that every
255  // line that follows is a nuisance parameter
256 
257  if (i >= 3) {
258  if ( boost::iequals(words[i][0], "rate") &&
259  boost::iequals(words[i-1][0], "process") &&
260  boost::iequals(words[i-2][0], "process") &&
261  boost::iequals(words[i-3][0], "bin") &&
262  words[i].size() == words[i-1].size() &&
263  words[i].size() == words[i-2].size() &&
264  words[i].size() == words[i-3].size()) {
265  for (unsigned p = 1; p < words[i].size(); ++p) {
266  auto proc = std::make_shared<Process>();
267  proc->set_bin(words[i-3][p]);
268  bin_names.insert(words[i-3][p]);
269  try {
270  int process_id = boost::lexical_cast<int>(words[i-2][p]);
271  proc->set_signal(process_id <= 0);
272  proc->set_process(words[i-1][p]);
273  } catch(boost::bad_lexical_cast &) {
274  int process_id = boost::lexical_cast<int>(words[i-1][p]);
275  proc->set_signal(process_id <= 0);
276  proc->set_process(words[i-2][p]);
277  }
278  proc->set_rate(boost::lexical_cast<double>(words[i][p]));
279  proc->set_analysis(analysis);
280  proc->set_era(era);
281  proc->set_channel(channel);
282  proc->set_bin_id(bin_id);
283  proc->set_mass(mass);
284 
285  LoadShapes(proc.get(), hist_mapping);
286 
287  procs_.push_back(proc);
288  }
289  r = i;
290  start_nuisance_scan = true;
291  }
292  }
293 
294  if (start_nuisance_scan && words[i].size() >= 4) {
295  if (boost::iequals(words[i][1], "param")) {
296  std::string param_name = words[i][0];
297  if (!params_.count(param_name))
298  params_[param_name] = std::make_shared<Parameter>(Parameter());
299  Parameter * param = params_.at(param_name).get();
300  param->set_name(param_name);
301  param->set_val(boost::lexical_cast<double>(words[i][2]));
302  std::size_t slash_pos = words[i][3].find("/");
303  if (slash_pos != words[i][3].npos) {
304  param->set_err_d(
305  boost::lexical_cast<double>(words[i][3].substr(0, slash_pos)));
306  param->set_err_u(
307  boost::lexical_cast<double>(words[i][3].substr(slash_pos+1)));
308  } else {
309  param->set_err_u(+1.0 * boost::lexical_cast<double>(words[i][3]));
310  param->set_err_d(-1.0 * boost::lexical_cast<double>(words[i][3]));
311  }
312  if (words[i].size() >= 5) {
313  // We have a range
314  std::vector<std::string> tokens;
315  boost::split(tokens, words[i][4], boost::is_any_of("[],"));
316  if (tokens.size() == 4) {
317  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
318  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
319  }
320  }
321  continue; // skip the rest of this now
322  }
323  }
324 
325  if (start_nuisance_scan && words[i].size() >= 5 &&
326  boost::iequals(words[i][1], "rateParam")) {
327  if (verbosity_ > 1) {
328  FNLOG(log()) << "Processing rateParam line:\n";
329  for (auto const& str : words[i]) {
330  log() << str << "\t";
331  }
332  log() << "\n";
333  }
334 
335  bool has_range = words[i].size() == 6 && words[i][5][0] == '[';
336  std::string param_name = words[i][0];
337  // If this is a free param may need to create a Parameter object
338  // If the line has 5 words then it can either be a floating param
339  // or one from a workspace. Otherwise if it has 6 then it's either
340  // a floating param with a range or a formula
341  bool is_wsp_rateparam = false;
342  try {
343  boost::lexical_cast<double>(words[i][4]);
344  } catch (boost::bad_lexical_cast &) {
345  is_wsp_rateparam = true;
346  }
347  if ((!is_wsp_rateparam) && (words[i].size() == 5 || has_range)) {
348  ch::Parameter* param = SetupRateParamVar(
349  param_name, boost::lexical_cast<double>(words[i][4]));
350  param->set_err_u(0.);
351  param->set_err_d(0.);
352  if (has_range) {
353  std::vector<std::string> tokens;
354  boost::split(tokens, words[i][5], boost::is_any_of("[],"));
355  if (tokens.size() == 4) {
356  param->set_range_d(boost::lexical_cast<double>(tokens[1]));
357  param->set_range_u(boost::lexical_cast<double>(tokens[2]));
358  FNLOGC(log(), verbosity_ > 1) << "Setting parameter range to " << words[i][5];
359  }
360  }
361  } else if (words[i].size() == 6 && !has_range) {
362  SetupRateParamFunc(param_name, words[i][4], words[i][5]);
363  } else if (words[i].size() == 5 && is_wsp_rateparam) {
364  SetupRateParamWspObj(param_name, words[i][4]);
365  }
366  for (unsigned p = 1; p < words[r].size(); ++p) {
367  bool matches_bin = false;
368  bool matches_proc = false;
369  int process_id;
370  std::string process;
371  std::string bin = words[r-3][p];
372  // Not great that we repeat this below
373  try {
374  process_id = boost::lexical_cast<int>(words[r-2][p]);
375  process = words[r-1][p];
376  } catch(boost::bad_lexical_cast &) {
377  process_id = boost::lexical_cast<int>(words[r-1][p]);
378  process = words[r-2][p];
379  }
380  // if (words[i][2] == "*" || words[i][2] == bin) {
381  if (words[i][2] == "*" || fnmatch(words[i][2].c_str(), bin.c_str(), 0) == 0) {
382  matches_bin = true;
383  }
384  // if (words[i][3] == "*" || words[i][3] == process) {
385  if (words[i][3] == "*" || fnmatch(words[i][3].c_str(), process.c_str(), 0) == 0) {
386  matches_proc = true;
387  }
388  if (!matches_bin || !matches_proc) continue;
389  auto sys = std::make_shared<Systematic>();
390  sys->set_bin(bin);
391  sys->set_signal(process_id <= 0);
392  sys->set_process(process);
393  sys->set_name(param_name);
394  sys->set_type("rateParam");
395  sys->set_analysis(analysis);
396  sys->set_era(era);
397  sys->set_channel(channel);
398  sys->set_bin_id(bin_id);
399  sys->set_mass(mass);
400  systs_.push_back(sys);
401  }
402  continue;
403  }
404 
405  if (start_nuisance_scan && words[i].size() >= 4 &&
406  boost::iequals(words[i][1], "group")) {
407  if (!groups.count(words[i][0])) {
408  groups[words[i][0]] = std::set<std::string>();
409  }
410  for (unsigned ig = 3; ig < words[i].size(); ++ig) {
411  groups[words[i][0]].insert(words[i][ig]);
412  }
413  continue;
414  }
415 
416  if (start_nuisance_scan && words[i].size() >= 3 &&
417  boost::iequals(words[i][1], "autoMCStats")) {
418  std::vector<std::string> for_bins;
419  if (words[i][0] == "*") {
420  for_bins = Set2Vec(bin_names);
421  } else {
422  for_bins.push_back(words[i][0]);
423  }
424  for (auto const& bin : for_bins) {
425  double thresh = boost::lexical_cast<double>(words[i][2]);
426  if (words[i].size() == 3) {
427  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh);
428  } else if (words[i].size() == 4) {
429  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]));
430  } else {
431  auto_stats_settings_[bin] = AutoMCStatsSettings(thresh, boost::lexical_cast<int>(words[i][3]), boost::lexical_cast<int>(words[i][4]));
432  }
433  }
434  }
435 
436  if (start_nuisance_scan && words[i].size()-1 == words[r].size() && !boost::iequals(words[i][1], "autoMCStats")) {
437  for (unsigned p = 2; p < words[i].size(); ++p) {
438  if (words[i][p] == "-") continue;
439  auto sys = std::make_shared<Systematic>();
440  sys->set_bin(words[r-3][p-1]);
441  try {
442  int process_id = boost::lexical_cast<int>(words[r-2][p-1]);
443  sys->set_signal(process_id <= 0);
444  sys->set_process(words[r-1][p-1]);
445  } catch(boost::bad_lexical_cast &) {
446  int process_id = boost::lexical_cast<int>(words[r-1][p-1]);
447  sys->set_signal(process_id <= 0);
448  sys->set_process(words[r-2][p-1]);
449  }
450  sys->set_name(words[i][0]);
451  std::string type = words[i][1];
452  if (!contains(std::vector<std::string>{"shape", "shape?", "shapeN2", "shapeU", "lnN", "lnU"},
453  type)) {
454  throw std::runtime_error(
455  FNERROR("Systematic type " + type + " not supported"));
456  }
457  sys->set_type(words[i][1]);
458  sys->set_analysis(analysis);
459  sys->set_era(era);
460  sys->set_channel(channel);
461  sys->set_bin_id(bin_id);
462  sys->set_mass(mass);
463  sys->set_scale(1.0);
464  std::size_t slash_pos = words[i][p].find("/");
465  if (slash_pos != words[i][p].npos) {
466  // Assume asymmetric of form kDown/kUp
467  sys->set_value_d(
468  boost::lexical_cast<double>(words[i][p].substr(0, slash_pos)));
469  sys->set_value_u(
470  boost::lexical_cast<double>(words[i][p].substr(slash_pos+1)));
471  sys->set_asymm(true);
472  } else {
473  sys->set_value_u(boost::lexical_cast<double>(words[i][p]));
474  sys->set_asymm(false);
475  }
476  if (sys->type() == "shape" || sys->type() == "shapeN2" ||
477  sys->type() == "shapeU") {
478  sys->set_scale(boost::lexical_cast<double>(words[i][p]));
479  LoadShapes(sys.get(), hist_mapping);
480  } else if (sys->type() == "shape?") {
481  // This might fail, so we have to "try"
482  try {
483  LoadShapes(sys.get(), hist_mapping);
484  } catch (std::exception & e) {
485  if (verbosity_ > 0) {
486  LOGLINE(log(), "Systematic with shape? did not resolve to a shape");
487  log() << e.what();
488  }
489  }
490  if (!sys->shape_u() || !sys->shape_d()) {
491  sys->set_type("lnN");
492  } else {
493  sys->set_type("shape");
494  sys->set_scale(boost::lexical_cast<double>(words[i][p]));
495  }
496  }
497  if (sys->type() == "shape" || sys->type() == "shapeN2" ||
498  sys->type() == "shapeU")
499  sys->set_asymm(true);
500 
502  if (sys->type() == "lnU" || sys->type() == "shapeU") {
503  params_.at(sys->name())->set_err_d(0.);
504  params_.at(sys->name())->set_err_u(0.);
505  }
506  systs_.push_back(sys);
507  }
508  }
509  }
510  if (single_obs) {
511  if (bin_names.size() == 1) {
512  single_obs->set_bin(*(bin_names.begin()));
513  LoadShapes(single_obs.get(), hist_mapping);
514  obs_.push_back(single_obs);
515  } else {
516  throw std::runtime_error(FNERROR(
517  "Input card specifies a single observation entry without a bin, but "
518  "multiple bins defined elsewhere"));
519  }
520  }
521 
522  // Finally populate the groups
523  for (auto const& grp : groups) {
524  this->SetGroup(grp.first, ch::Set2Vec(grp.second));
525  }
526  return 0;
527 }
528 
529 void CombineHarvester::WriteDatacard(std::string const& name,
530  std::string const& root_file) {
531  TFile file(root_file.c_str(), "RECREATE");
533  file.Close();
534 }
535 
536 void CombineHarvester::WriteDatacard(std::string const& name) {
537  TFile dummy;
538  CombineHarvester::WriteDatacard(name, dummy);
539  dummy.Close();
540 }
541 
542 void CombineHarvester::FillHistMappings(std::vector<HistMapping> & mappings) {
543  // Build maps of
544  // RooAbsData --> RooWorkspace
545  // RooAbsPdf --> RooWorkspace
546  std::map<RooAbsData const*, RooWorkspace*> data_ws_map;
547  std::map<RooAbsReal const*, RooWorkspace*> pdf_ws_map;
548  for (auto const& iter : wspaces_) {
549  auto dat = iter.second->allData();
550  for (auto d : dat) {
551  data_ws_map[d] = iter.second.get();
552  }
553  RooArgSet vars = iter.second->allPdfs();
554  auto v = vars.createIterator();
555  do {
556  RooAbsReal *y = dynamic_cast<RooAbsReal*>(**v);
557  if (y) pdf_ws_map[iter.second->pdf(y->GetName())] = iter.second.get();
558  } while (v->Next());
559  RooArgSet fvars = iter.second->allFunctions();
560  auto fv = fvars.createIterator();
561  do {
562  RooAbsReal *y = dynamic_cast<RooAbsReal*>(**fv);
563  if (y) pdf_ws_map[iter.second->function(y->GetName())] = iter.second.get();
564  } while (fv->Next());
565  }
566 
567  // For writing TH1s we will hard code a set of patterns for each bin
568  // This assumes that the backgrounds will not depend on "mass" but the
569  // signal will. Will probably want to change this in the future
570  std::set<std::string> hist_bins;
571  auto bins = this->bin_set();
572  for (auto bin : bins) {
573  unsigned shape_count = std::count_if(procs_.begin(), procs_.end(),
574  [&](std::shared_ptr<ch::Process> p) {
575  return (p->bin() == bin && p->shape() && (!p->signal()));
576  });
577  shape_count += std::count_if(obs_.begin(), obs_.end(),
578  [&](std::shared_ptr<ch::Observation> p) {
579  return (p->bin() == bin && p->shape());
580  });
581  unsigned counting = std::count_if(
582  procs_.begin(), procs_.end(), [&](std::shared_ptr<ch::Process> p) {
583  return (p->bin() == bin && p->shape() == nullptr &&
584  p->pdf() == nullptr && p->data() == nullptr);
585  });
586  counting += std::count_if(
587  obs_.begin(), obs_.end(), [&](std::shared_ptr<ch::Observation> p) {
588  return (p->bin() == bin && p->shape() == nullptr &&
589  p->data() == nullptr);
590  });
591 
592  if (shape_count > 0) {
593  mappings.emplace_back("*", bin, bin + "/$PROCESS",
594  bin + "/$PROCESS_$SYSTEMATIC");
595  hist_bins.insert(bin);
596  } else if (counting > 0) {
597  mappings.emplace_back("*", bin, "", "");
598  mappings.back().is_fake = true;
599  }
600 
601  CombineHarvester ch_signals =
602  std::move(this->cp().bin({bin}).signals().histograms());
603  auto sig_proc_set =
604  ch_signals.SetFromProcs(std::mem_fn(&ch::Process::process));
605  for (auto sig_proc : sig_proc_set) {
606  // should only add this mapping if the signal process has a numeric mass
607  // value, otherwise we will write it using the background rule above
608  auto masses = Set2Vec(ch_signals.cp()
609  .process({sig_proc})
610  .SetFromProcs(std::mem_fn(&ch::Process::mass)));
611  if (masses.size() != 1) {
612  throw std::runtime_error(FNERROR("Process " + sig_proc + " in bin " +
613  bin +
614  " has multiple entries with multiple "
615  "mass values, this is not supported"));
616 
617  }
618  if (is_float(masses[0])) {
619  mappings.emplace_back(sig_proc, bin, bin + "/" + sig_proc + "$MASS",
620  bin + "/" + sig_proc + "$MASS_$SYSTEMATIC");
621  }
622  }
623  }
624 
625  // Generate mappings for RooFit objects
626  for (auto bin : bins) {
627  CombineHarvester ch_bin = std::move(this->cp().bin({bin}));
628  for (auto obs : ch_bin.obs_) {
629  if (!obs->data()) continue;
630  std::string obj_name = std::string(data_ws_map[obs->data()]->GetName()) +
631  ":" + std::string(obs->data()->GetName());
632  mappings.emplace_back("data_obs", obs->bin(), obj_name, "");
633  }
634 
635  bool prototype_ok = false;
636  HistMapping prototype;
637  std::vector<HistMapping> full_list;
638  auto pmap = ch_bin.GenerateProcSystMap();
639  for (unsigned i = 0; i < ch_bin.procs_.size(); ++i) {
640  ch::Process * proc = ch_bin.procs_[i].get();
641  if (!proc->data() && !proc->pdf()) continue;
642  std::string obj_name;
643  std::string obj_sys_name;
644  if (proc->data()) {
645  obj_name = std::string(proc->data()->GetName());
646  boost::replace_all(obj_name, proc->process(), "$PROCESS");
647  obj_name = std::string(data_ws_map[proc->data()]->GetName()) + ":" +
648  obj_name;
649  }
650  if (proc->pdf()) {
651  obj_name = std::string(proc->pdf()->GetName());
652  boost::replace_all(obj_name, proc->process(), "$PROCESS");
653  obj_name = std::string(pdf_ws_map[proc->pdf()]->GetName()) +
654  ":" + obj_name;
655  }
656  for (unsigned j = 0; j < pmap[i].size(); ++j) {
657  ch::Systematic const* sys = pmap[i][j];
658  if (sys->data_u()) {
659  obj_sys_name = std::string(sys->data_u()->GetName());
660  boost::replace_all(obj_sys_name, sys->name() + "Up", "$SYSTEMATIC");
661  boost::replace_all(obj_sys_name, sys->process(), "$PROCESS");
662  obj_sys_name = std::string(data_ws_map[sys->data_u()]->GetName()) +
663  ":" + obj_sys_name;
664  break;
665  }
666  if (sys->pdf_u()) {
667  obj_sys_name = std::string(sys->pdf_u()->GetName());
668  boost::replace_all(obj_sys_name, sys->name() + "Up", "$SYSTEMATIC");
669  boost::replace_all(obj_sys_name, sys->process(), "$PROCESS");
670  obj_sys_name = std::string(pdf_ws_map[sys->pdf_u()]->GetName()) +
671  ":" + obj_sys_name;
672  break;
673  }
674  }
675 
676  // If the prototype pattern is already filled, but doesn't equal this
677  // new pattern - then we can't use the prototype
678  if (prototype.pattern.size() && prototype.pattern != obj_name) {
679  prototype_ok = false;
680  }
681 
682  if (prototype.syst_pattern.size() && obj_sys_name.size() &&
683  prototype.syst_pattern != obj_sys_name) {
684  prototype_ok = false;
685  }
686 
687  if (!prototype.pattern.size()) {
688  prototype_ok = true;
689  prototype.process = "*";
690  prototype.category = bin;
691  prototype.pattern = obj_name;
692  }
693  if (!prototype.syst_pattern.size()) {
694  prototype.syst_pattern = obj_sys_name;
695  }
696 
697  full_list.emplace_back(proc->process(), proc->bin(), obj_name,
698  obj_sys_name);
699  }
700  // There are two reasons we won't want to write a generic mapping
701  // for the processes in this bin:
702  // 1) One or more processes is not described by the prototype mapping
703  // 2) We also have a generic histogram mapping for this bin
704  if (!prototype_ok || hist_bins.count(bin)) {
705  for (auto m : full_list) {
706  mappings.push_back(m);
707  }
708  } else {
709  mappings.push_back(prototype);
710  }
711  }
712 }
713 
714 
715 void CombineHarvester::WriteDatacard(std::string const& name,
716  TFile& root_file) {
717  using boost::format;
718 
719  // First figure out if this is a counting-experiment only
720  bool is_counting = true;
721  this->ForEachObs([&](ch::Observation *obs) {
722  if (obs->shape() != nullptr || obs->data() != nullptr) {
723  is_counting = false;
724  }
725  });
726  if (is_counting) {
727  this->ForEachProc([&](ch::Process *proc) {
728  if (proc->shape() != nullptr || proc->data() != nullptr ||
729  proc->pdf() != nullptr) {
730  is_counting = false;
731  }
732  });
733  }
734 
735  // Allow a non-open ROOT file if this is purely a counting experiment
736  if (!root_file.IsOpen() && !is_counting) {
737  throw std::runtime_error(FNERROR(
738  std::string("Output ROOT file is not open: ") + root_file.GetName()));
739  }
740 
741  std::unique_ptr<std::ostream> txt_file_ptr = nullptr;
742 
743  // Figure out if the datacard name ends with ".gz"
744  std::string zip_ext = ".gz";
745  bool has_zip_ext = (name.length() >= zip_ext.length() && name.compare(name.length() - zip_ext.length(), zip_ext.length(), zip_ext) == 0);
746 
747  if (has_zip_ext) {
748  txt_file_ptr = std::make_unique<zstr::ofstream>(name);
749  } else {
750  txt_file_ptr = std::make_unique<std::ofstream>(name);
751  }
752  if (txt_file_ptr->fail()) {
753  throw std::runtime_error(FNERROR("Unable to create file: " + name));
754  }
755 
756  std::ostream & txt_file = *txt_file_ptr;
757 
758  //txt_file << "# Datacard produced by CombineHarvester with git status: "
759  // << ch::GitVersion() << "\n";
760 
761  std::string dashes(80, '-');
762 
763  auto bin_set = this->SetFromProcs(std::mem_fn(&ch::Process::bin));
764  auto proc_set = this->SetFromProcs(std::mem_fn(&ch::Process::process));
765  std::set<std::string> sys_set;
766  std::set<std::string> rateparam_set;
767  this->ForEachSyst([&](ch::Systematic const* sys) {
768  if (sys->type() == "rateParam") {
769  rateparam_set.insert(sys->name());
770  } else {
771  sys_set.insert(sys->name());
772  }
773  });
774  txt_file << "imax " << bin_set.size()
775  << " number of bins\n";
776  txt_file << "jmax " << proc_set.size() - 1
777  << " number of processes minus 1\n";
778  txt_file << "kmax " << "*" << " number of nuisance parameters\n";
779  txt_file << dashes << "\n";
780 
781 
782  std::vector<HistMapping> mappings;
783  FillHistMappings(mappings);
784 
785  auto bins = this->SetFromObs(std::mem_fn(&ch::Observation::bin));
786 
787  auto proc_sys_map = this->GenerateProcSystMap();
788 
789  std::set<std::string> all_dependents_pars;
790  std::set<std::string> multipdf_cats;
791  for (auto proc : procs_) {
792  if (!proc->pdf()) continue;
793  // The rest of this is building the list of dependents
797  RooAbsData const* data_obj = FindMatchingData(proc.get());
798  RooArgSet par_list;
799  RooArgSet norm_par_list;
800 
801  if (data_obj) {
802  par_list.add(ParametersByName(proc->pdf(), data_obj->get()));
803  if (proc->norm()) {
804  norm_par_list.add(ParametersByName(proc->norm(), data_obj->get()));
805  }
806  } else {
807  RooRealVar mx("CMS_th1x" , "CMS_th1x", 0, 1);
808  RooArgSet tmp_set(mx);
809  par_list.add(ParametersByName(proc->pdf(), &tmp_set));
810  if (proc->norm()) {
811  norm_par_list.add(ParametersByName(proc->norm(), &tmp_set));
812  }
813  }
814  RooFIter par_list_it = par_list.fwdIterator();
815  RooAbsArg *par_list_var = nullptr;
816  while ((par_list_var = par_list_it.next())) {
817  if (dynamic_cast<RooRealVar*>(par_list_var)) {
818  all_dependents_pars.insert(par_list_var->GetName());
819  }
820  // If this pdf has a RooCategory parameter then it's probably a
821  // RooMultiPdf. We'll need to write a special line in the datacard to
822  // indicate that this is a discrete pdf index. However it's best to be
823  // sure, so we'll use ROOT's string-based RTTI to check the type without
824  // having to link against HiggsAnalysis/CombinedLimit
825  if (dynamic_cast<RooCategory*>(par_list_var) &&
826  proc->pdf()->InheritsFrom("RooMultiPdf")) {
827  multipdf_cats.insert(par_list_var->GetName());
828  }
829  }
830  if (proc->norm()) {
831  RooFIter nm_list_it = norm_par_list.fwdIterator();
832  RooAbsArg *nm_list_var = nullptr;
833  while ((nm_list_var = nm_list_it.next())) {
834  if (dynamic_cast<RooRealVar*>(nm_list_var)) {
835  all_dependents_pars.insert(nm_list_var->GetName());
836  }
837  }
838  }
839  }
840 
841  // The ROOT file mapping should be given as a relative path
842  std::string file_name = root_file.GetName();
843  // Get the full path to the output root file
844  // NOTE: was using canonical here instead of absolute, but not
845  // supported in boost 1.47
846  boost::filesystem::path root_file_path =
847  boost::filesystem::absolute(file_name);
848  // Get the full path to the directory containing the txt file
849  boost::filesystem::path txt_file_path =
850  boost::filesystem::absolute(name).parent_path();
851  // Compute the relative path from the txt file to the root file
852  file_name = make_relative(txt_file_path, root_file_path).string();
853 
854  // Keep track of the workspaces we actually need to write into the
855  // output file
856  std::set<std::string> used_wsps;
857 
858  for (auto const& mapping : mappings) {
859  if (!mapping.is_fake) {
860  txt_file << format("shapes %s %s %s %s %s\n")
861  % mapping.process
862  % mapping.category
863  % file_name
864  % mapping.pattern
865  % mapping.syst_pattern;
866  if (mapping.IsPdf() || mapping.IsData()) {
867  used_wsps.insert(mapping.WorkspaceName());
868  if (mapping.syst_pattern != "") {
869  used_wsps.insert(mapping.SystWorkspaceName());
870  }
871  }
872  } else {
873  txt_file << format("shapes %s %s %s\n")
874  % mapping.process
875  % mapping.category
876  % "FAKE";
877  }
878  }
879  txt_file << dashes << "\n";
880 
881  if (!is_counting) {
882  for (auto ws_it : wspaces_) {
883  if (ws_it.first == "_rateParams") continue; // don't write this one
884  // Also skip any workspace that isn't needed for this card
885  if (!used_wsps.count(ws_it.second->GetName())) continue;
886  ch::WriteToTFile(ws_it.second.get(), &root_file, ws_it.second->GetName());
887  }
888  }
889 
890  // Writing observations
891  if (obs_.size() > 0) {
892  txt_file << "bin ";
893  for (auto const& obs : obs_) {
894  txt_file << format("%-15s ") % obs->bin();
895  if (obs->shape()) {
896  bool add_dir = TH1::AddDirectoryStatus();
897  TH1::AddDirectory(false);
898  std::unique_ptr<TH1> h((TH1*)(obs->shape()->Clone()));
899  h->Scale(obs->rate());
900  WriteHistToFile(h.get(), &root_file, mappings, obs->bin(), "data_obs",
901  obs->mass(), "", 0);
902  TH1::AddDirectory(add_dir);
903  }
904  }
905  txt_file << "\n";
906  txt_file << "observation ";
907  // On the precision of the observation yields: .1f is not sufficient for
908  // combine to be happy if we have some asimov dataset with non-integer values.
909  // We could just always give .4f but this doesn't look nice for the majority
910  // of cards that have real data. Instead we'll check...
911  std::string obs_fmt_int = "%-15.1f ";
912  std::string obs_fmt_flt = "%-15.4f ";
913  for (auto const& obs : obs_) {
914  bool is_float =
915  std::fabs(obs->rate() - std::round(obs->rate())) > 1E-4;
916  txt_file << format(is_float ? obs_fmt_flt : obs_fmt_int)
917  % obs->rate();
918  }
919  txt_file << "\n";
920  txt_file << dashes << "\n";
921  }
922 
923  unsigned sys_str_len = 14;
924  for (auto const& sys : sys_set) {
925  if (sys.length() > sys_str_len) sys_str_len = sys.length();
926  }
927  for (auto const& sys : all_dependents_pars) {
928  if (sys.length() > sys_str_len) sys_str_len = sys.length();
929  }
930  std::string sys_str_short = boost::lexical_cast<std::string>(sys_str_len);
931  std::string sys_str_long = boost::lexical_cast<std::string>(sys_str_len+9);
932 
933  auto getProcLen = [](auto const& proc) -> std::string {
934  std::size_t proc_len = 15;
935  proc_len = std::max(proc_len, proc->process().size());
936  std::string proc_len_str = boost::lexical_cast<std::string>(proc_len);
937  return proc_len_str;
938  };
939 
940  txt_file << format("%-"+sys_str_long+"s") % "bin";
941  for (auto const& proc : procs_) {
942  if (proc->shape()) {
943  bool add_dir = TH1::AddDirectoryStatus();
944  TH1::AddDirectory(false);
945  std::unique_ptr<TH1> h = proc->ClonedScaledShape();
946  WriteHistToFile(h.get(), &root_file, mappings, proc->bin(),
947  proc->process(), proc->mass(), "", 0);
948  TH1::AddDirectory(add_dir);
949  }
950  txt_file << format("%-"+getProcLen(proc)+"s ") % proc->bin();
951  }
952  txt_file << "\n";
953 
954  txt_file << format("%-"+sys_str_long+"s") % "process";
955 
956  for (auto const& proc : procs_) {
957  txt_file << format("%-"+getProcLen(proc)+"s ") % proc->process();
958  }
959  txt_file << "\n";
960 
961  txt_file << format("%-"+sys_str_long+"s") % "process";
962 
963  // Setup process_ids first
964  std::map<std::string, int> p_ids;
965  unsigned sig_id = 0;
966  unsigned bkg_id = 1;
967  for (unsigned p = 0; p < procs_.size(); ++p) {
968  if (!procs_[p]->signal()) {
969  if (p_ids.count(procs_[p]->process()) == 0) {
970  p_ids[procs_[p]->process()] = bkg_id;
971  ++bkg_id;
972  }
973  else {
974  // check if process was already picked up as signal
975  if (p_ids[procs_[p]->process()] <= 0) throw std::runtime_error(FNERROR("Ambiguous definition of process (" + procs_[p]->process() + ") as both signal and background"));
976  }
977  }
978  unsigned q = procs_.size() - (p + 1);
979  if (procs_[q]->signal()) {
980  if (p_ids.count(procs_[q]->process()) == 0) {
981  p_ids[procs_[q]->process()] = sig_id;
982  --sig_id;
983  }
984  else {
985  // check if process was already picked up as background
986  if (p_ids[procs_[q]->process()] > 0) throw std::runtime_error(FNERROR("Ambiguous definition of process (" + procs_[q]->process() + ") as both signal and background"));
987  }
988  }
989  }
990  for (auto const& proc : procs_) {
991  txt_file << format("%-"+getProcLen(proc)+"s ") % p_ids[proc->process()];
992  }
993  txt_file << "\n";
994 
995 
996  txt_file << format("%-"+sys_str_long+"s") % "rate";
997  for (auto const& proc : procs_) {
998  txt_file << format("%-"+getProcLen(proc)+".6g ") % proc->no_norm_rate();
999  }
1000  txt_file << "\n";
1001  txt_file << dashes << "\n";
1002 
1003  // Need to write parameters here that feature both in the list of pdf
1004  // dependents and sys_set.
1005  for (auto par : params_) {
1006  Parameter const* p = par.second.get();
1007  if (p->err_d() != 0.0 && p->err_u() != 0.0 &&
1008  all_dependents_pars.count(p->name()) && sys_set.count(p->name())) {
1009  txt_file << format((format("%%-%is param %%g %%g") % sys_str_len).str()) %
1010  p->name() % p->val() % ((p->err_u() - p->err_d()) / 2.0);
1011  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1012  p->range_u() != std::numeric_limits<double>::max()) {
1013  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1014  }
1015  txt_file << "\n";
1016  }
1017  }
1018 
1019  for (auto const& sys : sys_set) {
1020  std::vector<std::string> line(procs_.size() + 2);
1021  line[0] = sys;
1022  bool seen_lnN = false;
1023  bool seen_lnU = false;
1024  bool seen_shape = false;
1025  bool seen_shapeN2 = false;
1026  bool seen_shapeU = false;
1027  for (unsigned p = 0; p < procs_.size(); ++p) {
1028  line[p + 2] = "-";
1029  for (unsigned n = 0; n < proc_sys_map[p].size(); ++n) {
1030  ch::Systematic const* ptr = proc_sys_map[p][n];
1031  // Only interested into Systematics with name "sys"
1032  if (ptr->name() != sys) continue;
1033  std::string tp = ptr->type();
1034  if (tp == "lnN" || tp == "lnU") {
1035  if (tp == "lnN") seen_lnN = true;
1036  if (tp == "lnU") seen_lnU = true;
1037  line[p + 2] =
1038  ptr->asymm()
1039  ? (format("%g/%g") % ptr->value_d() % ptr->value_u()).str()
1040  : (format("%g") % ptr->value_u()).str();
1041  break;
1042  }
1043  if (tp == "shape" || tp == "shapeN2" || tp == "shapeU") {
1044  if (tp == "shape") seen_shape = true;
1045  if (tp == "shapeN2") seen_shapeN2 = true;
1046  if (tp == "shapeU") seen_shapeU = true;
1047  line[p + 2] = (format("%g") % ptr->scale()).str();
1048  if (ptr->shape_u() && ptr->shape_d()) {
1049  bool add_dir = TH1::AddDirectoryStatus();
1050  TH1::AddDirectory(false);
1051  std::unique_ptr<TH1> h_d = ptr->ClonedShapeD();
1052  h_d->Scale(procs_[p]->rate() * ptr->value_d());
1053  WriteHistToFile(h_d.get(), &root_file, mappings, ptr->bin(),
1054  ptr->process(), ptr->mass(), ptr->name(), 1);
1055  std::unique_ptr<TH1> h_u = ptr->ClonedShapeU();
1056  h_u->Scale(procs_[p]->rate() * ptr->value_u());
1057  WriteHistToFile(h_u.get(), &root_file, mappings, ptr->bin(),
1058  ptr->process(), ptr->mass(), ptr->name(), 2);
1059  TH1::AddDirectory(add_dir);
1060  break;
1061  } else if ( (ptr->data_u() && ptr->data_d()) || (ptr->pdf_u() && ptr->pdf_d()) ) {
1062  } else {
1063  if (!flags_.at("allow-missing-shapes")) {
1064  std::stringstream err;
1065  err << "Trying to write shape uncertainty with missing "
1066  "shapes:\n";
1067  err << Systematic::PrintHeader << *ptr;
1068  throw std::runtime_error(FNERROR(err.str()));
1069  }
1070  }
1071  }
1072  }
1073  }
1074  if (seen_shapeN2) {
1075  line[1] = "shapeN2";
1076  } else if (seen_shapeU) {
1077  line[1] = "shapeU";
1078  } else if (seen_lnU) {
1079  line[1] = "lnU";
1080  } else if (seen_lnN && !seen_shape) {
1081  line[1] = "lnN";
1082  } else if (!seen_lnN && seen_shape) {
1083  line[1] = "shape";
1084  } else if (seen_lnN && seen_shape) {
1085  line[1] = "shape?";
1086  } else {
1087  throw std::runtime_error(FNERROR("Systematic type could not be deduced"));
1088  }
1089  txt_file << format("%-" + sys_str_short + "s %-7s ") % line[0] % line[1];
1090  for (unsigned p = 0; p < procs_.size(); ++p) {
1091  txt_file << format("%-"+getProcLen(procs_[p])+"s ") % line[p + 2];
1092  }
1093  txt_file << "\n";
1094  }
1095  // write param line for any parameter which has a non-zero error
1096  // and which doesn't appear in list of nuisances
1097  CombineHarvester ch_rp = this->cp().syst_type({"rateParam"});
1098  // These are all the processes with a rateParam. We'll look at each one and
1099  // see if every bin with this process also has this rateParam.
1100  std::vector<std::vector<std::string> > floating_params;
1101  for (auto const& sys : rateparam_set) {
1102  FNLOGC(log(), verbosity_ > 1) << "Doing rateParam: " << sys << "\n";
1103  auto procs_rp = ch_rp.cp().syst_name({sys}).SetFromSysts(
1104  std::mem_fn(&ch::Systematic::process));
1105  FNLOGC(log(), verbosity_ > 1) << "Procs for this rateParam: \n";
1106  for (auto const& proc : procs_rp) {
1107  FNLOGC(log(), verbosity_ > 1) << " - " << proc << "\n";
1108  // Get the set of bins
1109  auto bins_rp = ch_rp.cp().process({proc}).syst_name({sys}).SetFromSysts(
1110  std::mem_fn(&ch::Systematic::bin));
1111  auto bins_all = this->cp().process({proc}).bin_set();
1112  if (bins_rp.size() > 0 && bins_rp.size() == bins_all.size()) {
1113  floating_params.push_back({sys, "*", proc});
1114  } else {
1115  for (auto const& bin : bins_rp) {
1116  floating_params.push_back({sys, bin, proc});
1117  }
1118  }
1119  FNLOGC(log(), verbosity_ > 1)
1120  << bins_rp.size() << "/" << bins_all.size()
1121  << " bins with this process have a rateParam\n";
1122  }
1123  }
1124  std::set<std::string> frozen_params;
1125  for (auto const& rp : floating_params) {
1126  if (params_.count(rp[0])) {
1127  ch::Parameter const* par = params_.at(rp[0]).get();
1128  txt_file << format("%-" + sys_str_short + "s %-10s %-10s %-10s %g") %
1129  rp[0] % "rateParam" % rp[1] % rp[2] % par->val();
1130  // Check if we have to set the range too
1131  // TODO: should package this into some function in ch::Parameter
1132  if (par->range_d() != std::numeric_limits<double>::lowest() &&
1133  par->range_u() != std::numeric_limits<double>::max()) {
1134  txt_file << format(" [%.4g,%.4g]") % par->range_d() % par->range_u();
1135  }
1136  txt_file << "\n";
1137  if (par->frozen()) {
1138  frozen_params.insert(rp[0]);
1139  }
1140  }
1141  }
1142  for (auto const& par : frozen_params) {
1143  txt_file << "nuisance edit freeze " << par << "\n";
1144  }
1145  std::set<std::string> all_fn_param_args;
1146  for (auto const& rp : floating_params) {
1147  if (!params_.count(rp[0])) {
1148  // If we don't have a ch::Parameter with this name, then we'll assume
1149  // this is a function
1150  RooWorkspace *rp_ws = wspaces_.at("_rateParams").get();
1151  RooAbsArg *arg = rp_ws->arg(rp[0].c_str());
1152  if (arg->getStringAttribute("wspSource")) {
1153  txt_file << format("%-" + sys_str_short +
1154  "s %-10s %-10s %-10s %-20s\n") %
1155  rp[0] % "rateParam" % rp[1] % rp[2] % std::string(arg->getStringAttribute("wspSource"));
1156  continue;
1157  }
1158  RooFormulaVar* form =
1159  dynamic_cast<RooFormulaVar*>(arg);
1160  // Want to extract the formula string, apparently "printMetaArgs" is the
1161  // only way to do it
1162  std::stringstream ss;
1163  form->printMetaArgs(ss);
1164  std::string form_str = ss.str();
1165  // The string will be like 'formula="blah"', strip everything on either
1166  // side of the double quotes (including the quotes themselves)
1167  std::size_t first = form_str.find_first_of('"');
1168  std::size_t last = form_str.find_last_of('"');
1169  form_str = form_str.substr(first + 1, last - first - 1);
1170  // unique_ptr because we own the RooArgSet that is generated
1171  unsigned npars =
1172  std::unique_ptr<RooArgSet>(form->getParameters(RooArgList()))->getSize();
1173  std::string args = "";
1174  for (unsigned i = 0; i < npars; ++i) {
1175  all_fn_param_args.insert(std::string(form->getParameter(i)->GetName()));
1176  args += std::string(form->getParameter(i)->GetName());
1177  if (i < (npars-1)) {
1178  args += ",";
1179  }
1180  }
1181  txt_file << format("%-" + sys_str_short +
1182  "s %-10s %-10s %-10s %-20s %s\n") %
1183  rp[0] % "rateParam" % rp[1] % rp[2] % form_str % args;
1184  }
1185  }
1186 
1187  if (wspaces_.count("_rateParams")) {
1188  RooWorkspace *rp_ws = wspaces_.at("_rateParams").get();
1189  RooArgSet vars = rp_ws->allVars();
1190  auto v = vars.createIterator();
1191  do {
1192  RooRealVar *y = dynamic_cast<RooRealVar*>(**v);
1193  if (y && y->getAttribute("extArg") && all_fn_param_args.count(std::string(y->GetName()))) {
1194  Parameter const* p = params_.at(y->GetName()).get();
1195  txt_file << format("%-" + sys_str_short + "s %-10s %g") %
1196  y->GetName() % "extArg" % p->val();
1197  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1198  p->range_u() != std::numeric_limits<double>::max()) {
1199  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1200  }
1201  txt_file << "\n";
1202 
1203  }
1204  } while (v->Next());
1205  RooArgSet funcs = rp_ws->allFunctions();
1206  v = funcs.createIterator();
1207  do {
1208  RooAbsReal *y = dynamic_cast<RooAbsReal*>(**v);
1209  if (y && y->getAttribute("extArg") && y->getStringAttribute("wspSource") && all_fn_param_args.count(std::string(y->GetName()))) {
1210  txt_file << format("%-" + sys_str_short +
1211  "s %-10s %-20s\n") %
1212  y->GetName() % "extArg" % std::string(y->getStringAttribute("wspSource"));
1213  continue;
1214 
1215  }
1216  } while (v->Next());
1217  }
1218 
1219  std::set<std::string> ws_vars;
1220  for (auto iter : wspaces_) {
1221  RooArgSet vars = iter.second->allVars();
1222  auto v = vars.createIterator();
1223  do {
1224  RooRealVar *y = dynamic_cast<RooRealVar*>(**v);
1225  if (y) {
1226  ws_vars.insert(y->GetName());
1227  }
1228  } while (v->Next());
1229  }
1230 
1231  // How to check for params we need to write:
1232  // Get a list of all pdf dependents for the processes we've just written
1233  // Also need a list of all Systematic entries we've just written
1234  // Then we write param if:
1235  // - it has non-zero errors
1236  // - it appears in the first list
1237  // - it doesn't appear in the second list
1238  for (auto par : params_) {
1239  Parameter const* p = par.second.get();
1240  if (p->err_d() != 0.0 && p->err_u() != 0.0 &&
1241  all_dependents_pars.count(p->name()) && !sys_set.count(p->name())) {
1242  txt_file << format((format("%%-%is param %%g %%g") % sys_str_len).str()) %
1243  p->name() % p->val() % ((p->err_u() - p->err_d()) / 2.0);
1244  if (p->range_d() != std::numeric_limits<double>::lowest() &&
1245  p->range_u() != std::numeric_limits<double>::max()) {
1246  txt_file << format(" [%.4g,%.4g]") % p->range_d() % p->range_u();
1247  }
1248  txt_file << "\n";
1249  }
1250  }
1251 
1252  for (auto par : multipdf_cats) {
1253  txt_file << format((format("%%-%is discrete\n") % sys_str_len).str()) % par;
1254  }
1255 
1256  for (auto stat_settings : auto_stats_settings_) {
1257  if (!bin_set.count(stat_settings.first)) continue;
1258  txt_file << format("%s autoMCStats %g %i %i\n") % stat_settings.first %
1259  stat_settings.second.event_threshold %
1260  stat_settings.second.include_signal %
1261  stat_settings.second.hist_mode;
1262  }
1263 
1264  // Finally write the NP groups
1265  // We should only consider parameters are either:
1266  // - in the list of pdf dependents
1267  // - in the list of systematics
1268  std::map<std::string, std::set<std::string>> groups;
1269  for (auto const& par : params_) {
1270  Parameter * p = par.second.get();
1271  if (!all_dependents_pars.count(p->name()) && !sys_set.count(p->name())) {
1272  continue;
1273  }
1274  if (p->err_d() == 0.0 && p->err_u() == 0.0) continue;
1275  for (auto const& str : p->groups()) {
1276  if (!groups.count(str)) {
1277  groups[str] = std::set<std::string>();
1278  }
1279  groups[str].insert(p->name());
1280  }
1281  }
1282  for (auto const& gr : groups) {
1283  txt_file << format("%s group =") % gr.first;
1284  for (auto const& np : gr.second) {
1285  txt_file << " " << np;
1286  }
1287  txt_file << "\n";
1288  }
1289 
1290  for (auto const& postl : post_lines_) {
1291  txt_file << postl << "\n";
1292  }
1293 }
1294 
1295 void CombineHarvester::WriteHistToFile(
1296  TH1 * hist,
1297  TFile * file,
1298  std::vector<HistMapping> const& mappings,
1299  std::string const& bin,
1300  std::string const& process,
1301  std::string const& mass,
1302  std::string const& nuisance,
1303  unsigned type) {
1304  StrPairVec attempts = this->GenerateShapeMapAttempts(process, bin);
1305  for (unsigned a = 0; a < attempts.size(); ++a) {
1306  for (unsigned m = 0; m < mappings.size(); ++m) {
1307  if ((attempts[a].first == mappings[m].process) &&
1308  (attempts[a].second == mappings[m].category)) {
1309  std::string p = (type == 0 ?
1310  mappings[m].pattern : mappings[m].syst_pattern);
1311  boost::replace_all(p, "$CHANNEL", bin);
1312  boost::replace_all(p, "$PROCESS", process);
1313  boost::replace_all(p, "$MASS", mass);
1314  if (type == 1) boost::replace_all(p, "$SYSTEMATIC", nuisance+"Down");
1315  if (type == 2) boost::replace_all(p, "$SYSTEMATIC", nuisance+"Up");
1316  WriteToTFile(hist, file, p);
1317  return;
1318  }
1319  }
1320  }
1321 }
1322 }
ch::Systematic::pdf_d
RooAbsReal const * pdf_d() const
Definition: Systematic.h:54
ch::CombineHarvester::analysis
CombineHarvester & analysis(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:55
ch::Parameter::frozen
bool frozen() const
Definition: Parameter.h:51
ch::CombineHarvester::ForEachProc
void ForEachProc(Function func)
Definition: CombineHarvester.h:632
ch::Systematic::pdf_u
RooAbsReal const * pdf_u() const
Definition: Systematic.h:52
ch::Systematic::asymm
bool asymm() const
Definition: Systematic.h:36
ch::CombineHarvester::ForEachObs
void ForEachObs(Function func)
Definition: CombineHarvester.h:637
ch::Systematic::PrintHeader
static std::ostream & PrintHeader(std::ostream &out)
Definition: Systematic.cc:251
ch::Parameter::err_d
double err_d() const
Definition: Parameter.h:37
Parameter.h
ch::CombineHarvester::mass
CombineHarvester & mass(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:97
ch::is_float
bool is_float(std::string const &str)
Definition: Utilities.cc:241
ch::Systematic::data_d
RooDataHist const * data_d() const
Definition: Systematic.h:50
ch::Parameter::set_name
void set_name(std::string const &name)
Definition: Parameter.h:20
ch::Systematic::ClonedShapeU
std::unique_ptr< TH1 > ClonedShapeU() const
Definition: Systematic.cc:200
TFileIO.h
ch::CombineHarvester::syst_type
CombineHarvester & syst_type(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:136
Utilities.h
Process.h
ch::Observation::rate
double rate() const
Definition: Observation.h:21
ch::CombineHarvester::CreateParameterIfEmpty
void CreateParameterIfEmpty(std::string const &name)
Definition: CombineHarvester_Creation.cc:228
ch::Object::bin
virtual std::string const & bin() const
Definition: Object.h:17
ch::syst::era
Definition: Systematics.h:21
FNERROR
#define FNERROR(x)
Definition: Logging.h:9
ch::Parameter::range_u
double range_u() const
Definition: Parameter.h:40
ch::WriteToTFile
void WriteToTFile(T *ptr, TFile *file, std::string const &path)
Definition: TFileIO.h:31
ch::Systematic::shape_d
TH1 const * shape_d() const
Definition: Systematic.h:46
ch::Systematic::value_u
double value_u() const
Definition: Systematic.h:27
ch::Process
Definition: Process.h:15
ch::Process::shape
TH1 const * shape() const
Definition: Process.h:52
ch::HistMapping::WorkspaceName
std::string WorkspaceName() const
Definition: HistMapping.cc:37
ch::Parameter
Definition: Parameter.h:12
ch::Parameter::set_val
void set_val(double const &val)
Definition: Parameter.h:23
ch::syst::channel
Definition: Systematics.h:26
ch::contains
bool contains(const Range &r, T p)
Definition: Algorithm.h:22
ch::Parameter::val
double val() const
Definition: Parameter.h:31
ch::syst::bin_id
Definition: Systematics.h:41
FNLOGC
#define FNLOGC(x, y)
Definition: Logging.h:14
ch::CombineHarvester::bin_set
std::set< std::string > bin_set()
Definition: CombineHarvester_Filters.cc:190
ch::HistMapping::pattern
std::string pattern
Definition: HistMapping.h:13
ch::Parameter::set_range_d
void set_range_d(double const &range_d)
Definition: Parameter.h:42
MakeUnique.h
ch::Systematic::scale
double scale() const
Definition: Systematic.h:33
ch::HistMapping::process
std::string process
Definition: HistMapping.h:11
ch::Process::data
RooAbsData const * data() const
Definition: Process.h:63
ch::Object::mass
virtual std::string const & mass() const
Definition: Object.h:38
ch::HistMapping::category
std::string category
Definition: HistMapping.h:12
ch::ParametersByName
RooArgSet ParametersByName(RooAbsReal const *pdf, RooArgSet const *dat_vars)
Definition: Utilities.cc:18
ch::CombineHarvester::signals
CombineHarvester & signals()
Definition: CombineHarvester_Filters.cc:146
ch::HistMapping::sys_ws
std::shared_ptr< RooWorkspace > sys_ws
Definition: HistMapping.h:17
Systematic.h
ch::syst::process
Definition: Systematics.h:36
ch::Process::no_norm_rate
double no_norm_rate() const
Get the process normalisation without multiplying by the RooAbsReal value (in the case that it's pres...
Definition: Process.h:46
ch::Parameter::range_d
double range_d() const
Definition: Parameter.h:43
ch::CombineHarvester::SetFromSysts
std::set< R > SetFromSysts(T func)
Fill an std::set using only the Systematic entries.
Definition: CombineHarvester.h:618
ch
Definition: Algorithm.h:10
ch::Parameter::set_range_u
void set_range_u(double const &range_u)
Definition: Parameter.h:39
Algorithm.h
ch::CombineHarvester::cp
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
Definition: CombineHarvester.cc:220
ch::make_relative
boost::filesystem::path make_relative(boost::filesystem::path p_from, boost::filesystem::path p_to)
Determine the relative path from one file to another.
Definition: Utilities.cc:313
ch::Parameter::set_err_d
void set_err_d(double const &err_d)
Definition: Parameter.h:36
ch::CombineHarvester::SetFromProcs
std::set< R > SetFromProcs(T func)
Fill an std::set using only the Process entries.
Definition: CombineHarvester.h:611
ch::CombineHarvester::histograms
CombineHarvester & histograms()
Definition: CombineHarvester_Filters.cc:166
ch::Systematic::value_d
double value_d() const
Definition: Systematic.h:30
ch::Systematic
Definition: Systematic.h:12
ch::HistMapping::SystWorkspaceName
std::string SystWorkspaceName() const
Definition: HistMapping.cc:53
ch::ParseFileLines
std::vector< std::string > ParseFileLines(std::string const &file_name)
Definition: Utilities.cc:224
ch::CombineHarvester::SetFromObs
std::set< R > SetFromObs(T func)
Fill an std::set using only the Observation entries.
Definition: CombineHarvester.h:604
ch::CombineHarvester::data
CombineHarvester & data()
Definition: CombineHarvester_Filters.cc:183
ch::Process::ClonedScaledShape
std::unique_ptr< TH1 > ClonedScaledShape() const
Definition: Process.cc:117
ch::CombineHarvester::ParseDatacard
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
Definition: CombineHarvester_Datacards.cc:56
ch::CombineHarvester::bin
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:13
LOGLINE
#define LOGLINE(x, y)
Definition: Logging.h:11
ch::CombineHarvester
Definition: CombineHarvester.h:30
ch::Parameter::groups
std::set< std::string > & groups()
Definition: Parameter.h:54
ch::CombineHarvester::ForEachSyst
void ForEachSyst(Function func)
Definition: CombineHarvester.h:642
ch::Parameter::name
std::string const & name() const
Definition: Parameter.h:21
ch::HistMapping::file
std::shared_ptr< TFile > file
Definition: HistMapping.h:15
ch::Parameter::err_u
double err_u() const
Definition: Parameter.h:34
ch::HistMapping::ws
std::shared_ptr< RooWorkspace > ws
Definition: HistMapping.h:16
ch::Process::pdf
RooAbsReal const * pdf() const
Definition: Process.h:60
CombineHarvester.h
ch::CombineHarvester::CombineHarvester
CombineHarvester()
Definition: CombineHarvester.cc:17
ch::Systematic::name
std::string const & name() const
Definition: Systematic.h:21
ch::CombineHarvester::SetGroup
void SetGroup(std::string const &name, std::vector< std::string > const &patterns)
Add parameters to a given group.
Definition: CombineHarvester_Evaluate.cc:780
ch::Observation::data
RooAbsData const * data() const
Definition: Observation.h:35
Observation.h
ch::Parameter::set_err_u
void set_err_u(double const &err_u)
Definition: Parameter.h:33
ch::Object::process
virtual std::string const & process() const
Definition: Object.h:20
ch::Systematic::ClonedShapeD
std::unique_ptr< TH1 > ClonedShapeD() const
Definition: Systematic.cc:207
ch::CombineHarvester::WriteDatacard
void WriteDatacard(std::string const &name, std::string const &root_file)
Definition: CombineHarvester_Datacards.cc:529
ch::Systematic::shape_u
TH1 const * shape_u() const
Definition: Systematic.h:38
ch::CombineHarvester::syst_name
CombineHarvester & syst_name(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:126
ch::HistMapping::syst_pattern
std::string syst_pattern
Definition: HistMapping.h:14
ch::Process::norm
RooAbsReal const * norm() const
Definition: Process.h:66
ch::HistMapping
Definition: HistMapping.h:10
ch::HistMapping::IsPdf
bool IsPdf() const
Definition: HistMapping.cc:21
ch::HistMapping::is_fake
bool is_fake
Definition: HistMapping.h:18
FNLOG
#define FNLOG(x)
Definition: Logging.h:13
ch::Set2Vec
std::vector< T > Set2Vec(std::set< T > const &in)
Definition: Utilities.h:101
ch::Systematic::data_u
RooDataHist const * data_u() const
Definition: Systematic.h:48
ch::Systematic::type
std::string const & type() const
Definition: Systematic.h:24
ch::Observation
Definition: Observation.h:12
ch::Observation::shape
TH1 const * shape() const
Definition: Observation.h:27
ch::CombineHarvester::process
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
Definition: CombineHarvester_Filters.cc:35