CombineHarvester
PostFitShapesFromWorkspace.cpp
Go to the documentation of this file.
1 #include <map>
2 #include "boost/program_options.hpp"
3 #include "boost/format.hpp"
4 #include "TSystem.h"
5 #include "TH2F.h"
10 
11 namespace po = boost::program_options;
12 
13 using namespace std;
14 
15 void ReverseBins(TH1F & h) {
16  std::vector<float> contents(h.GetNbinsX());
17  std::vector<float> errors(h.GetNbinsX());
18  for (int i = 0; i < h.GetNbinsX(); ++i) {
19  contents[i] = h.GetBinContent(i + 1);
20  errors[i] = h.GetBinError(i + 1);
21  }
22  for (int i = 0; i < h.GetNbinsX(); ++i) {
23  h.SetBinContent(h.GetNbinsX() - i, contents[i]);
24  h.SetBinError(h.GetNbinsX() - i, errors[i]);
25  }
26  // return h;
27 }
28 
29 int main(int argc, char* argv[]) {
30  // Need this to read combine workspaces
31  gSystem->Load("libHiggsAnalysisCombinedLimit");
32 
33  string datacard = "";
34  string workspace = "";
35  string fitresult = "";
36  string mass = "";
37  bool postfit = false;
38  bool sampling = false;
39  bool no_sampling = false;
40  string output = "";
41  bool factors = false;
42  unsigned samples = 500;
43  std::string freeze_arg = "";
44  bool covariance = false;
45  string data = "data_obs";
46  bool skip_prefit = false;
47  bool skip_proc_errs = false;
48  bool total_shapes = false;
49  std::vector<std::string> reverse_bins_;
50 
51  po::options_description help_config("Help");
52  help_config.add_options()
53  ("help,h", "produce help message");
54 
55  po::options_description config("Configuration");
56  config.add_options()
57  ("workspace,w",
58  po::value<string>(&workspace)->required(),
59  "The input workspace-containing file [REQUIRED]")
60  ("dataset",
61  po::value<string>(&data)->default_value(data),
62  "The input dataset name")
63  ("datacard,d",
64  po::value<string>(&datacard),
65  "The input datacard, only used for rebinning")
66  ("output,o ",
67  po::value<string>(&output)->required(),
68  "Name of the output root file to create [REQUIRED]")
69  ("fitresult,f",
70  po::value<string>(&fitresult)->default_value(fitresult),
71  "Path to a RooFitResult, only needed for postfit")
72  ("mass,m",
73  po::value<string>(&mass)->default_value(""),
74  "Signal mass point of the input datacard")
75  ("postfit",
76  po::value<bool>(&postfit)
77  ->default_value(postfit)->implicit_value(true),
78  "Create post-fit histograms in addition to pre-fit")
79  ("sampling",
80  po::value<bool>(&sampling)->default_value(sampling)->implicit_value(true),
81  "Use the cov. matrix sampling method for the post-fit uncertainty (deprecated, this is the default)")
82  ("no-sampling",
83  po::value<bool>(&no_sampling)->default_value(no_sampling)->implicit_value(true),
84  "Do not use the cov. matrix sampling method for the post-fit uncertainty")
85  ("samples",
86  po::value<unsigned>(&samples)->default_value(samples),
87  "Number of samples to make in each evaluate call")
88  ("print",
89  po::value<bool>(&factors)->default_value(factors)->implicit_value(true),
90  "Print tables of background shifts and relative uncertainties")
91  ("freeze",
92  po::value<string>(&freeze_arg)->default_value(freeze_arg),
93  "Format PARAM1,PARAM2=X,PARAM3=Y where the values X and Y are optional")
94  ("covariance",
95  po::value<bool>(&covariance)->default_value(covariance)->implicit_value(true),
96  "Save the covariance and correlation matrices of the process yields")
97  ("skip-prefit",
98  po::value<bool>(&skip_prefit)->default_value(skip_prefit)->implicit_value(true),
99  "Skip the pre-fit evaluation")
100  ("skip-proc-errs",
101  po::value<bool>(&skip_proc_errs)->default_value(skip_proc_errs)->implicit_value(true),
102  "Skip evaluation of errors on individual processes")
103  ("total-shapes",
104  po::value<bool>(&total_shapes)->default_value(total_shapes)->implicit_value(true),
105  "Save signal- and background shapes added for all channels/categories")
106  ("reverse-bins", po::value<vector<string>>(&reverse_bins_)->multitoken(), "List of bins to reverse the order for");
107 
108 
109  po::variables_map vm;
110 
111  // First check if the user has set the "--help" or "-h" option, and if so
112  // just prin the usage information and quit
113  po::store(po::command_line_parser(argc, argv)
114  .options(help_config).allow_unregistered().run(), vm);
115  po::notify(vm);
116  if (vm.count("help")) {
117  cout << config << "\nExample usage:\n";
118  cout << "PostFitShapesFromWorkspace.root -d htt_mt_125.txt -w htt_mt_125.root -o htt_mt_125_shapes.root -m 125 "
119  "-f mlfit.root:fit_s --postfit --print\n";
120  return 1;
121  }
122 
123  // Parse the main config options
124  po::store(po::command_line_parser(argc, argv).options(config).run(), vm);
125  po::notify(vm);
126 
127  if (sampling) {
128  std::cout<<"WARNING: the default behaviour of PostFitShapesFromWorkspace is to use the covariance matrix sampling method for the post-fit uncertainty. The option --sampling is deprecated and will be removed in future versions of CombineHarvester"<<std::endl;
129  }
130 
131  TFile infile(workspace.c_str());
132 
133  RooWorkspace *ws = dynamic_cast<RooWorkspace*>(gDirectory->Get("w"));
134 
135  if (!ws) {
136  throw std::runtime_error(
137  FNERROR("Could not locate workspace in input file"));
138  }
139 
140  // Create CH instance and parse the workspace
142  cmb.SetFlag("workspaces-use-clone", true);
143  ch::ParseCombineWorkspace(cmb, *ws, "ModelConfig", data, false);
144 
145  // Only evaluate in case parameters to freeze are provided
146  if(! freeze_arg.empty())
147  {
148  vector<string> freeze_vec;
149  boost::split(freeze_vec, freeze_arg, boost::is_any_of(","));
150  for (auto const& item : freeze_vec) {
151  vector<string> parts;
152  boost::split(parts, item, boost::is_any_of("="));
153  if (parts.size() == 1) {
154  ch::Parameter *par = cmb.GetParameter(parts[0]);
155  if (par) par->set_frozen(true);
156  else throw std::runtime_error(
157  FNERROR("Requested variable to freeze does not exist in workspace"));
158  } else {
159  if (parts.size() == 2) {
160  ch::Parameter *par = cmb.GetParameter(parts[0]);
161  if (par) {
162  par->set_val(boost::lexical_cast<double>(parts[1]));
163  par->set_frozen(true);
164  }
165  else throw std::runtime_error(
166  FNERROR("Requested variable to freeze does not exist in workspace"));
167  }
168  }
169  }
170  }
171  // cmb.GetParameter("r")->set_frozen(true);
172 
173  ch::CombineHarvester cmb_card;
174  cmb_card.SetFlag("workspaces-use-clone",true);
175  if (datacard != "") {
176  cmb_card.ParseDatacard(datacard, "", "", "", 0, mass);
177  }
178 
179  // Drop any process that has no hist/data/pdf
180  cmb.FilterProcs([&](ch::Process * proc) {
181  bool no_shape = !proc->shape() && !proc->data() && !proc->pdf();
182  if (no_shape) {
183  cout << "Filtering process with no shape:\n";
184  cout << ch::Process::PrintHeader << *proc << "\n";
185  }
186  return no_shape;
187  });
188 
189  auto bins = cmb.cp().bin_set();
190 
191  TFile outfile(output.c_str(), "RECREATE");
192  TH1::AddDirectory(false);
193 
194  // Create a map of maps for storing histograms in the form:
195  // pre_shapes[<bin>][<process>]
196  map<string, map<string, TH1F>> pre_shapes;
197 
198  // Also create a simple map for storing total histograms, summed
199  // over all bins, in the form:
200  // pre_shapes_tot[<process>]
201  map<string, TH1F> pre_shapes_tot;
202 
203  // We can always do the prefit version,
204  // Loop through the bins writing the shapes to the output file
205  if (!skip_prefit) {
206  if(total_shapes){
207  pre_shapes_tot["data_obs"] = cmb.GetObservedShape();
208  // Then fill total signal and total bkg hists
209  std::cout << ">> Doing prefit: TotalBkg" << std::endl;
210  pre_shapes_tot["TotalBkg"] =
212  std::cout << ">> Doing prefit: TotalSig" << std::endl;
213  pre_shapes_tot["TotalSig"] =
215  std::cout << ">> Doing prefit: TotalProcs" << std::endl;
216  pre_shapes_tot["TotalProcs"] =
217  cmb.cp().GetShapeWithUncertainty();
218 
219  if (datacard != "") {
220  TH1F ref = cmb_card.cp().GetObservedShape();
221  for (auto & it : pre_shapes_tot) {
222  it.second = ch::RestoreBinning(it.second, ref);
223  }
224  }
225 
226  // Can write these straight into the output file
227  outfile.cd();
228  for (auto& iter : pre_shapes_tot) {
229  ch::WriteToTFile(&(iter.second), &outfile, "prefit/" + iter.first);
230  }
231  }
232  for (auto bin : bins) {
233  ch::CombineHarvester cmb_bin = cmb.cp().bin({bin});
234  // This next line is a temporary fix for models with parameteric RooFit pdfs
235  // - we try and set the number of bins to evaluate the pdf to be the same as
236  // the number of bins in data
237  // cmb_bin.SetPdfBins(cmb_bin.GetObservedShape().GetNbinsX());
238 
239  // Fill the data and process histograms
240  pre_shapes[bin]["data_obs"] = cmb_bin.GetObservedShape();
241  for (auto proc : cmb_bin.process_set()) {
242  std::cout << ">> Doing prefit: " << bin << "," << proc << std::endl;
243  if (skip_proc_errs) {
244  pre_shapes[bin][proc] =
245  cmb_bin.cp().process({proc}).GetShape();
246  } else {
247  pre_shapes[bin][proc] =
248  cmb_bin.cp().process({proc}).GetShapeWithUncertainty();
249  }
250  }
251 
252  // The fill total signal and total bkg hists
253  std::cout << ">> Doing prefit: " << bin << "," << "TotalBkg" << std::endl;
254  pre_shapes[bin]["TotalBkg"] =
255  cmb_bin.cp().backgrounds().GetShapeWithUncertainty();
256  std::cout << ">> Doing prefit: " << bin << "," << "TotalSig" << std::endl;
257  pre_shapes[bin]["TotalSig"] =
258  cmb_bin.cp().signals().GetShapeWithUncertainty();
259  std::cout << ">> Doing prefit: " << bin << "," << "TotalProcs" << std::endl;
260  pre_shapes[bin]["TotalProcs"] =
261  cmb_bin.cp().GetShapeWithUncertainty();
262 
263 
264  if (datacard != "") {
265  TH1F ref = cmb_card.cp().bin({bin}).GetObservedShape();
266  for (auto & it : pre_shapes[bin]) {
267  it.second = ch::RestoreBinning(it.second, ref);
268  }
269  }
270 
271  for (auto const& rbin : reverse_bins_) {
272  if (rbin != bin) continue;
273  auto & hists = pre_shapes[bin];
274  for (auto it = hists.begin(); it != hists.end(); ++it) {
275  ReverseBins(it->second);
276  }
277  }
278  // Can write these straight into the output file
279  outfile.cd();
280  for (auto& iter : pre_shapes[bin]) {
281  ch::WriteToTFile(&(iter.second), &outfile, bin + "_prefit/" + iter.first);
282  }
283  }
284 
285  // Print out the relative uncert. on the bkg
286  if (factors) {
287  cout << boost::format("%-25s %-32s\n") % "Bin" %
288  "Total relative bkg uncert. (prefit)";
289  cout << string(58, '-') << "\n";
290  for (auto bin : bins) {
291  ch::CombineHarvester cmb_bin = cmb.cp().bin({bin});
292  double rate = cmb_bin.cp().backgrounds().GetRate();
293  double err = cmb_bin.cp().backgrounds().GetUncertainty();
294  cout << boost::format("%-25s %-10.5f") % bin %
295  (rate > 0. ? (err / rate) : 0.) << std::endl;
296  }
297  }
298  }
299 
300 
301  // Now we can do the same again but for the post-fit model
302  if (postfit) {
303  // Get the fit result and update the parameters to the post-fit model
304  RooFitResult res = ch::OpenFromTFile<RooFitResult>(fitresult);
305  cmb.UpdateParameters(res);
306 
307  // Calculate the post-fit fractional background uncertainty in each bin
308 
309  map<string, map<string, TH1F>> post_shapes;
310  map<string, TH2F> post_yield_cov;
311  map<string, TH2F> post_yield_cor;
312 
313  map<string, TH1F> post_shapes_tot;
314 
315  if(total_shapes){
316  post_shapes_tot["data_obs"] = cmb.GetObservedShape();
317  // Fill the total sig. and total bkg. hists
318  auto cmb_bkgs = cmb.cp().backgrounds();
319  auto cmb_sigs = cmb.cp().signals();
320  std::cout << ">> Doing postfit: TotalBkg" << std::endl;
321  post_shapes_tot["TotalBkg"] =
322  no_sampling ? cmb_bkgs.GetShapeWithUncertainty()
323  : cmb_bkgs.GetShapeWithUncertainty(res, samples);
324  std::cout << ">> Doing postfit: TotalSig" << std::endl;
325  post_shapes_tot["TotalSig"] =
326  no_sampling ? cmb_sigs.GetShapeWithUncertainty()
327  : cmb_sigs.GetShapeWithUncertainty(res, samples);
328  std::cout << ">> Doing postfit: TotalProcs" << std::endl;
329  post_shapes_tot["TotalProcs"] =
330  no_sampling ? cmb.cp().GetShapeWithUncertainty()
331  : cmb.cp().GetShapeWithUncertainty(res, samples);
332 
333  if (datacard != "") {
334  TH1F ref = cmb_card.cp().GetObservedShape();
335  for (auto & it : post_shapes_tot) {
336  it.second = ch::RestoreBinning(it.second, ref);
337  }
338  }
339 
340  outfile.cd();
341  // Write the post-fit histograms
342  for (auto & iter : post_shapes_tot) {
343  ch::WriteToTFile(&(iter.second), &outfile,
344  "postfit/" + iter.first);
345  }
346  }
347 
348 
349  for (auto bin : bins) {
350  ch::CombineHarvester cmb_bin = cmb.cp().bin({bin});
351  post_shapes[bin]["data_obs"] = cmb_bin.GetObservedShape();
352  for (auto proc : cmb_bin.process_set()) {
353  auto cmb_proc = cmb_bin.cp().process({proc});
354  // Method to get the shape uncertainty depends on whether we are using
355  // the sampling method or the "wrong" method (assumes no correlations)
356  std::cout << ">> Doing postfit: " << bin << "," << proc << std::endl;
357  if (skip_proc_errs) {
358  post_shapes[bin][proc] = cmb_proc.GetShape();
359  } else {
360  post_shapes[bin][proc] =
361  no_sampling ? cmb_proc.GetShapeWithUncertainty()
362  : cmb_proc.GetShapeWithUncertainty(res, samples);
363  }
364  }
365  if (!no_sampling && covariance) {
366  post_yield_cov[bin] = cmb_bin.GetRateCovariance(res, samples);
367  post_yield_cor[bin] = cmb_bin.GetRateCorrelation(res, samples);
368  }
369  // Fill the total sig. and total bkg. hists
370  auto cmb_bkgs = cmb_bin.cp().backgrounds();
371  auto cmb_sigs = cmb_bin.cp().signals();
372  std::cout << ">> Doing postfit: " << bin << "," << "TotalBkg" << std::endl;
373  post_shapes[bin]["TotalBkg"] =
374  no_sampling ? cmb_bkgs.GetShapeWithUncertainty()
375  : cmb_bkgs.GetShapeWithUncertainty(res, samples);
376  std::cout << ">> Doing postfit: " << bin << "," << "TotalSig" << std::endl;
377  post_shapes[bin]["TotalSig"] =
378  no_sampling ? cmb_sigs.GetShapeWithUncertainty()
379  : cmb_sigs.GetShapeWithUncertainty(res, samples);
380  std::cout << ">> Doing postfit: " << bin << "," << "TotalProcs" << std::endl;
381  post_shapes[bin]["TotalProcs"] =
382  no_sampling ? cmb_bin.cp().GetShapeWithUncertainty()
383  : cmb_bin.cp().GetShapeWithUncertainty(res, samples);
384 
385  if (datacard != "") {
386  TH1F ref = cmb_card.cp().bin({bin}).GetObservedShape();
387  for (auto & it : post_shapes[bin]) {
388  it.second = ch::RestoreBinning(it.second, ref);
389  }
390  }
391 
392  outfile.cd();
393  // Write the post-fit histograms
394 
395  for (auto const& rbin : reverse_bins_) {
396  if (rbin != bin) continue;
397  std::cout << ">> reversing hists in bin " << bin << "\n";
398  auto & hists = post_shapes[bin];
399  for (auto it = hists.begin(); it != hists.end(); ++it) {
400  ReverseBins(it->second);
401  }
402  }
403 
404  for (auto & iter : post_shapes[bin]) {
405  ch::WriteToTFile(&(iter.second), &outfile,
406  bin + "_postfit/" + iter.first);
407  }
408  for (auto & iter : post_yield_cov) {
409  ch::WriteToTFile(&(iter.second), &outfile,
410  iter.first+"_cov");
411  }
412  for (auto & iter : post_yield_cor) {
413  ch::WriteToTFile(&(iter.second), &outfile,
414  iter.first+"_cor");
415  }
416 
417  }
418 
419  if (factors) {
420  cout << boost::format("\n%-25s %-32s\n") % "Bin" %
421  "Total relative bkg uncert. (postfit)";
422  cout << string(58, '-') << "\n";
423  for (auto bin : bins) {
424  ch::CombineHarvester cmb_bkgs = cmb.cp().bin({bin}).backgrounds();
425  double rate = cmb_bkgs.GetRate();
426  double err = no_sampling ? cmb_bkgs.GetUncertainty()
427  : cmb_bkgs.GetUncertainty(res, samples);
428  cout << boost::format("%-25s %-10.5f") % bin %
429  (rate > 0. ? (err / rate) : 0.) << std::endl;
430  }
431  }
432 
433  // As we calculate the post-fit yields can also print out the post/pre scale
434  // factors
435  if (factors && postfit) {
436  cout << boost::format("\n%-25s %-20s %-10s\n") % "Bin" % "Process" %
437  "Scale factor";
438  cout << string(58, '-') << "\n";
439  for (auto bin : bins) {
440  ch::CombineHarvester cmb_bin = cmb.cp().bin({bin});
441 
442  for (auto proc : cmb_bin.process_set()) {
443  // Print out the post/pre scale factors
444  TH1 const& pre = pre_shapes[bin][proc];
445  TH1 const& post = post_shapes[bin][proc];
446  cout << boost::format("%-25s %-20s %-10.5f\n") % bin % proc %
447  (pre.Integral() > 0. ? (post.Integral() / pre.Integral())
448  : 1.0);
449  }
450  }
451  }
452  }
453  // And we're done!
454  outfile.Close();
455  return 0;
456 }
457 
#define FNERROR(x)
Definition: Logging.h:9
int main(int argc, char *argv[])
void ReverseBins(TH1F &h)
TH2F GetRateCovariance(RooFitResult const &fit, unsigned n_samples)
CombineHarvester & backgrounds()
CombineHarvester & signals()
TH2F GetRateCorrelation(RooFitResult const &fit, unsigned n_samples)
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void SetFlag(std::string const &flag, bool const &value)
Set a named flag value.
std::set< std::string > process_set()
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
void UpdateParameters(std::vector< ch::Parameter > const &params)
std::set< std::string > bin_set()
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
CombineHarvester & FilterProcs(Function func)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
ch::Parameter const * GetParameter(std::string const &name) const
void set_frozen(bool const &frozen)
Definition: Parameter.h:50
void set_val(double const &val)
Definition: Parameter.h:23
RooAbsReal const * pdf() const
Definition: Process.h:60
RooAbsData const * data() const
Definition: Process.h:63
static std::ostream & PrintHeader(std::ostream &out)
Definition: Process.cc:153
TH1 const * shape() const
Definition: Process.h:52
void WriteToTFile(T *ptr, TFile *file, std::string const &path)
Definition: TFileIO.h:31
TH1F RestoreBinning(TH1F const &src, TH1F const &ref)
Definition: Utilities.cc:181
void ParseCombineWorkspace(CombineHarvester &cb, RooWorkspace &ws, std::string const &modelcfg, std::string const &data, bool verbose=false)