CombineHarvester
MSSMYieldTable.cpp
Go to the documentation of this file.
1 #include <iostream>
2 #include <vector>
3 #include <map>
4 #include "boost/lexical_cast.hpp"
5 #include "boost/algorithm/string.hpp"
6 #include "boost/format.hpp"
7 #include "boost/bind.hpp"
8 #include "boost/program_options.hpp"
12 
13 namespace po = boost::program_options;
14 
15 using namespace std;
16 
17 // Struct holds info on each column in the table
18 struct ColInfo {
19  string label; // Latex title, e.g. "No B-Tag"
20  vector<string> cats_str; // Category bin ids as strings
21  vector<int> cats_int; // Category bin ids as ints
22  string era; // e.g. "8TeV"
23  double lumi; // pb-1
24 };
25 
26 // Struct holds info on each background row in the table
27 struct BkgInfo {
28  string label; // Latex title, e.g. "$\\PW$+jets"
29  vector<string> procs; // Datacard processes to combine for this entry
30  BkgInfo(string const& l, vector<string> const& p) : label(l), procs(p) {}
31 };
32 
33 int main(int argc, char* argv[]) {
34  string channel = "";
35  vector<string> columns;
36  vector<string> eras;
37  vector<string> datacards;
38  vector<string> sig_datacards;
39  string fitresult_file = "";
40  string signal_mass = "";
41  string tanb = "";
42  bool postfit = true;
43  std::string header = "";
44 
45  po::options_description config("Configuration");
46  config.add_options()
47  ("channel",
48  po::value<string>(&channel)->required(), "channel")
49  ("columns",
50  po::value<vector<string>> (&columns)->multitoken()->required(), "labels")
51  ("eras",
52  po::value<vector<string>> (&eras)->multitoken()->required(), "eras")
53  ("datacard,d",
54  po::value<vector<string>> (&datacards)->multitoken()->required())
55  ("sig_datacard,s",
56  po::value<vector<string>> (&sig_datacards)->multitoken()->required())
57  ("fitresult,f",
58  po::value<string> (&fitresult_file))
59  ("signal_mass",
60  po::value<string>(&signal_mass)->required(), "signal_mass")
61  ("tanb",
62  po::value<string>(&tanb)->default_value("8"), "tanb")
63  ("header",
64  po::value<string>(&header)->default_value(""), "header")
65  ("postfit",
66  po::value<bool>(&postfit)->required(), "postfit");
67  po::variables_map vm;
68  po::store(po::command_line_parser(argc, argv)
69  .options(config)
70  .allow_unregistered()
71  .run(),
72  vm);
73  po::notify(vm);
74 
75  double d_tanb = boost::lexical_cast<double>(tanb);
76 
77  // Signal processes
78  vector<string> signal_procs = {"ggH", "bbH"};
79  unsigned n_sig = signal_procs.size();
80 
81  // Eras
82  unsigned n_eras = eras.size();
83  vector<string> v_eras;
84  vector<string> v_eras_lumi;
85  for (unsigned i = 0; i < eras.size(); ++i) {
86  vector<string> tmp;
87  boost::split(tmp, eras[i], boost::is_any_of(":"));
88  v_eras.push_back(tmp.at(0));
89  v_eras_lumi.push_back(tmp.at(1));
90  }
91 
92  // Table columns
93  vector<ColInfo> col_info;
94  col_info.resize(columns.size() * n_eras);
95  for (unsigned k =0; k < n_eras; ++k) {
96  for (unsigned i = 0; i < columns.size(); ++i) {
97  unsigned idx = k * columns.size() + i;
98  ColInfo & info = col_info[idx];
99  vector<string> tmp_split;
100  boost::split(tmp_split, columns[i], boost::is_any_of(":"));
101  info.label = tmp_split[0];
102  boost::split(info.cats_str, tmp_split[1], boost::is_any_of("+"));
103  for (auto const& str : info.cats_str)
104  info.cats_int.push_back(boost::lexical_cast<int>(str));
105  info.era = v_eras[k];
106  info.lumi = boost::lexical_cast<double>(v_eras_lumi[k]);
107  }
108  }
109  unsigned n_cols = col_info.size();
110 
111  // Create some arrays to track to the signal efficiencies
112  double signal_den[n_cols][n_sig];
113  double signal_num[n_cols][n_sig];
114  double signal_eff[n_cols][n_sig];
115 
116  // We can fill the denominator already
117  for (unsigned i = 0; i < n_sig; ++i) {
118  for (unsigned j = 0; j < n_cols; ++j) {
119  signal_den[j][i] = col_info[j].lumi;
120  }
121  }
122 
123  // Channel-specific background configuration
124  vector<BkgInfo> bkgs;
125  vector<string> total_bkg;
126  if (channel == "et" || channel == "mt" || channel == "tt") {
127  bkgs = {
128  BkgInfo("$\\cPZ\\rightarrow \\Pgt\\Pgt$", {"ZTT"}),
129  BkgInfo("QCD", {"QCD"}),
130  BkgInfo("$\\PW$+jets", {"W"}),
131  BkgInfo("$\\cPZ$+jets (l/jet faking $\\Pgt$)", {"ZL", "ZJ"}),
132  BkgInfo("$\\cPqt\\cPaqt$", {"TT"}),
133  BkgInfo("Di-bosons + single top", {"VV"})
134  };
135  total_bkg = {"ZTT", "QCD", "W", "ZL", "ZJ", "TT", "VV"};
136  }
137  if (channel == "em") {
138  bkgs = {
139  BkgInfo("$\\cPZ\\rightarrow \\Pgt\\Pgt$", {"Ztt"}),
140  BkgInfo("QCD", {"Fakes"}),
141  BkgInfo("$\\cPqt\\cPaqt$", {"ttbar"}),
142  BkgInfo("Di-bosons + single top", {"EWK"})
143  };
144  total_bkg = {"Ztt", "Fakes", "ttbar", "EWK"};
145  }
146  if (channel == "mm") {
147  bkgs = {
148  BkgInfo("$\\cPZ\\rightarrow \\Pgt\\Pgt$", {"ZTT"}),
149  BkgInfo("$\\cPZ\\rightarrow \\mu\\mu$", {"ZMM"}),
150  BkgInfo("QCD", {"QCD"}),
151  BkgInfo("$\\cPqt\\cPaqt$", {"TTJ"}),
152  BkgInfo("Di-bosons + single top", {"WJets", "Dibosons"})
153  };
154  total_bkg = {"ZTT", "ZMM", "QCD", "TTJ", "WJets", "Dibosons"};
155  }
156  unsigned n_bkg = bkgs.size();
157 
158  //---------------------------------------------------------------------------
159  // Now we parse the datacards and ROOT files
160  //---------------------------------------------------------------------------
161 
163  for (auto const& d : datacards) {
164  cmb.ParseDatacard(d, "", "", "", 0, signal_mass);
165  }
166 
167  cmb.ForEachObj(boost::bind(ch::SetFromBinName, _1,
168  "$ANALYSIS_$CHANNEL_$BINID_$ERA"));
169 
170  cmb.cp().signals().ForEachProc([&](ch::Process *p) {
171  p->set_rate(p->rate() * d_tanb);
172  });
173 
174  ch::CombineHarvester sig_cmb;
175  for (auto const& d : sig_datacards) {
176  sig_cmb.ParseDatacard(d, "$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt");
177  }
178 
179  RooFitResult *fitresult = nullptr;
180  if (fitresult_file.length() && postfit) {
181  fitresult =
182  new RooFitResult(ch::OpenFromTFile<RooFitResult>(fitresult_file));
183  auto fitparams = ch::ExtractFitParameters(*fitresult);
184  cmb.UpdateParameters(fitparams);
185  sig_cmb.UpdateParameters(fitparams);
186  }
187  cmb.channel({channel});
188 
189  double bkg_yields[n_cols][n_bkg];
190  double bkg_errors[n_cols][n_bkg];
191  double tot_yields[n_cols];
192  double tot_errors[n_cols];
193  double sig_yields[n_cols];
194  double sig_errors[n_cols];
195  double data_yields[n_cols];
196 
197  // Number of times to sample from the fit covariance matrix
198  unsigned samples = 500;
199 
200  for (unsigned i = 0; i < n_cols; ++i) {
201  ch::CombineHarvester tmp_cmb =
202  std::move(cmb.cp().era({col_info[i].era}).bin_id(col_info[i].cats_int));
203  // tmp_cmb.cp().PrintAll();
204  data_yields[i] = tmp_cmb.cp().GetObservedRate();
205  sig_yields[i] = tmp_cmb.cp().process(signal_procs).GetRate();
206  sig_errors[i] = postfit ?
207  tmp_cmb.cp().process(signal_procs).GetUncertainty(fitresult, samples) :
208  tmp_cmb.cp().process(signal_procs).GetUncertainty();
209  tot_yields[i] = tmp_cmb.cp().process(total_bkg).GetRate();
210  tot_errors[i] = postfit ?
211  tmp_cmb.cp().process(total_bkg).GetUncertainty(fitresult, samples) :
212  tmp_cmb.cp().process(total_bkg).GetUncertainty();
213  for (unsigned j = 0; j < n_bkg; ++j) {
214  bkg_yields[i][j] = tmp_cmb.cp().process(bkgs[j].procs).GetRate();
215  bkg_errors[i][j] = postfit ?
216  tmp_cmb.cp().process(bkgs[j].procs).GetUncertainty(fitresult, samples) :
217  tmp_cmb.cp().process(bkgs[j].procs).GetUncertainty();
218  }
219  for (unsigned k = 0; k < n_sig; ++k) {
220  signal_num[i][k] = sig_cmb.cp()
221  .era({col_info[i].era})
222  .bin_id(col_info[i].cats_int)
223  .process({signal_procs[k]})
224  .era({col_info[i].era})
225  .GetRate();
226  signal_eff[i][k] = signal_num[i][k] / signal_den[i][k];
227  }
228  }
229 
230  cout << "\\begin{tabular}{l";
231  for (unsigned i = 0; i < n_cols; ++i) cout << "r@{$ \\,\\,\\pm\\,\\, $}l";
232  cout << "}\n\\hline\n";
233  cout << boost::format("\\multicolumn{%i}{c}{%s} \\\\\n") %
234  (n_cols * 2 + 1) % header;
235  cout << "\\hline\n";
236  for (unsigned i = 0; i < v_eras.size(); ++i) {
237  std::string fmt = v_eras[i];
238  boost::replace_all(fmt, "TeV", "~\\TeV");
239  std::cout << boost::format("& \\multicolumn{%i}{c}{$\\sqrt{s}$ = %s} ") %
240  (columns.size() * 2) % fmt;
241  }
242  cout << "\\\\\n";
243  cout << "Process";
244  for (unsigned i = 0; i < n_cols; ++i)
245  cout << " & \\multicolumn{2}{c}{" + col_info[i].label + "}";
246  cout << "\\\\\n";
247  cout << "\\hline\n";
248  string fmt = "& %-10.0f & %-10.0f";
249  string fmts = "& %-10.0f & %-10.1f";
250 
251  for (unsigned k = 0; k < bkgs.size(); ++k) {
252  cout << boost::format("%-38s") % bkgs[k].label;
253  for (unsigned i = 0; i < n_cols; ++i)
254  cout << boost::format(bkg_errors[i][k] < 1. ? fmts : fmt) %
255  bkg_yields[i][k] % bkg_errors[i][k];
256  cout << "\\\\\n";
257  }
258  cout << "\\hline\n";
259 
260  cout << boost::format("%-38s") % "Total Background";
261  for (unsigned i = 0; i < n_cols; ++i)
262  cout << boost::format(fmt) % tot_yields[i] % tot_errors[i];
263  cout << "\\\\\n";
264 
265  cout << boost::format("%-38s") % "A+H+h\\,$\\rightarrow\\Pgt\\Pgt$";
266  for (unsigned i = 0; i < n_cols; ++i)
267  cout << boost::format(sig_errors[i] < 1. ? fmts : fmt) % sig_yields[i] %
268  sig_errors[i];
269 
270  cout << "\\\\\n";
271 
272  cout << boost::format("%-38s") % "Data";
273  for (unsigned i = 0; i < n_cols; ++i)
274  cout << boost::format("& \\multicolumn{2}{c}{%-10.0f}") % data_yields[i];
275  cout << "\\\\\n";
276  cout << "\\hline\n";
277 
278  cout << boost::format(
279  "\\multicolumn{%i}{l}{Efficiency $\\times$ acceptance}\\\\\n") %
280  (n_cols*2 + 1);
281  cout << "\\hline\n";
282  cout << boost::format("%-38s") % "gluon-fusion Higgs (160~\\GeV)";
283  for (unsigned i = 0; i < n_cols; ++i) {
284  std::string fmte = (boost::format("%.2e") % signal_eff[i][0]).str();
285  boost::replace_all(fmte, "e-0", "\\cdot 10^{-");
286  fmte = " $" + fmte;
287  fmte = fmte + "}$";
288  cout << boost::format("& \\multicolumn{2}{c}{%s}") % fmte;
289  }
290 
291  cout << "\\\\\n";
292 
293  cout << boost::format("%-38s") % "b-associated Higgs (160~\\GeV)";
294  for (unsigned i = 0; i < n_cols; ++i) {
295  std::string fmte = (boost::format("%.2e") % signal_eff[i][1]).str();
296  boost::replace_all(fmte, "e-0", "\\cdot 10^{-");
297  fmte = " $" + fmte;
298  fmte = fmte + "}$";
299  cout << boost::format("& \\multicolumn{2}{c}{%s}") % fmte; }
300  cout << "\\\\\n";
301  cout << "\\hline\n";
302 
303  cout << "\\end{tabular}\n";
304 
305  return 0;
306 }
int main(int argc, char *argv[])
CombineHarvester & bin_id(std::vector< int > const &vec, bool cond=true)
CombineHarvester & signals()
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
void ForEachProc(Function func)
CombineHarvester & era(std::vector< std::string > const &vec, bool cond=true)
void UpdateParameters(std::vector< ch::Parameter > const &params)
int ParseDatacard(std::string const &filename, std::string const &analysis, std::string const &era, std::string const &channel, int bin_id, std::string const &mass)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
void ForEachObj(Function func)
CombineHarvester & channel(std::vector< std::string > const &vec, bool cond=true)
void set_rate(double const &rate)
Definition: Process.h:24
double rate() const
Definition: Process.h:25
std::vector< ch::Parameter > ExtractFitParameters(RooFitResult const &res)
Definition: Utilities.cc:46
void SetFromBinName(ch::Object *input, std::string parse_rules)
Definition: Utilities.cc:96
BkgInfo(string const &l, vector< string > const &p)
string label
vector< string > procs
vector< string > cats_str
string era
double lumi
vector< int > cats_int
string label