CombineHarvester
SMLegacyExample.cpp
Go to the documentation of this file.
1 #include <string>
2 #include <map>
3 #include <set>
4 #include <iostream>
5 #include <vector>
6 #include <utility>
7 #include <cstdlib>
8 #include "boost/filesystem.hpp"
15 
16 using namespace std;
17 
18 int main() {
20 
21  typedef vector<pair<int, string>> Categories;
22  typedef vector<string> VString;
23 
24  string auxiliaries = string(getenv("CMSSW_BASE")) + "/src/auxiliaries/";
25  string aux_shapes = auxiliaries +"shapes/";
26  string aux_pruning = auxiliaries +"pruning/";
27  string input_dir =
28  string(getenv("CMSSW_BASE")) + "/src/CombineHarvester/CombineTools/input";
29 
30  VString chns =
31  {"et", "mt", "em", "ee", "mm", "tt"};
32 
33  map<string, string> input_folders = {
34  {"et", "Imperial"},
35  {"mt", "Imperial"},
36  {"em", "MIT"},
37  {"ee", "DESY-KIT"},
38  {"mm", "DESY-KIT"},
39  {"tt", "CERN"}
40  };
41 
42  map<string, VString> bkg_procs;
43  bkg_procs["et"] = {"ZTT", "W", "QCD", "ZL", "ZJ", "TT", "VV"};
44  bkg_procs["mt"] = {"ZTT", "W", "QCD", "ZL", "ZJ", "TT", "VV"};
45  bkg_procs["em"] = {"Ztt", "EWK", "Fakes", "ttbar", "ggH_hww125", "qqH_hww125"};
46  bkg_procs["ee"] = {"ZTT", "WJets", "QCD", "ZEE", "TTJ", "Dibosons", "ggH_hww125", "qqH_hww125"};
47  bkg_procs["mm"] = {"ZTT", "WJets", "QCD", "ZMM", "TTJ", "Dibosons", "ggH_hww125", "qqH_hww125"};
48  bkg_procs["tt"] = {"ZTT", "W", "QCD", "ZL", "ZJ", "TT", "VV"};
49 
50  VString sig_procs = {"ggH", "qqH", "WH", "ZH"};
51 
52  map<string, Categories> cats;
53  cats["et_7TeV"] = {
54  {1, "eleTau_0jet_medium"}, {2, "eleTau_0jet_high"},
55  {3, "eleTau_1jet_medium"}, {5, "eleTau_1jet_high_mediumhiggs"},
56  {6, "eleTau_vbf"}};
57 
58  cats["et_8TeV"] = {
59  {1, "eleTau_0jet_medium"}, {2, "eleTau_0jet_high"},
60  {3, "eleTau_1jet_medium"}, {5, "eleTau_1jet_high_mediumhiggs"},
61  {6, "eleTau_vbf_loose"}, {7, "eleTau_vbf_tight"}};
62 
63  cats["mt_7TeV"] = {
64  {1, "muTau_0jet_medium"}, {2, "muTau_0jet_high"},
65  {3, "muTau_1jet_medium"}, {4, "muTau_1jet_high_lowhiggs"}, {5, "muTau_1jet_high_mediumhiggs"},
66  {6, "muTau_vbf"}};
67 
68  cats["mt_8TeV"] = {
69  {1, "muTau_0jet_medium"}, {2, "muTau_0jet_high"},
70  {3, "muTau_1jet_medium"}, {4, "muTau_1jet_high_lowhiggs"}, {5, "muTau_1jet_high_mediumhiggs"},
71  {6, "muTau_vbf_loose"}, {7, "muTau_vbf_tight"}};
72 
73  cats["em_7TeV"] = {
74  {0, "emu_0jet_low"}, {1, "emu_0jet_high"},
75  {2, "emu_1jet_low"}, {3, "emu_1jet_high"},
76  {4, "emu_vbf_loose"}};
77 
78  cats["em_8TeV"] = {
79  {0, "emu_0jet_low"}, {1, "emu_0jet_high"},
80  {2, "emu_1jet_low"}, {3, "emu_1jet_high"},
81  {4, "emu_vbf_loose"}, {5, "emu_vbf_tight"}};
82 
83  cats["ee_7TeV"] = {
84  {0, "ee_0jet_low"}, {1, "ee_0jet_high"},
85  {2, "ee_1jet_low"}, {3, "ee_1jet_high"},
86  {4, "ee_vbf"}};
87  cats["ee_8TeV"] = cats["ee_7TeV"];
88 
89  cats["mm_7TeV"] = {
90  {0, "mumu_0jet_low"}, {1, "mumu_0jet_high"},
91  {2, "mumu_1jet_low"}, {3, "mumu_1jet_high"},
92  {4, "mumu_vbf"}};
93  cats["mm_8TeV"] = cats["mm_7TeV"];
94 
95  cats["tt_8TeV"] = {
96  {0, "tauTau_1jet_high_mediumhiggs"}, {1, "tauTau_1jet_high_highhiggs"},
97  {2, "tauTau_vbf"}};
98 
99  vector<string> masses = ch::ValsFromRange("110:145|5");
100 
101  cout << ">> Creating processes and observations...\n";
102  for (string era : {"7TeV", "8TeV"}) {
103  for (auto chn : chns) {
104  cb.AddObservations(
105  {"*"}, {"htt"}, {era}, {chn}, cats[chn+"_"+era]);
106  cb.AddProcesses(
107  {"*"}, {"htt"}, {era}, {chn}, bkg_procs[chn], cats[chn+"_"+era], false);
108  cb.AddProcesses(
109  masses, {"htt"}, {era}, {chn}, sig_procs, cats[chn+"_"+era], true);
110  }
111  }
112  // Have to drop ZL from tautau_vbf category
113  cb.FilterProcs([](ch::Process const* p) {
114  return p->bin() == "tauTau_vbf" && p->process() == "ZL";
115  });
116 
117  cout << ">> Adding systematic uncertainties...\n";
122 
123  cout << ">> Extracting histograms from input root files...\n";
124  for (string era : {"7TeV", "8TeV"}) {
125  for (string chn : chns) {
126  // Skip 7TeV tt:
127  if (chn == "tt" && era == "7TeV") continue;
128  string file = aux_shapes + input_folders[chn] + "/htt_" + chn +
129  ".inputs-sm-" + era + "-hcg.root";
130  cb.cp().channel({chn}).era({era}).backgrounds().ExtractShapes(
131  file, "$BIN/$PROCESS", "$BIN/$PROCESS_$SYSTEMATIC");
132  cb.cp().channel({chn}).era({era}).signals().ExtractShapes(
133  file, "$BIN/$PROCESS$MASS", "$BIN/$PROCESS$MASS_$SYSTEMATIC");
134  }
135  }
136 
138  cout << ">> Scaling signal process rates...\n";
139  map<string, TGraph> xs;
140  // Get the table of H->tau tau BRs vs mass
141  xs["htt"] = ch::TGraphFromTable(input_dir+"/xsecs_brs/htt_YR3.txt", "mH", "br");
142  for (string const& e : {"7TeV", "8TeV"}) {
143  for (string const& p : sig_procs) {
144  // Get the table of xsecs vs mass for process "p" and era "e":
145  xs[p+"_"+e] = ch::TGraphFromTable(input_dir+"/xsecs_brs/"+p+"_"+e+"_YR3.txt", "mH", "xsec");
146  cout << ">>>> Scaling for process " << p << " and era " << e << "\n";
147  cb.cp().process({p}).era({e}).ForEachProc([&](ch::Process *proc) {
148  double m = boost::lexical_cast<double>(proc->mass());
149  proc->set_rate(proc->rate() * xs[p+"_"+e].Eval(m) * xs["htt"].Eval(m));
150  });
151  }
152  }
154  xs["hww_over_htt"] = ch::TGraphFromTable(input_dir+"/xsecs_brs/hww_over_htt.txt", "mH", "ratio");
155  for (string const& e : {"7TeV", "8TeV"}) {
156  for (string const& p : {"ggH", "qqH"}) {
157  cb.cp().channel({"em"}).process({p+"_hww125"}).era({e})
158  .ForEachProc([&](ch::Process *proc) {
159  proc->set_rate(proc->rate() * xs[p+"_"+e].Eval(125.) * xs["htt"].Eval(125.));
160  proc->set_rate(proc->rate() * xs["hww_over_htt"].Eval(125.));
161  });
162  }
163  }
164 
165  cout << ">> Merging bin errors and generating bbb uncertainties...\n";
166 
168  auto bbb = ch::BinByBinFactory()
169  .SetAddThreshold(0.1)
170  .SetMergeThreshold(0.5)
171  .SetFixNorm(true);
173 
174  ch::CombineHarvester cb_et = cb.cp().channel({"et"});
175  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"}), cb);
176  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({3, 5}).process({"W"}), cb);
177  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({1, 2}).process({"ZL", "ZJ", "QCD", "W"}), cb);
178  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({3, 5}).process({"W"}), cb);
179  bbb.MergeAndAdd(cb_et.cp().era({"7TeV"}).bin_id({6}).process({"ZL", "ZJ", "W", "ZTT"}), cb);
181  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({6}).process({"ZL", "ZJ", "W"}), cb);
183  bbb.MergeAndAdd(cb_et.cp().era({"8TeV"}).bin_id({7}).process({"ZL", "ZJ", "W", "ZTT"}), cb);
184 
185  ch::CombineHarvester cb_mt = cb.cp().channel({"mt"});
186  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({1, 2, 3, 4}).process({"W", "QCD"}), cb);
187  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({1, 2, 3, 4}).process({"W", "QCD"}), cb);
188  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({5}).process({"W"}), cb);
189  bbb.MergeAndAdd(cb_mt.cp().era({"7TeV"}).bin_id({6}).process({"W", "ZTT"}), cb);
190  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({5, 6}).process({"W"}), cb);
191  bbb.MergeAndAdd(cb_mt.cp().era({"8TeV"}).bin_id({7}).process({"W", "ZTT"}), cb);
192 
193  ch::CombineHarvester cb_em = cb.cp().channel({"em"});
194  bbb.MergeAndAdd(cb_em.cp().era({"7TeV"}).bin_id({1, 3}).process({"Fakes"}), cb);
195  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({1, 3}).process({"Fakes"}), cb);
196  bbb.MergeAndAdd(cb_em.cp().era({"7TeV"}).bin_id({4}).process({"Fakes", "EWK", "Ztt"}), cb);
197  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({5}).process({"Fakes", "EWK", "Ztt"}), cb);
198  bbb.MergeAndAdd(cb_em.cp().era({"8TeV"}).bin_id({4}).process({"Fakes", "EWK"}), cb);
199 
200  ch::CombineHarvester cb_tt = cb.cp().channel({"tt"});
201  bbb.MergeAndAdd(cb_tt.cp().era({"8TeV"}).bin_id({0, 1, 2}).process({"ZTT", "QCD"}), cb);
202 
203  bbb.SetAddThreshold(0.); // ee and mm use a different threshold
204  ch::CombineHarvester cb_ll = cb.cp().channel({"ee", "mm"});
205  bbb.MergeAndAdd(cb_ll.cp().era({"7TeV"}).bin_id({1, 3, 4}).process({"ZTT", "ZEE", "ZMM", "TTJ"}), cb);
206  bbb.MergeAndAdd(cb_ll.cp().era({"8TeV"}).bin_id({1, 3, 4}).process({"ZTT", "ZEE", "ZMM", "TTJ"}), cb);
207 
208  cout << ">> Setting standardised bin names...\n";
211  VString droplist = ch::ParseFileLines(
212  aux_pruning + "uncertainty-pruning-drop-131128-sm.txt");
213  cout << ">> Droplist contains " << droplist.size() << " entries\n";
214 
215  set<string> to_drop;
216  for (auto x : droplist) to_drop.insert(x);
217 
218  auto pre_drop = cb.syst_name_set();
219  cb.syst_name(droplist, false);
220  auto post_drop = cb.syst_name_set();
221  cout << ">> Systematics dropped: " << pre_drop.size() - post_drop.size()
222  << "\n";
224 
225  // The following is an example of duplicating existing objects and modifying
226  // them in the process. Here we clone all mH=125 signals, adding "_SM125" to
227  // the process name, switching it to background and giving it the generic mass
228  // label. This would let us create a datacard for doing a second Higgs search
229 
230  // ch::CloneProcsAndSysts(cb.cp().signals().mass({"125"}), cb,
231  // [](ch::Object* p) {
232  // p->set_process(p->process() + "_SM125");
233  // p->set_signal(false);
234  // p->set_mass("*");
235  // });
236 
237 
238  // Writing datacards manually - easier with CardWriter below:
239 
240  // string folder = "output/sm_cards/LIMITS";
241  // boost::filesystem::create_directories(folder);
242  // boost::filesystem::create_directories(folder + "/common");
243  // for (auto m : masses) {
244  // boost::filesystem::create_directories(folder + "/" + m);
245  // }
246 
247  // for (string chn : chns) {
248  // TFile output((folder + "/common/htt_" + chn + ".input.root").c_str(),
249  // "RECREATE");
250  // auto bins = cb.cp().channel({chn}).bin_set();
251  // for (auto b : bins) {
252  // for (auto m : masses) {
253  // cout << ">> Writing datacard for bin: " << b << " and mass: " << m
254  // << "\r" << flush;
255  // cb.cp().channel({chn}).bin({b}).mass({m, "*"}).WriteDatacard(
256  // folder + "/" + m + "/" + b + ".txt", output);
257  // }
258  // }
259  // output.Close();
260  // }
261 
262 
263 
264  // Here we define a CardWriter with a template for how the text datacard
265  // and the root files should be named.
266  ch::CardWriter writer("$TAG/$MASS/$ANALYSIS_$CHANNEL_$BINID_$ERA.txt",
267  "$TAG/common/$ANALYSIS_$CHANNEL.input_$ERA.root");
268  // writer.SetVerbosity(1);
269  writer.WriteCards("output/sm_cards/cmb", cb);
270  for (auto chn : cb.channel_set()) {
271  writer.WriteCards("output/sm_cards/" + chn, cb.cp().channel({chn}));
272  }
273 
274  cout << "\n>> Done!\n";
275 }
int main()
Merges bin uncertainties and creates bin-by-bin statistical uncertainties.
Definition: BinByBin.h:21
BinByBinFactory & SetAddThreshold(double val)
Set the fractional bin error threshold for bin-by-bin creation and for participation in the merging a...
Definition: BinByBin.h:99
BinByBinFactory & SetFixNorm(bool fix)
Whether or not the bin-by-bin systematics are allowed to vary the process normalisation.
Definition: BinByBin.h:124
BinByBinFactory & SetMergeThreshold(double val)
The threshold for the merging algorithm.
Definition: BinByBin.h:107
Automates the writing of datacards into directory structures.
Definition: CardWriter.h:50
std::map< std::string, CombineHarvester > WriteCards(std::string const &tag, ch::CombineHarvester &cmb) const
Write datacards according to patterns, substituting $TAG for tag
Definition: CardWriter.cc:96
CombineHarvester & bin_id(std::vector< int > const &vec, bool cond=true)
void AddProcesses(std::vector< std::string > mass, std::vector< std::string > analysis, std::vector< std::string > era, std::vector< std::string > channel, std::vector< std::string > procs, ch::Categories bin, bool signal)
CombineHarvester & process(std::vector< std::string > const &vec, bool cond=true)
CombineHarvester & era(std::vector< std::string > const &vec, bool cond=true)
CombineHarvester & syst_name(std::vector< std::string > const &vec, bool cond=true)
void AddObservations(std::vector< std::string > mass, std::vector< std::string > analysis, std::vector< std::string > era, std::vector< std::string > channel, ch::Categories bin)
std::set< std::string > channel_set()
CombineHarvester & FilterProcs(Function func)
std::set< std::string > syst_name_set()
void ExtractShapes(std::string const &file, std::string const &rule, std::string const &syst_rule)
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
CombineHarvester & channel(std::vector< std::string > const &vec, bool cond=true)
virtual std::string const & process() const
Definition: Object.h:20
virtual std::string const & bin() const
Definition: Object.h:17
virtual std::string const & mass() const
Definition: Object.h:38
void set_rate(double const &rate)
Definition: Process.h:24
double rate() const
Definition: Process.h:25
std::vector< std::string > ParseFileLines(std::string const &file_name)
Definition: Utilities.cc:224
void AddSystematics_ee_mm(CombineHarvester &cb)
TGraph TGraphFromTable(std::string filename, std::string const &x_column, std::string const &y_column)
Definition: Utilities.cc:121
void SetStandardBinNames(CombineHarvester &cb, std::string const &pattern="$ANALYSIS_$CHANNEL_$BINID_$ERA")
Definition: Utilities.cc:78
std::vector< std::pair< int, std::string > > Categories
void AddSystematics_et_mt(CombineHarvester &cb)
std::vector< std::string > ValsFromRange(std::string const &input, std::string const &fmt="%.0f")
Generate a vector of values using ranges and intervals specified in a string.
Definition: Utilities.cc:281
void AddSystematics_tt(CombineHarvester &cb)
void AddSystematics_em(CombineHarvester &cb)