CombineHarvester
CMSHistFuncFactory.cc
Go to the documentation of this file.
2 #include <iostream>
3 #include <set>
4 #include <vector>
5 #include <string>
6 #include "boost/lexical_cast.hpp"
7 #include "boost/format.hpp"
8 #include "boost/multi_array.hpp"
9 #include "TVector.h"
10 #include "TGraphErrors.h"
11 #include "RooFitResult.h"
12 #include "RooRealVar.h"
13 #include "RooDataHist.h"
14 #include "RooProduct.h"
15 #include "RooConstVar.h"
17 
18 namespace ch {
19 
20 /*
21 TODO:
22 2) Morphing options
23 3) Remove morphing proc errors
24 */
25 
26 CMSHistFuncFactory::CMSHistFuncFactory() : v_(1), hist_mode_(0), rebin_(false) {}
27 
28 void CMSHistFuncFactory::Run(ch::CombineHarvester &cb, RooWorkspace &ws, std::map<std::string, std::string> process_vs_norm_postfix_map) {
29  for (auto const& bin : cb.bin_set()) {
30  for (auto const& proc : cb.cp().bin({bin}).process_set()) {
31  if (v_) {
32  std::cout << ">> Processing " << bin << "," << proc << "\n";
33  }
34  if (process_vs_norm_postfix_map.find(proc) == process_vs_norm_postfix_map.end())
35  {
36  norm_postfix_ = "norm";
37  }
38  else
39  {
40  norm_postfix_ = process_vs_norm_postfix_map[proc];
41  }
42  RunSingleProc(cb, ws, bin, proc);
43  }
44  TH1F data_hist = cb.cp().bin({bin}).GetObservedShape();
45  if (rebin_) data_hist = RebinHist(data_hist);
46  // data_hist.Print("range");
47 
48  RooRealVar *xvar = ws.var(TString::Format("CMS_x_%s", bin.c_str()));
49  RooDataHist rdh_dat(TString::Format("%s_data_obs", bin.c_str()), "",
50  RooArgList(*xvar), &data_hist);
51 
52  ws.import(rdh_dat);
53 
54  cb.cp().bin({bin}).ForEachObs([&](ch::Observation * p) {
55  p->set_shape(nullptr, false);
56  p->set_rate(1.0);
57  });
58  }
59 }
60 
61 void CMSHistFuncFactory::Run(ch::CombineHarvester &cb, RooWorkspace &ws) {
62  CMSHistFuncFactory::Run(cb, ws, {});
63 }
64 
65 void CMSHistFuncFactory::RunSingleProc(CombineHarvester& cb, RooWorkspace& ws,
66  std::string bin, std::string process) {
67  using std::vector;
68  using std::set;
69  using std::string;
70  using boost::lexical_cast;
71  using boost::multi_array;
72  using boost::extents;
73 
74  TString key = bin + "_" + process;
75 
76 
77  CombineHarvester cbp = cb.cp().bin({bin}).process({process});
78  vector<string> m_str_vec = Set2Vec(cbp.SetFromProcs(
79  std::mem_fn(&ch::Process::mass)));
80  unsigned m = m_str_vec.size();
81  vector<double> m_vec;
82  if (m_str_vec.size() == 1) {
83  } else {
84  std::sort(m_str_vec.begin(), m_str_vec.end(),
85  [](string const& s1, string const& s2) {
86  return lexical_cast<double>(s1) < lexical_cast<double>(s2);
87  });
88  for (auto const& s : m_str_vec) {
89  if (v_) std::cout << ">>>> Mass point: " << s << "\n";
90  m_vec.push_back(lexical_cast<double>(s));
91  }
92  }
93 
94  // ss = "shape systematic"
95  // Make a list of the names of shape systematics affecting this process
96  vector<string> ss_vec =
97  Set2Vec(cbp.cp().syst_type({"shape", "shapeU"}).syst_name_set());
98  // Now check if all shape systematics are present for all mass points
99  for (auto const& s : m_str_vec) {
100  if (cbp.cp().syst_type({"shape", "shapeU"}).mass({s}).syst_name_set().size() !=
101  ss_vec.size()) {
102  throw std::runtime_error(FNERROR(
103  "Some mass points do not have the full set of shape systematics, "
104  "this is currently unsupported"));
105  }
106  }
107  unsigned ss = ss_vec.size(); // number of shape systematics
108 
109  vector<string> ls_vec =
110  Set2Vec(cbp.cp().syst_type({"lnN"}).syst_name_set());
111  unsigned ls = ls_vec.size(); // number of lnN systematics
112 
113  // Store pointers to each ch::Process (one per mass point) in the CH instance
114  multi_array<ch::Process *, 1> pr_arr(extents[m]);
115  // Store pointers pointers to each ch::Systematic for each mass point (hence
116  // an ss * m 2D array)
117  multi_array<ch::Systematic *, 2> ss_arr(extents[ss][m]);
118  // With shape systematics we have to support cases where the value in the
119  // datacard is != 1.0, i.e. we are scaling the parameter that goes into the
120  // Gaussian constraint PDF - we have to pass this factor on when we build the
121  // normal vertical-interpolation PDF for each mass point. We will store these
122  // scale factors in this array, one per shape systematic.
123  multi_array<double, 1> ss_scale_arr(extents[ss]);
124  // Really just for book-keeping, we'll set this flag to true when the shape
125  // systematic scale factor != 1
126  multi_array<bool, 1> ss_must_scale_arr(extents[ss]);
127  // Similar to the ss_arr above, store the lnN ch::Systematic objects. Note
128  // implicit in all this is the assumption that the processes at each mass
129  // point have exactly the same list of systematic uncertainties.
130  multi_array<ch::Systematic *, 2> ls_arr(extents[ls][m]);
131 
132  // This array holds pointers to the RooRealVar objects that will become our
133  // shape nuisance parameters, e.g. "CMS_scale_t_mutau_8TeV"
134  multi_array<std::shared_ptr<RooRealVar>, 1> ss_scale_var_arr(extents[ss]);
135  // And this array holds the constant scale factors that we'll build from
136  // ss_scale_arr above
137  multi_array<std::shared_ptr<RooConstVar>, 1> ss_scale_fac_arr(extents[ss]);
138  // Finally this array will contain the scale_var * scale_fac product where we
139  // need to provide a scaled value of the nuisance parameter instead of the
140  // parameter itself in building the vertical-interp. PDF
141  multi_array<std::shared_ptr<RooProduct>, 1> ss_scale_prod_arr(extents[ss]);
142 
143  // Now let's fill some of these arrays...
144  for (unsigned mi = 0; mi < m; ++mi) {
145  // The ch::Process pointers
146  cbp.cp().mass({m_str_vec[mi]}).ForEachProc([&](ch::Process *p) {
147  pr_arr[mi] = p;
148  });
149  for (unsigned ssi = 0; ssi < ss; ++ssi) {
150  // The ch::Systematic pointers for shape systematics
151  cbp.cp().mass({m_str_vec[mi]}).syst_name({ss_vec[ssi]})
152  .ForEachSyst([&](ch::Systematic *n) {
153  ss_arr[ssi][mi] = n;
154  });
155  }
156  for (unsigned lsi = 0; lsi < ls; ++lsi) {
157  // The ch::Systematic pointers for lnN systematics
158  cbp.cp().mass({m_str_vec[mi]}).syst_name({ls_vec[lsi]})
159  .ForEachSyst([&](ch::Systematic *n) {
160  ls_arr[lsi][mi] = n;
161  });
162  }
163  }
164 
166  // We need to build a RooArgList of the vertical morphing parameters for the
167  // vertical-interpolation pdf - this will be the same for each mass point so
168  // we only build it once
169  RooArgList ss_list;
170  for (unsigned ssi = 0; ssi < ss; ++ssi) {
171  // Make the nuisance parameter var. We use shared_ptrs here as a convenience
172  // because they will take care of automatically cleaning up the memory at
173  // the end
174  ss_scale_var_arr[ssi] =
175  std::make_shared<RooRealVar>(ss_vec[ssi].c_str(), "", 0);
176  // We'll make a quick check that the scale factor for this systematic is the
177  // same for all mass points. We could do a separate scaling at each mass
178  // point but this would create a lot of complications
179  set<double> scales;
180  // Insert the scale from each mass point into the set, if it has a size
181  // larger than one at the end then we have a problem!
182  for (unsigned mi = 0; mi < m; ++mi) {
183  scales.insert(ss_arr[ssi][mi]->scale());
184  }
185  if (scales.size() > 1) {
186  // Don't let the user proceed, we can't build the model they want
187  std::runtime_error(FNERROR(
188  "Shape morphing parameters that vary with mass are not allowed"));
189  } else {
190  // Everything ok, set the scale value in its array
191  ss_scale_arr[ssi] = *(scales.begin());
192  // Handle the case where the scale factor is != 1
193  if (std::fabs(ss_scale_arr[ssi] - 1.0) > 1E-6) {
194  ss_must_scale_arr[ssi] = true;
195  // Build the RooConstVar with the value of the scale factor
196  ss_scale_fac_arr[ssi] = std::make_shared<RooConstVar>(
197  TString::Format("%g", ss_scale_arr[ssi]), "",
198  ss_scale_arr[ssi]);
199  // Create the product of the floating nuisance parameter and the
200  // constant scale factor
201  ss_scale_prod_arr[ssi] = std::make_shared<RooProduct>(
202  ss_vec[ssi] + "_scaled_" + key, "",
203  RooArgList(*(ss_scale_var_arr[ssi]), *(ss_scale_fac_arr[ssi])));
204  // Add this to the list
205  ss_list.add(*(ss_scale_prod_arr[ssi]));
206  } else {
207  // If the scale factor is 1.0 then we just add the nuisance parameter
208  // directly to our list
209  ss_list.add(*(ss_scale_var_arr[ssi]));
210  }
211  }
212  }
214  // Summarise the info on the shape systematics and scale factors
215  if (v_) {
216  std::cout << ">> Shape systematics: " << ss << "\n";
217  for (unsigned ssi = 0; ssi < ss; ++ssi) {
218  std::cout << boost::format("%-50s %-5i %-8.3g\n")
219  % ss_vec[ssi] % ss_must_scale_arr[ssi] % ss_scale_arr[ssi];
220  }
221  }
222 
223  // lms = "lnN morphing systematic"
224  // Now we have some work to do with the lnN systematics. We can consider two cases:
225  // a) The uncertainty is the same for each mass point => we can leave it in
226  // the datacard as is and let text2workspace do its normal thing
227  // b) The uncertainty varies between mass points => we can't capture this
228  // information in the text datacard in the usual way, so we'll build a RooFit
229  // object that effectively makes the lnN uncertainty a function of the mass
230  // variable
231  // We'll use "lms" to refer to case b), which we'll try to figure out now...
232  vector<string> lms_vec; // vec of systematic names
233  set<string> lms_set; // set of systematic names
234  // index positions in our full ls_arr array for the lms systematics
235  vector<unsigned > lms_vec_idx;
236  for (unsigned lsi = 0; lsi < ls; ++lsi) {
237  // Extra complication is that the user might have been evil and mixed
238  // symmetric and asymmetric lnN values, we'll try and detect changes in
239  // either
240  set<double> k_hi;
241  set<double> k_lo;
242  // Go through each mass point for this systematic and add the uncertainty
243  // values (so-called "kappa" values)
244  for (unsigned mi = 0; mi < m; ++mi) {
245  Systematic *n = ls_arr[lsi][mi];
246  k_hi.insert(n->value_u());
247  if (n->asymm()) {
248  k_lo.insert(n->value_d());
249  }
250  }
251  // If either of these sets has more than one entry then this is a lms case
252  if (k_hi.size() > 1 || k_lo.size() > 1) {
253  lms_vec.push_back(ls_vec[lsi]);
254  lms_set.insert(ls_vec[lsi]);
255  lms_vec_idx.push_back(lsi);
256  }
257  }
258  unsigned lms = lms_vec.size();
259  // New array for the pointers to the lms Systematic objects
260  multi_array<ch::Systematic *, 2> lms_arr(extents[lms][m]);
261  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
262  for (unsigned mi = 0; mi < m; ++mi) {
263  lms_arr[lmsi][mi] = ls_arr[lms_vec_idx[lmsi]][mi];
264  }
265  }
266 
267  // We will need to create the nuisance parameters for these now
268  multi_array<std::shared_ptr<RooRealVar>, 1> lms_var_arr(extents[lms]);
269  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
270  lms_var_arr[lmsi] =
271  std::make_shared<RooRealVar>(lms_vec[lmsi].c_str(), "", 0);
272  }
273 
274  // Give a summary of the lms systematics to the user
275  if (v_) {
276  std::cout << ">> lnN morphing systematics: " << lms << "\n";
277  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
278  std::cout << ">>>> " << lms_vec[lmsi] << "\n";
279  }
280  }
281 
282  // 2D array of all input histograms, size is (mass points * (nominal +
283  // 2*shape-systs)). The factor of 2 needed for the Up and Down shapes
284  multi_array<std::shared_ptr<TH1F>, 2> hist_arr(extents[m][1+ss*2]);
285  // We also need the array of process yields vs mass, because this will have to
286  // be interpolated too
287  multi_array<double, 1> rate_arr(extents[m]);
288  // Also store the yield uncertainty - we don't actually need this for the signal
289  // model, but will include it in the debug TGraph
290  multi_array<double, 1> rate_unc_arr(extents[m]);
291  // The vertical-interpolation PDF needs the TH1 inputs in the format of a TList
292  multi_array<std::shared_ptr<TList>, 1> list_arr(extents[m]);
293  // Combine always treats the normalisation part of shape systematics as
294  // distinct from the actual shape morphing. Essentially the norm part is
295  // treated as an asymmetric lnN. We have to make the kappa_hi and kappa_lo a
296  // function of the mass parameter too, so we need two more arrays in (ss * m)
297  multi_array<double, 2> ss_k_hi_arr(extents[ss][m]);
298  multi_array<double, 2> ss_k_lo_arr(extents[ss][m]);
299  // For each shape systematic we will build a RooSpline1D, configured to
300  // interpolate linearly between the kappa values
301  multi_array<std::shared_ptr<RooAbsReal>, 1> ss_spl_hi_arr(extents[ss]);
302  multi_array<std::shared_ptr<RooAbsReal>, 1> ss_spl_lo_arr(extents[ss]);
303  // To define the actually process scaling as a function of these kappa values,
304  // we need an AsymPow object per mass point
305  multi_array<std::shared_ptr<AsymPow>, 1> ss_asy_arr(extents[ss]);
306 
307  // Similar set of objects needed for the lms normalisation systematics
308  multi_array<double, 2> lms_k_hi_arr(extents[lms][m]);
309  multi_array<double, 2> lms_k_lo_arr(extents[lms][m]);
310  multi_array<std::shared_ptr<RooSpline1D>, 1> lms_spl_hi_arr(extents[lms]);
311  multi_array<std::shared_ptr<RooSpline1D>, 1> lms_spl_lo_arr(extents[lms]);
312  multi_array<std::shared_ptr<AsymPow>, 1> lms_asy_arr(extents[lms]);
313 
314  for (unsigned mi = 0; mi < m; ++mi) {
315  // Grab the nominal process histograms. We also have to convert every
316  // histogram to a uniform integer binning, because this is what
317  // text2workspace will do for all the non-morphed processes in our datacard,
318  // and we need the binning of these to be in sync.
319  hist_arr[mi][0] =
320  std::make_shared<TH1F>(AsTH1F(pr_arr[mi]->ClonedScaledShape().get()));
321  if (rebin_) *hist_arr[mi][0] = RebinHist(*hist_arr[mi][0]);
322  if (m > 1) {
323  for (int b = 1; b < hist_arr[mi][0]->GetNbinsX() + 1; ++b) {
324  hist_arr[mi][0]->SetBinError(b, 0.);
325  }
326  }
327  // If the user supplied a TFile pointer we'll dump a bunch of info into it
328  // for debugging
329  // if (file) {
330  // file->WriteTObject(pr_arr[mi]->shape(), key + "_" + m_str_vec[mi]);
331  // }
332  // Store the process rate
333  rate_arr[mi] = 1.;
334  // auto proc_hist = pr_arr[mi]->ClonedScaledShape();
335  // proc_hist->IntegralAndError(1, proc_hist->GetNbinsX(), rate_unc_arr[mi]);
336  // Do the same for the Up and Down shapes
337  for (unsigned ssi = 0; ssi < ss; ++ssi) {
338  hist_arr[mi][1 + 2 * ssi] =
339  std::make_shared<TH1F>(AsTH1F(ss_arr[ssi][mi]->shape_u()));
340  hist_arr[mi][2 + 2 * ssi] =
341  std::make_shared<TH1F>(AsTH1F(ss_arr[ssi][mi]->shape_d()));
342  if (rebin_) *hist_arr[mi][1 + 2 * ssi] = RebinHist(*hist_arr[mi][1 + 2 * ssi]);
343  if (rebin_) *hist_arr[mi][2 + 2 * ssi] = RebinHist(*hist_arr[mi][2 + 2 * ssi]);
344  TH1F* h_hi = hist_arr[mi][1 + 2 * ssi].get();
345  TH1F* h_lo = hist_arr[mi][2 + 2 * ssi].get();
346  if (h_hi->Integral() > 0.) {
347  h_hi->Scale(hist_arr[mi][0]->Integral() / h_hi->Integral());
348  }
349  if (h_lo->Integral() > 0.) {
350  h_lo->Scale(hist_arr[mi][0]->Integral() / h_lo->Integral());
351  }
352 
353  // TODO: Need to implement the logic of normalising shape variations here
354 
355  // if (file) {
356  // file->WriteTObject(ss_arr[ssi][mi]->shape_u(),
357  // key + "_" + m_str_vec[mi] + "_" + ss_vec[ssi] + "Up");
358  // file->WriteTObject(ss_arr[ssi][mi]->shape_d(),
359  // key + "_" + m_str_vec[mi] + "_" + ss_vec[ssi] + "Down");
360  // }
361  // Store the uncertainty ("kappa") values for the shape systematics
362  ss_k_hi_arr[ssi][mi] = ss_arr[ssi][mi]->value_u();
363  ss_k_lo_arr[ssi][mi] = ss_arr[ssi][mi]->value_d();
364  // For the normalisation we scale the kappa instead of putting the scaling
365  // parameter as the variable
366  if (std::fabs(ss_scale_arr[ssi] - 1.0) > 1E-6) {
367  ss_k_hi_arr[ssi][mi] = std::pow(ss_arr[ssi][mi]->value_u(), ss_scale_arr[ssi]);
368  ss_k_lo_arr[ssi][mi] = std::pow(ss_arr[ssi][mi]->value_d(), ss_scale_arr[ssi]);
369  }
370  }
371  // And now the uncertainty values for the lnN systematics that vary with mass
372  // We'll force these to be asymmetric even if they're not
373  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
374  lms_k_hi_arr[lmsi][mi] = lms_arr[lmsi][mi]->value_u();
375  if (lms_arr[lmsi][mi]->asymm()) {
376  lms_k_lo_arr[lmsi][mi] = lms_arr[lmsi][mi]->value_d();
377  } else {
378  lms_k_lo_arr[lmsi][mi] = 1. / lms_arr[lmsi][mi]->value_u();
379  }
380  }
381  }
382  // Now we'll fill these objects..
383 
384 
385  // Now we've made all our histograms, we'll put them in the TList format
386  // of [nominal, syst_1_Up, syst_1_Down, ... , syst_N_Up, syst_N_Down]
387  // for (unsigned mi = 0; mi < m; ++mi) {
388  // list_arr[mi] = std::make_shared<TList>();
389  // for (unsigned xi = 0; xi < (1 + ss * 2); ++xi) {
390  // list_arr[mi]->Add(hist_arr[mi][xi].get());
391  // }
392  // }
393 
394  // Print the values of the yields and kappa factors that will be inputs
395  // to our spline interpolation objects
396  if (v_) {
397  for (unsigned mi = 0; mi < m; ++mi) {
398  std::cout << boost::format("%-10s") % m_str_vec[mi];
399  }
400  std::cout << "\n";
401  for (unsigned mi = 0; mi < m; ++mi) {
402  std::cout << boost::format("%-10.5g") % rate_arr[mi];
403  }
404  std::cout << "\n";
405  for (unsigned ssi = 0; ssi < ss; ++ssi) {
406  std::cout << ss_vec[ssi] << " Up" << std::endl;
407  for (unsigned mi = 0; mi < m; ++mi) {
408  std::cout << boost::format("%-10.5g") % ss_k_hi_arr[ssi][mi];
409  }
410  std::cout << "\n";
411  std::cout << ss_vec[ssi] << " Down" << std::endl;
412  for (unsigned mi = 0; mi < m; ++mi) {
413  std::cout << boost::format("%-10.5g") % ss_k_lo_arr[ssi][mi];
414  }
415  std::cout << "\n";
416  }
417  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
418  std::cout << lms_vec[lmsi] << " Up" << std::endl;
419  for (unsigned mi = 0; mi < m; ++mi) {
420  std::cout << boost::format("%-10.5g") % lms_k_hi_arr[lmsi][mi];
421  }
422  std::cout << "\n";
423  std::cout << lms_vec[lmsi] << " Down" << std::endl;
424  for (unsigned mi = 0; mi < m; ++mi) {
425  std::cout << boost::format("%-10.5g") % lms_k_lo_arr[lmsi][mi];
426  }
427  std::cout << "\n";
428  }
429  }
430 
431  // Can do more sophistical spline interpolation if we want, but let's use
432  // simple LINEAR interpolation for now
433  TString interp = "LINEAR";
434 
435  // Here when the force_template_limit option is requested
436  // we have to add some extra terms to the vector of masses to ensure that
437  // the signal pdf goes to 0 outside of the MC template range.
438 
439  vector<double> new_m_vec(m_vec);
440  // Insert an entry at either end of the vector for a mass just slightly
441  // outside of the range
442  if (m > 1) {
443  new_m_vec.insert(new_m_vec.begin(),m_vec[0]-1E-6);
444  new_m_vec.push_back(m_vec[m-1]+1E-6);
445  }
446  // Create a corresponding rate array with 0 entries for these new masses
447  multi_array<double, 1> new_rate_arr(extents[m+2]);
448  new_rate_arr[0] = 0.0;
449  // for(unsigned i = 0; i < m; ++i) new_rate_arr[i+1] = rate_arr[i] ;
450 
451  // With CMSHistFunc the normalisation is handled internally
452  for(unsigned i = 0; i < m; ++i) new_rate_arr[i+1] = 1.0;
453  new_rate_arr[m+1] = 0.0;
454 
455  bool force_template_limit = false;
456 
457  // if (v_ && force_template_limit) {
458  // std::cout << ">>>> Forcing rate to 0 outside of template range:" << "\n";
459  // for(unsigned mi = 0; mi < m+2; ++mi) {
460  // std::cout << boost::format("%-10.5g") % new_m_vec[mi];
461  // }
462  // std::cout << "\n";
463  // for(unsigned mi = 0; mi < m+2; ++mi) {
464  // std::cout << boost::format("%-10.5g") % new_rate_arr[mi];
465  // }
466  // std::cout << "\n";
467  // }
468  // Create the 1D spline directly from the rate array
470  std::shared_ptr<RooSpline1D> rate_spline;
471  if (m > 1) rate_spline = std::make_shared<RooSpline1D>("interp_rate_"+key, "", *mass_var[process],
472  force_template_limit ? m+2 : m,
473  force_template_limit ? new_m_vec.data() : m_vec.data(),
474  force_template_limit ? new_rate_arr.data() : rate_arr.data(),
475  interp);
476  // if (file) {
477  // TGraphErrors tmp(m, m_vec.data(), rate_arr.data(), nullptr, rate_unc_arr.data());
478  // file->WriteTObject(&tmp, "interp_rate_"+key);
479  // }
480  // Collect all terms that will go into the total normalisation:
481  // nominal * systeff_1 * systeff_2 * ... * systeff_N
482  RooArgList rate_prod;
483  if (m > 1) {
484  rate_prod.add(*rate_spline);
485  }
486  // For each shape systematic build a 1D spline for kappa_hi and kappa_lo
487  for (unsigned ssi = 0; ssi < ss; ++ssi) {
488  if (m > 1) {
489  ss_spl_hi_arr[ssi] = std::make_shared<RooSpline1D>("spline_hi_" +
490  key + "_" + ss_vec[ssi], "", *mass_var[process], m, m_vec.data(),
491  ss_k_hi_arr[ssi].origin(), interp);
492  ss_spl_lo_arr[ssi] = std::make_shared<RooSpline1D>("spline_lo_" +
493  key + "_" + ss_vec[ssi], "", *mass_var[process], m, m_vec.data(),
494  ss_k_lo_arr[ssi].origin(), interp);
495  } else {
496  ss_spl_hi_arr[ssi] = std::make_shared<RooConstVar>(TString::Format("%g", ss_k_hi_arr[ssi][0]), "", ss_k_hi_arr[ssi][0]);
497  ss_spl_lo_arr[ssi] = std::make_shared<RooConstVar>(TString::Format("%g", ss_k_lo_arr[ssi][0]), "", ss_k_lo_arr[ssi][0]);
498  }
499  ss_asy_arr[ssi] = std::make_shared<AsymPow>("systeff_" +
500  key + "_" + ss_vec[ssi], "",
501  *(ss_spl_lo_arr[ssi]), *(ss_spl_hi_arr[ssi]),
502  *(ss_scale_var_arr[ssi]));
503  // if (file) {
504  // TGraph tmp_hi(m, m_vec.data(), ss_k_hi_arr[ssi].origin());
505  // file->WriteTObject(&tmp_hi, "spline_hi_" + key + "_" + ss_vec[ssi]);
506  // TGraph tmp_lo(m, m_vec.data(), ss_k_lo_arr[ssi].origin());
507  // file->WriteTObject(&tmp_lo, "spline_lo_" + key + "_" + ss_vec[ssi]);
508  // }
509  // Then build the AsymPow object for each systematic as a function of the
510  // kappas and the nuisance parameter
511  rate_prod.add(*(ss_asy_arr[ssi]));
512  }
513  // Same procedure for the lms normalisation systematics: build the splines
514  // then the AsymPows and add to the rate_prod list
515  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
516  lms_spl_hi_arr[lmsi] = std::make_shared<RooSpline1D>("spline_hi_" +
517  key + "_" + lms_vec[lmsi], "", *mass_var[process], m, m_vec.data(),
518  lms_k_hi_arr[lmsi].origin(), interp);
519  lms_spl_lo_arr[lmsi] = std::make_shared<RooSpline1D>("spline_lo_" +
520  key + "_" + lms_vec[lmsi], "", *mass_var[process], m, m_vec.data(),
521  lms_k_lo_arr[lmsi].origin(), interp);
522  lms_asy_arr[lmsi] = std::make_shared<AsymPow>("systeff_" +
523  key + "_" + lms_vec[lmsi], "", *(lms_spl_lo_arr[lmsi]),
524  *(lms_spl_hi_arr[lmsi]), *(lms_var_arr[lmsi]));
525  rate_prod.add(*(lms_asy_arr[lmsi]));
526  }
527  // We'll come back to this rate_prod a bit later.
528 
529  // Now some fun with binning. We anticipate that the process histograms we
530  // have been supplied could have a finer binning than is actually wanted for
531  // the analysis (and the fit), in order to avoid known problems with the RMS
532  // of a peaking distribution not being morphed smoothly from mass point to
533  // mass point if the binning is too wide. The RooMorphingPdf will handle
534  // re-binning on the fly, but we have to tell it how to rebin. To do this we
535  // assume the observed data histogram has the target binning.
536  TH1F data_hist = cbp.GetObservedShape();
537  TH1F proc_hist = cbp.GetShape();
538  // The x-axis variable has to be called "CMS_th1x", as this is what
539  // text2workspace will use for all the normal processes
540  // data_hist.Print("range");
541  TString xvar_name = TString::Format("CMS_x_%s", bin.c_str());
542  RooRealVar xvar = VarFromHist(xvar_name, xvar_name, data_hist);
543  // RooRealVar xvar("CMS_th1x", "CMS_th1x", 0,
544  // static_cast<float>(data_hist.GetNbinsX()));
545  // xvar.setBins(data_hist.GetNbinsX());
546 
547  // Create a second x-axis variable, named specific to the bin, that will be
548  // for the finer-binned input
549  // RooRealVar morph_xvar(("CMS_th1x_"+bin).c_str(), "", 0,
550  // static_cast<float>(proc_hist.GetNbinsX()));
551  // We're not going to need roofit to evaluate anything as a function of this
552  // morphing x-axis variable, so we set it constant
553  // morph_xvar.setConstant();
554  // morph_xvar.setBins(proc_hist.GetNbinsX());
555  if (v_) {
556  xvar.Print();
557  // morph_xvar.Print();
558  }
559 
560  // Follow what ShapeTools.py does and set the smoothing region
561  // to the minimum of all of the shape scales
562  double qrange = 1.;
563  for (unsigned ssi = 0; ssi < ss; ++ssi) {
564  if (ss_scale_arr[ssi] < qrange) qrange = ss_scale_arr[ssi];
565  }
566 
567 
568  TString morph_name = key + "_morph";
569  // At long last, we can build our pdf, giving it:
570  // xvar : the fixed "CMS_th1x" x-axis variable with uniform integer binning
571  // mass_var: the floating mass value for the interpolation
572  // vpdf_list: the list of vertical-interpolation pdfs
573  // m_vec: the corresponding list of mass points
574  // allow_morph: if false will just evaluate to the closest pdf in mass
575  // data_hist.GetXaxis(): The original (non-uniform) target binning
576  // proc_hist.GetXaxis(): The original (non-uniform) morphing binning
578  CMSHistFunc morph_func(morph_name, "", xvar, *hist_arr[0][0]);
579  morph_func.setVerticalSmoothRegion(qrange);
580  morph_func.setHorizontalType(CMSHistFunc::HorizontalType::Integral);
581  morph_func.setVerticalMorphs(ss_list);
582  if (m > 1) {
583  morph_func.addHorizontalMorph(*mass_var[process], TVectorD(m, m_vec.data()));
584  }
585  morph_func.prepareStorage();
586 
587  for (unsigned mi = 0; mi < m; ++mi) {
588  morph_func.setShape(0, mi, 0, 0, *hist_arr[mi][0]);
589  for (unsigned ssi = 0; ssi < ss; ++ssi) {
590  morph_func.setShape(0, mi, ssi + 1, 0, *hist_arr[mi][2 + 2 * ssi]);
591  morph_func.setShape(0, mi, ssi + 1, 1, *hist_arr[mi][1 + 2 * ssi]);
592  }
593  }
594  // And we can make the final normalisation product
595  // The value of norm_postfix is very important. text2workspace will only look
596  // for for a term with the pdf name + "_norm". But it might be the user wants
597  // to add even more terms to this total normalisation, so we give them the option
598  // of using some other suffix.
600  RooProduct morph_rate(morph_name + "_" + TString(norm_postfix_), "",
601  rate_prod);
603 
604  // Dump even more plots
605  // if (file) MakeMorphDebugPlots(&morph_pdf, &mass_var, m_vec, file, &data_hist);
606 
607  // Load our pdf and norm objects into the workspace
608  ws.import(morph_func, RooFit::RecycleConflictNodes());
609  if (rate_prod.getSize() > 0) {
610  ws.import(morph_rate, RooFit::RecycleConflictNodes());
611  } else {
612  std::cout << "No normalisation term to import!\n";
613  }
614 
615  // if (!verbose) RooMsgService::instance().setGlobalKillBelow(backup_msg_level);
616 //
617  // Now we can clean up the CH instance a bit
618  // We only need one entry for each Process or Systematic now, not one per mass point
619  // We'll modify the first mass point to house our new pdf and norm, and drop
620  // the rest.
621  std::string mass_min = m_str_vec.at(0);
622 
623  // Dump Process entries for other mass points
624  cb.FilterProcs([&](ch::Process const* p) {
625  return p->bin() == bin && p->process() == process && p->mass() != mass_min;
626  });
627  // Dump Systematic entries for other mass points, but only if the type is shape
628  // or a lnN that we found varied as a function of the mass. We've already built
629  // these uncertainties into our normalisation term. Constant lnN systematics can
630  // remain in the datacard and be added by text2workspace
631  cb.FilterSysts([&](ch::Systematic const* n) {
632  return (n->bin() == bin && n->process() == process) &&
633  ((n->mass() != mass_min) || (n->type() == "shape" || n->type() == "shapeU") ||
634  (lms_set.count(n->name())));
635  });
636  // With the remaining Process entry (should only be one if we did this right),
637  // Make the mass generic ("*"), drop the TH1 and set the rate to 1.0, as this
638  // will now be read from our norm object
639  cb.ForEachProc([&](ch::Process * p) {
640  if (p->bin() == bin && p->process() == process) {
641  if (m > 1) {
642  p->set_mass("*");
643  }
644  p->set_shape(nullptr, false);
645  p->set_rate(1.0);
646  }
647  });
648  // Just declare the mass to be generic for the remaining systematics
649  if (m > 1) {
650  cb.ForEachSyst([&](ch::Systematic * n) {
651  if (n->bin() == bin && n->process() == process) {
652  n->set_mass("*");
653  }
654  });
655  }
656 
657  // And we're done, but note that we haven't populated the Process entry with
658  // the PDF or norm objects we created, as we assume the user has more work to
659  // do on their RooWorkspace before copying into the CH instance. Once they've
660  // imported the workspace:
661  //
662  // cb.AddWorkspace(ws);
663  //
664  // They can populate the Process entries, e.g. if all signals were replaced
665  // with Morphing PDFs:
666  //
667  // cb.cp().signals().ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph");
669 }
670 
671 // }
672 
673 // void MakeMorphDebugPlots(RooMorphingPdf* pdf, RooAbsReal* mass,
674 // std::vector<double> const& masses, TFile* f, TH1 *ref_bins) {
675 // RooRealVar *rrv = dynamic_cast<RooRealVar*>(mass);
676 // if (!rrv) return;
677 // f->cd();
678 // f->mkdir(pdf->GetName());
679 // gDirectory->cd(pdf->GetName());
680 // for (unsigned i = 0; i < masses.size(); ++i) {
681 // rrv->setVal(masses[i]);
682 // TH1 * h = pdf->createHistogram("CMS_th1x");
683 // h->AddDirectory(false);
684 // TH1 * h2 = nullptr;
685 // if (ref_bins) {
686 // h2 = (TH1*)ref_bins->Clone();
687 // h2->Reset();
688 // for (int x = 1; x <= h2->GetNbinsX(); ++x) {
689 // h2->SetBinContent(x, h->GetBinContent(x));
690 // }
691 // } else {
692 // h2 = h;
693 // h = nullptr;
694 // }
695 // h2->AddDirectory(false);
696 // h2->SetName(TString::Format("actual_point_%g", masses[i]));
697 // gDirectory->WriteTObject(h2);
698 // if (h) delete h;
699 // if (h2) delete h2;
700 // }
701 // double m = masses.front();
702 // double step = 1;
703 // if (((masses.back() - masses.front()) / step) > 100.) step = step * 10.;
704 // if (((masses.back() - masses.front()) / step) > 100.) step = step * 10.;
705 // if (((masses.back() - masses.front()) / step) < 10.) step = step/10.;
706 // while (m <= masses.back()) {
707 // rrv->setVal(m);
708 // TH1* hm = pdf->createHistogram("CMS_th1x");
709 // hm->AddDirectory(false);
710 // TH1 * hm2 = nullptr;
711 // if (ref_bins) {
712 // hm2 = (TH1*)ref_bins->Clone();
713 // hm2->Reset();
714 // for (int x = 1; x <= hm2->GetNbinsX(); ++x) {
715 // hm2->SetBinContent(x, hm->GetBinContent(x));
716 // }
717 // } else {
718 // hm2 = hm;
719 // hm = nullptr;
720 // }
721 // hm2->AddDirectory(false);
722 // hm2->SetName(TString::Format("morph_point_%g", m));
723 // //It struggles with m=0, instead adds really small number close to 0
724 // if(fabs(m) < 1E-5) hm2->SetName(TString::Format("morph_point_0"));
725 // gDirectory->WriteTObject(hm2);
726 // if (hm) delete hm;
727 // if (hm2) delete hm2;
728 // m += step;
729 // }
730 // f->cd();
731 // }
732 }
#define FNERROR(x)
Definition: Logging.h:9
void Run(CombineHarvester &cb, RooWorkspace &ws, std::map< std::string, std::string > process_vs_norm_postfix_map)
CombineHarvester & mass(std::vector< std::string > const &vec, bool cond=true)
void ForEachSyst(Function func)
void ForEachProc(Function func)
std::set< std::string > process_set()
std::set< R > SetFromProcs(T func)
Fill an std::set using only the Process entries.
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester & FilterSysts(Function func)
CombineHarvester & FilterProcs(Function func)
std::set< std::string > syst_name_set()
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
CombineHarvester & syst_type(std::vector< std::string > const &vec, bool cond=true)
virtual std::string const & process() const
Definition: Object.h:20
virtual std::string const & bin() const
Definition: Object.h:17
virtual std::string const & mass() const
Definition: Object.h:38
void set_rate(double const &rate)
Definition: Observation.h:20
void set_shape(std::unique_ptr< TH1 > shape, bool set_rate)
Definition: Observation.cc:66
void set_rate(double const &rate)
Definition: Process.h:24
void set_shape(std::unique_ptr< TH1 > shape, bool set_rate)
Definition: Process.cc:81
std::string const & type() const
Definition: Systematic.h:25
std::string const & name() const
Definition: Systematic.h:22
Definition: Algorithm.h:10
TH1F RebinHist(TH1F const &hist)
Definition: Utilities.cc:170
std::vector< T > Set2Vec(std::set< T > const &in)
Definition: Utilities.h:101