CombineHarvester
MorphFunctions.cc
2 #include <iostream>
3 #include <set>
4 #include <vector>
5 #include <string>
6 #include "boost/lexical_cast.hpp"
7 #include "boost/format.hpp"
8 #include "boost/multi_array.hpp"
9 #include "TGraphErrors.h"
10 #include "RooFitResult.h"
11 #include "RooRealVar.h"
12 #include "RooDataHist.h"
13 #include "RooProduct.h"
14 #include "RooConstVar.h"
16 
17 namespace ch {
18 
20 std::string BuildRooMorphing(RooWorkspace& ws, CombineHarvester& cb,
21  std::string const& bin, std::string const& process,
22  RooAbsReal& mass_var, std::string norm_postfix,
23  bool allow_morph, bool verbose, bool force_template_limit, TFile * file) {
25  // To keep the code concise we'll make some using-declarations here
26  using std::set;
27  using std::vector;
28  using std::string;
29  using boost::lexical_cast;
30  using boost::multi_array;
31  using boost::extents;
32 
33  // RooFit can be rather noisy on stdout; unless verbose = true we'll suppress
34  // everything below WARNING, and restore the user's MsgLevel at the end
35  RooFit::MsgLevel backup_msg_level =
36  RooMsgService::instance().globalKillBelow();
37  if (!verbose) RooMsgService::instance().setGlobalKillBelow(RooFit::WARNING);
38 
39  // Everything here works on the assumption that the bin,process,mass
40  // combinations we find in this CH instance refer to single processes. So bin
41  // should uniquely label an event category, and mass should be defined and
42  // convertible from string to float for every process entry.
43  if (verbose) {
44  std::cout << ">> Bin: " << bin << " Process: " << process << "\n";
45  }
46  TString key = bin + "_" + process;
47 
48  // For the sake of efficiency, take a filtered shallow-copy of the input CH
49  // instance that just contains the bin,process combination we've been asked to
50  // turn into a RooMorphingPdf
51  CombineHarvester cb_bp = cb.cp().bin({bin}).process({process});
52 
54  // Get a vector of the mass values
55  vector<string> m_str_vec = Set2Vec(cb_bp.SetFromProcs(
56  std::mem_fn(&ch::Process::mass)));
57 
58  // We now sort these based on numerical value, hence the lexical_cast to double
59  // At the moment this also serves as our check that all mass values are
60  // float-convertible, as an exception will be thrown by lexical_cast if not.
61  std::sort(m_str_vec.begin(), m_str_vec.end(),
62  [](string const& s1, string const& s2) {
63  return lexical_cast<double>(s1) < lexical_cast<double>(s2);
64  });
65 
66  // Convert the sorted vector of mass strings to an actual vector of doubles
67  vector<double> m_vec;
68  for (auto const& s : m_str_vec) {
69  if (verbose) std::cout << ">>>> Mass point: " << s << "\n";
70  m_vec.push_back(lexical_cast<double>(s));
71  }
72  // So, we have m mass points to consider
73  unsigned m = m_vec.size();
75 
76  // ss = "shape systematic"
77  // Make a list of the names of shape systematics affecting this process
78  vector<string> ss_vec =
79  Set2Vec(cb_bp.cp().syst_type({"shape"}).syst_name_set());
80  // Now check if all shape systematics are present for all mass points
81  for (auto const& s : m_str_vec) {
82  if (cb_bp.cp().syst_type({"shape"}).mass({s}).syst_name_set().size() !=
83  ss_vec.size()) {
84  throw std::runtime_error(FNERROR(
85  "Some mass points do not have the full set of shape systematics, "
86  "this is currently unsupported"));
87  }
88  }
89 
90  unsigned ss = ss_vec.size(); // number of shape systematics
91 
92  // ls = "lnN systematic"
93  // Make a list of the regular lnN normalisation systematics affecting this
94  // process
95  vector<string> ls_vec =
96  Set2Vec(cb_bp.cp().syst_type({"lnN"}).syst_name_set());
97  unsigned ls = ls_vec.size(); // number of lnN systematics
98 
99  // Create a bunch of empty arrays to store the information we need in a more
100  // convenient format. We use the boost multi_array class because it's a nice
101  // multi-dimensional array implementation, and safer to use than standard
102  // C-arrays
103 
104  // Store pointers to each ch::Process (one per mass point) in the CH instance
105  multi_array<ch::Process *, 1> pr_arr(extents[m]);
106  // Store pointers to each ch::Systematic for each mass point (hence
107  // an ss * m 2D array)
108  multi_array<ch::Systematic *, 2> ss_arr(extents[ss][m]);
109  // With shape systematics we have to support cases where the value in the
110  // datacard is != 1.0, i.e. we are scaling the parameter that goes into the
111  // Gaussian constraint PDF - we have to pass this factor on when we build the
112  // normal vertical-interpolation PDF for each mass point. We will store these
113  // scale factors in this array, one per shape systematic.
114  multi_array<double, 1> ss_scale_arr(extents[ss]);
115  // Really just for book-keeping, we'll set this flag to true when the shape
116  // systematic scale factor != 1
117  multi_array<bool, 1> ss_must_scale_arr(extents[ss]);
118  // Similar to the ss_arr above, store the lnN ch::Systematic objects. Note
119  // implicit in all this is the assumption that the processes at each mass
120  // point have exactly the same list of systematic uncertainties.
121  multi_array<ch::Systematic *, 2> ls_arr(extents[ls][m]);
122 
123  // This array holds pointers to the RooRealVar objects that will become our
124  // shape nuisance parameters, e.g. "CMS_scale_t_mutau_8TeV"
125  multi_array<std::shared_ptr<RooRealVar>, 1> ss_scale_var_arr(extents[ss]);
126  // And this array holds the constant scale factors that we'll build from
127  // ss_scale_arr above
128  multi_array<std::shared_ptr<RooConstVar>, 1> ss_scale_fac_arr(extents[ss]);
129  // Finally this array will contain the scale_var * scale_fac product where we
130  // need to provide a scaled value of the nuisance parameter instead of the
131  // parameter itself in building the vertical-interp. PDF
132  multi_array<std::shared_ptr<RooProduct>, 1> ss_scale_prod_arr(extents[ss]);
133 
134 
135  // Now let's fill some of these arrays...
136  for (unsigned mi = 0; mi < m; ++mi) {
137  // The ch::Process pointers
138  cb_bp.cp().mass({m_str_vec[mi]}).ForEachProc([&](ch::Process *p) {
139  pr_arr[mi] = p;
140  });
141  for (unsigned ssi = 0; ssi < ss; ++ssi) {
142  // The ch::Systematic pointers for shape systematics
143  cb_bp.cp().mass({m_str_vec[mi]}).syst_name({ss_vec[ssi]})
144  .ForEachSyst([&](ch::Systematic *n) {
145  ss_arr[ssi][mi] = n;
146  });
147  }
148  for (unsigned lsi = 0; lsi < ls; ++lsi) {
149  // The ch::Systematic pointers for lnN systematics
150  cb_bp.cp().mass({m_str_vec[mi]}).syst_name({ls_vec[lsi]})
151  .ForEachSyst([&](ch::Systematic *n) {
152  ls_arr[lsi][mi] = n;
153  });
154  }
155  }
156 
158  // We need to build a RooArgList of the vertical morphing parameters for the
159  // vertical-interpolation pdf - this will be the same for each mass point so
160  // we only build it once
161  RooArgList ss_list;
162  for (unsigned ssi = 0; ssi < ss; ++ssi) {
163  // Make the nuisance parameter var. We use shared_ptrs here as a convenience
164  // because they will take care of automatically cleaning up the memory at
165  // the end
166  ss_scale_var_arr[ssi] =
167  std::make_shared<RooRealVar>(ss_vec[ssi].c_str(), "", 0);
168  // We'll make a quick check that the scale factor for this systematic is the
169  // same for all mass points. We could do a separate scaling at each mass
170  // point but this would create a lot of complications
171  set<double> scales;
172  // Insert the scale from each mass point into the set, if it has a size
173  // larger than one at the end then we have a problem!
174  for (unsigned mi = 0; mi < m; ++mi) {
175  scales.insert(ss_arr[ssi][mi]->scale());
176  }
177  if (scales.size() > 1) {
178  // Don't let the user proceed, we can't build the model they want
179  throw std::runtime_error(FNERROR(
180  "Shape morphing parameters that vary with mass are not allowed"));
181  } else {
182  // Everything ok, set the scale value in its array
183  ss_scale_arr[ssi] = *(scales.begin());
184  // Handle the case where the scale factor is != 1
185  if (std::fabs(ss_scale_arr[ssi] - 1.0) > 1E-6) {
186  ss_must_scale_arr[ssi] = true;
187  // Build the RooConstVar with the value of the scale factor
188  ss_scale_fac_arr[ssi] = std::make_shared<RooConstVar>(
189  TString::Format("%g", ss_scale_arr[ssi]), "",
190  ss_scale_arr[ssi]);
191  // Create the product of the floating nuisance parameter and the
192  // constant scale factor
193  ss_scale_prod_arr[ssi] = std::make_shared<RooProduct>(
194  ss_vec[ssi] + "_scaled_" + key, "",
195  RooArgList(*(ss_scale_var_arr[ssi]), *(ss_scale_fac_arr[ssi])));
196  // Add this to the list
197  ss_list.add(*(ss_scale_prod_arr[ssi]));
198  } else {
199  // If the scale factor is 1.0 then we just add the nuisance parameter
200  // directly to our list
201  ss_list.add(*(ss_scale_var_arr[ssi]));
202  }
203  }
204  }
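  // As an illustration (hypothetical datacard content, not taken from this
  // package): a shape systematic declared with a value other than 1.0, e.g.
  //
  //   CMS_scale_t_mutau_8TeV   shape   0.5
  //
  // arrives here with scale() == 0.5, so the code above builds a RooConstVar
  // named "0.5" and a RooProduct named "CMS_scale_t_mutau_8TeV_scaled_<key>",
  // and the vertical-interpolation pdf then sees theta * 0.5 instead of the
  // nuisance parameter theta itself.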
206 
207  // Summarise the info on the shape systematics and scale factors
208  if (verbose) {
209  std::cout << ">> Shape systematics: " << ss << "\n";
210  for (unsigned ssi = 0; ssi < ss; ++ssi) {
211  std::cout << boost::format("%-50s %-5i %-8.3g\n")
212  % ss_vec[ssi] % ss_must_scale_arr[ssi] % ss_scale_arr[ssi];
213  }
214  }
215 
216 
217  // lms = "lnN morphing systematic"
218  // Now we have some work to do with the lnN systematics. We can consider two cases:
219  // a) The uncertainty is the same for each mass point => we can leave it in
220  // the datacard as is and let text2workspace do its normal thing
221  // b) The uncertainty varies between mass points => we can't capture this
222  // information in the text datacard in the usual way, so we'll build a RooFit
223  // object that effectively makes the lnN uncertainty a function of the mass
224  // variable
225  // We'll use "lms" to refer to case b), which we'll try to figure out now...
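  // For example (purely illustrative numbers): a luminosity lnN of 1.026 at
  // every mass point is case a) and can stay in the datacard as-is, whereas
  // an acceptance lnN quoted as 1.03 at 110 GeV but 1.08 at 145 GeV is case
  // b) and will be absorbed into the mass-dependent normalisation below.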
226  vector<string> lms_vec; // vec of systematic names
227  set<string> lms_set; // set of systematic names
228  // index positions in our full ls_arr array for the lms systematics
229  vector<unsigned > lms_vec_idx;
230  for (unsigned lsi = 0; lsi < ls; ++lsi) {
231  // An extra complication is that the user might have been evil and mixed
232  // symmetric and asymmetric lnN values, so we'll try to detect changes in
233  // either
234  set<double> k_hi;
235  set<double> k_lo;
236  // Go through each mass point for this systematic and add the uncertainty
237  // values (so-called "kappa" values)
238  for (unsigned mi = 0; mi < m; ++mi) {
239  Systematic *n = ls_arr[lsi][mi];
240  k_hi.insert(n->value_u());
241  if (n->asymm()) {
242  k_lo.insert(n->value_d());
243  }
244  }
245  // If either of these sets has more than one entry then this is a lms case
246  if (k_hi.size() > 1 || k_lo.size() > 1) {
247  lms_vec.push_back(ls_vec[lsi]);
248  lms_set.insert(ls_vec[lsi]);
249  lms_vec_idx.push_back(lsi);
250  }
251  }
252  unsigned lms = lms_vec.size();
253  // New array for the pointers to the lms Systematic objects
254  multi_array<ch::Systematic *, 2> lms_arr(extents[lms][m]);
255  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
256  for (unsigned mi = 0; mi < m; ++mi) {
257  lms_arr[lmsi][mi] = ls_arr[lms_vec_idx[lmsi]][mi];
258  }
259  }
260 
261  // We will need to create the nuisance parameters for these now
262  multi_array<std::shared_ptr<RooRealVar>, 1> lms_var_arr(extents[lms]);
263  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
264  lms_var_arr[lmsi] =
265  std::make_shared<RooRealVar>(lms_vec[lmsi].c_str(), "", 0);
266  }
267 
268  // Give a summary of the lms systematics to the user
269  if (verbose) {
270  std::cout << ">> lnN morphing systematics: " << lms << "\n";
271  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
272  std::cout << ">>>> " << lms_vec[lmsi] << "\n";
273  }
274  }
275 
276  // Now we move into a phase of building all the objects we need:
277 
278  // 2D array of all input histograms, size is (mass points * (nominal +
279  // 2*shape-systs)). The factor of 2 is needed for the Up and Down shapes
280  multi_array<std::shared_ptr<TH1F>, 2> hist_arr(extents[m][1+ss*2]);
281  // We also need the array of process yields vs mass, because this will have to
282  // be interpolated too
283  multi_array<double, 1> rate_arr(extents[m]);
284  // Also store the yield uncertainty - we don't actually need this for the signal
285  // model, but will include it in the debug TGraph
286  multi_array<double, 1> rate_unc_arr(extents[m]);
287  // The vertical-interpolation PDF needs the TH1 inputs in the format of a TList
288  multi_array<std::shared_ptr<TList>, 1> list_arr(extents[m]);
289  // Combine always treats the normalisation part of shape systematics as
290  // distinct from the actual shape morphing. Essentially the norm part is
291  // treated as an asymmetric lnN. We have to make the kappa_hi and kappa_lo a
292  // function of the mass parameter too, so we need two more arrays in (ss * m)
293  multi_array<double, 2> ss_k_hi_arr(extents[ss][m]);
294  multi_array<double, 2> ss_k_lo_arr(extents[ss][m]);
295  // For each shape systematic we will build a RooSpline1D, configured to
296  // interpolate linearly between the kappa values
297  multi_array<std::shared_ptr<RooSpline1D>, 1> ss_spl_hi_arr(extents[ss]);
298  multi_array<std::shared_ptr<RooSpline1D>, 1> ss_spl_lo_arr(extents[ss]);
299  // To define the actual process scaling as a function of these kappa values,
300  // we need an AsymPow object per mass point
301  multi_array<std::shared_ptr<AsymPow>, 1> ss_asy_arr(extents[ss]);
302 
303  // Similar set of objects needed for the lms normalisation systematics
304  multi_array<double, 2> lms_k_hi_arr(extents[lms][m]);
305  multi_array<double, 2> lms_k_lo_arr(extents[lms][m]);
306  multi_array<std::shared_ptr<RooSpline1D>, 1> lms_spl_hi_arr(extents[lms]);
307  multi_array<std::shared_ptr<RooSpline1D>, 1> lms_spl_lo_arr(extents[lms]);
308  multi_array<std::shared_ptr<AsymPow>, 1> lms_asy_arr(extents[lms]);
309 
310  // Now we'll fill these objects..
311 
312  for (unsigned mi = 0; mi < m; ++mi) {
313  // Grab the nominal process histograms. We also have to convert every
314  // histogram to a uniform integer binning, because this is what
315  // text2workspace will do for all the non-morphed processes in our datacard,
316  // and we need the binning of these to be in sync.
317  hist_arr[mi][0] =
318  std::make_shared<TH1F>(RebinHist(AsTH1F(pr_arr[mi]->shape())));
319  // If the user supplied a TFile pointer we'll dump a bunch of info into it
320  // for debugging
321  if (file) {
322  file->WriteTObject(pr_arr[mi]->shape(), key + "_" + m_str_vec[mi]);
323  }
324  // Store the process rate
325  rate_arr[mi] = pr_arr[mi]->rate();
326  auto proc_hist = pr_arr[mi]->ClonedScaledShape();
327  proc_hist->IntegralAndError(1, proc_hist->GetNbinsX(), rate_unc_arr[mi]);
328  // Do the same for the Up and Down shapes
329  for (unsigned ssi = 0; ssi < ss; ++ssi) {
330  hist_arr[mi][1 + 2 * ssi] =
331  std::make_shared<TH1F>(RebinHist(AsTH1F(ss_arr[ssi][mi]->shape_u())));
332  hist_arr[mi][2 + 2 * ssi] =
333  std::make_shared<TH1F>(RebinHist(AsTH1F(ss_arr[ssi][mi]->shape_d())));
334  if (file) {
335  file->WriteTObject(ss_arr[ssi][mi]->shape_u(),
336  key + "_" + m_str_vec[mi] + "_" + ss_vec[ssi] + "Up");
337  file->WriteTObject(ss_arr[ssi][mi]->shape_d(),
338  key + "_" + m_str_vec[mi] + "_" + ss_vec[ssi] + "Down");
339  }
340  // Store the uncertainty ("kappa") values for the shape systematics
341  ss_k_hi_arr[ssi][mi] = ss_arr[ssi][mi]->value_u();
342  ss_k_lo_arr[ssi][mi] = ss_arr[ssi][mi]->value_d();
343  // For the normalisation we scale the kappa instead of putting the scaling
344  // parameter as the variable
345  if (std::fabs(ss_scale_arr[ssi] - 1.0) > 1E-6) {
346  ss_k_hi_arr[ssi][mi] = std::pow(ss_arr[ssi][mi]->value_u(), ss_scale_arr[ssi]);
347  ss_k_lo_arr[ssi][mi] = std::pow(ss_arr[ssi][mi]->value_d(), ss_scale_arr[ssi]);
348  }
349  }
350  // And now the uncertainty values for the lnN systematics that vary with mass
351  // We'll force these to be asymmetric even if they're not
352  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
353  lms_k_hi_arr[lmsi][mi] = lms_arr[lmsi][mi]->value_u();
354  if (lms_arr[lmsi][mi]->asymm()) {
355  lms_k_lo_arr[lmsi][mi] = lms_arr[lmsi][mi]->value_d();
356  } else {
357  lms_k_lo_arr[lmsi][mi] = 1. / lms_arr[lmsi][mi]->value_u();
358  }
359  }
360  }
361 
362  // Now we've made all our histograms, we'll put them in the TList format
363  // of [nominal, syst_1_Up, syst_1_Down, ... , syst_N_Up, syst_N_Down]
364  for (unsigned mi = 0; mi < m; ++mi) {
365  list_arr[mi] = std::make_shared<TList>();
366  for (unsigned xi = 0; xi < (1 + ss * 2); ++xi) {
367  list_arr[mi]->Add(hist_arr[mi][xi].get());
368  }
369  }
370 
371  // Print the values of the yields and kappa factors that will be inputs
372  // to our spline interpolation objects
373  if (verbose) {
374  for (unsigned mi = 0; mi < m; ++mi) {
375  std::cout << boost::format("%-10s") % m_str_vec[mi];
376  }
377  std::cout << "\n";
378  for (unsigned mi = 0; mi < m; ++mi) {
379  std::cout << boost::format("%-10.5g") % rate_arr[mi];
380  }
381  std::cout << "\n";
382  for (unsigned ssi = 0; ssi < ss; ++ssi) {
383  std::cout << ss_vec[ssi] << " Up" << std::endl;
384  for (unsigned mi = 0; mi < m; ++mi) {
385  std::cout << boost::format("%-10.5g") % ss_k_hi_arr[ssi][mi];
386  }
387  std::cout << "\n";
388  std::cout << ss_vec[ssi] << " Down" << std::endl;
389  for (unsigned mi = 0; mi < m; ++mi) {
390  std::cout << boost::format("%-10.5g") % ss_k_lo_arr[ssi][mi];
391  }
392  std::cout << "\n";
393  }
394  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
395  std::cout << lms_vec[lmsi] << " Up" << std::endl;
396  for (unsigned mi = 0; mi < m; ++mi) {
397  std::cout << boost::format("%-10.5g") % lms_k_hi_arr[lmsi][mi];
398  }
399  std::cout << "\n";
400  std::cout << lms_vec[lmsi] << " Down" << std::endl;
401  for (unsigned mi = 0; mi < m; ++mi) {
402  std::cout << boost::format("%-10.5g") % lms_k_lo_arr[lmsi][mi];
403  }
404  std::cout << "\n";
405  }
406  }
407 
408  // Can do more sophisticated spline interpolation if we want, but let's use
409  // simple LINEAR interpolation for now
410  TString interp = "LINEAR";
411 
412  // When the force_template_limit option is requested we have to add extra
413  // entries to the vector of masses to ensure that the signal normalisation
414  // goes to 0 outside of the MC template range.
415 
416  vector<double> new_m_vec(m_vec);
417  // Insert an entry at either end of the vector for a mass just slightly
418  // outside of the range
419  new_m_vec.insert(new_m_vec.begin(),m_vec[0]-1E-6);
420  new_m_vec.push_back(m_vec[m-1]+1E-6);
421  // Create a corresponding rate array with 0 entries for these new masses
422  multi_array<double, 1> new_rate_arr(extents[m+2]);
423  new_rate_arr[0] = 0.0;
424  for(unsigned i = 0; i < m; ++i) new_rate_arr[i+1] = rate_arr[i] ;
425  new_rate_arr[m+1] = 0.0;
426 
427  if (verbose && force_template_limit) {
428  std::cout << ">>>> Forcing rate to 0 outside of template range:" << "\n";
429  for(unsigned mi = 0; mi < m+2; ++mi) {
430  std::cout << boost::format("%-10.5g") % new_m_vec[mi];
431  }
432  std::cout << "\n";
433  for(unsigned mi = 0; mi < m+2; ++mi) {
434  std::cout << boost::format("%-10.5g") % new_rate_arr[mi];
435  }
436  std::cout << "\n";
437  }
438  // Create the 1D spline directly from the rate array
440  RooSpline1D rate_spline("interp_rate_"+key, "", mass_var,
441  force_template_limit ? m+2 : m,
442  force_template_limit ? new_m_vec.data() : m_vec.data(),
443  force_template_limit ? new_rate_arr.data() : rate_arr.data(),
444  interp);
446 
447  if (file) {
448  TGraphErrors tmp(m, m_vec.data(), rate_arr.data(), nullptr, rate_unc_arr.data());
449  file->WriteTObject(&tmp, "interp_rate_"+key);
450  }
451  // Collect all terms that will go into the total normalisation:
452  // nominal * systeff_1 * systeff_2 * ... * systeff_N
453  RooArgList rate_prod(rate_spline);
454  // For each shape systematic build a 1D spline for kappa_hi and kappa_lo
455  for (unsigned ssi = 0; ssi < ss; ++ssi) {
456  ss_spl_hi_arr[ssi] = std::make_shared<RooSpline1D>("spline_hi_" +
457  key + "_" + ss_vec[ssi], "", mass_var, m, m_vec.data(),
458  ss_k_hi_arr[ssi].origin(), interp);
459  ss_spl_lo_arr[ssi] = std::make_shared<RooSpline1D>("spline_lo_" +
460  key + "_" + ss_vec[ssi], "", mass_var, m, m_vec.data(),
461  ss_k_lo_arr[ssi].origin(), interp);
462  if (file) {
463  TGraph tmp_hi(m, m_vec.data(), ss_k_hi_arr[ssi].origin());
464  file->WriteTObject(&tmp_hi, "spline_hi_" + key + "_" + ss_vec[ssi]);
465  TGraph tmp_lo(m, m_vec.data(), ss_k_lo_arr[ssi].origin());
466  file->WriteTObject(&tmp_lo, "spline_lo_" + key + "_" + ss_vec[ssi]);
467  }
468  // Then build the AsymPow object for each systematic as a function of the
469  // kappas and the nuisance parameter
470  ss_asy_arr[ssi] = std::make_shared<AsymPow>("systeff_" +
471  key + "_" + ss_vec[ssi], "",
472  *(ss_spl_lo_arr[ssi]), *(ss_spl_hi_arr[ssi]),
473  *(ss_scale_var_arr[ssi]));
474  rate_prod.add(*(ss_asy_arr[ssi]));
475  }
476  // Same procedure for the lms normalisation systematics: build the splines
477  // then the AsymPows and add to the rate_prod list
478  for (unsigned lmsi = 0; lmsi < lms; ++lmsi) {
479  lms_spl_hi_arr[lmsi] = std::make_shared<RooSpline1D>("spline_hi_" +
480  key + "_" + lms_vec[lmsi], "", mass_var, m, m_vec.data(),
481  lms_k_hi_arr[lmsi].origin(), interp);
482  lms_spl_lo_arr[lmsi] = std::make_shared<RooSpline1D>("spline_lo_" +
483  key + "_" + lms_vec[lmsi], "", mass_var, m, m_vec.data(),
484  lms_k_lo_arr[lmsi].origin(), interp);
485  lms_asy_arr[lmsi] = std::make_shared<AsymPow>("systeff_" +
486  key + "_" + lms_vec[lmsi], "", *(lms_spl_lo_arr[lmsi]),
487  *(lms_spl_hi_arr[lmsi]), *(lms_var_arr[lmsi]));
488  rate_prod.add(*(lms_asy_arr[lmsi]));
489  }
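  // Schematically (a sketch of what the objects above implement, with theta_i
  // denoting the nuisance parameters), rate_prod now represents
  //
  //   rate(mH) = rate_spline(mH) * prod_i AsymPow(kLo_i(mH), kHi_i(mH), theta_i)
  //
  // where each AsymPow scales the yield roughly as kHi^theta for positive
  // theta and kLo^(-theta) for negative theta, i.e. an asymmetric lnN whose
  // kappa values are themselves splines in the mass variable.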
490  // We'll come back to this rate_prod a bit later.
491 
492  // Now some fun with binning. We anticipate that the process histograms we
493  // have been supplied could have a finer binning than is actually wanted for
494  // the analysis (and the fit), in order to avoid known problems with the RMS
495  // of a peaking distribution not being morphed smoothly from mass point to
496  // mass point if the binning is too wide. The RooMorphingPdf will handle
497  // re-binning on the fly, but we have to tell it how to rebin. To do this we
498  // assume the observed data histogram has the target binning.
499  TH1F data_hist = cb_bp.GetObservedShape();
500  TH1F proc_hist = cb_bp.GetShape();
501  // The x-axis variable has to be called "CMS_th1x", as this is what
502  // text2workspace will use for all the normal processes
503  RooRealVar xvar("CMS_th1x", "CMS_th1x", 0,
504  static_cast<float>(data_hist.GetNbinsX()));
505  xvar.setBins(data_hist.GetNbinsX());
506 
507  // Create a second x-axis variable, named specifically for this bin, that
508  // will be used for the finer-binned input
509  RooRealVar morph_xvar(("CMS_th1x_"+bin).c_str(), "", 0,
510  static_cast<float>(proc_hist.GetNbinsX()));
511  // We're not going to need roofit to evaluate anything as a function of this
512  // morphing x-axis variable, so we set it constant
513  morph_xvar.setConstant();
514  morph_xvar.setBins(proc_hist.GetNbinsX());
515  if (verbose) {
516  xvar.Print();
517  morph_xvar.Print();
518  }
519 
520  // Now we can build the array of vertical-interpolation pdfs (the same that
521  // text2workspace builds for every process), one per mass point
522  multi_array<std::shared_ptr<FastVerticalInterpHistPdf2>, 1> vpdf_arr(
523  extents[m]);
524  RooArgList vpdf_list;
525 
526  TString vert_name = key + "_";
527 
528  // Follow what ShapeTools.py does and set the smoothing region
529  // to the minimum of all of the shape scales
530  double qrange = 1.;
531  for (unsigned ssi = 0; ssi < ss; ++ssi) {
532  if (ss_scale_arr[ssi] < qrange) qrange = ss_scale_arr[ssi];
533  }
534 
535  for (unsigned mi = 0; mi < m; ++mi) {
536  // Construct it with the right binning, the right histograms and the right
537  // scaling parameters
538  vpdf_arr[mi] = std::make_shared<FastVerticalInterpHistPdf2>(
539  vert_name + m_str_vec[mi] + "_vmorph", "", morph_xvar, *(list_arr[mi]),
540  ss_list, qrange, 0);
541  // Add it to a list that we'll supply to the RooMorphingPdf
542  vpdf_list.add(*(vpdf_arr[mi]));
543  }
544  TString morph_name = key + "_morph";
545  // At long last, we can build our pdf, giving it:
546  // xvar : the fixed "CMS_th1x" x-axis variable with uniform integer binning
547  // mass_var: the floating mass value for the interpolation
548  // vpdf_list: the list of vertical-interpolation pdfs
549  // m_vec: the corresponding list of mass points
550  // allow_morph: if false will just evaluate to the closest pdf in mass
551  // data_hist.GetXaxis(): The original (non-uniform) target binning
552  // proc_hist.GetXaxis(): The original (non-uniform) morphing binning
554  RooMorphingPdf morph_pdf(morph_name, "", xvar, mass_var, vpdf_list,
555  m_vec, allow_morph, *(data_hist.GetXaxis()),
556  *(proc_hist.GetXaxis()));
558  // And we can make the final normalisation product
559  // The value of norm_postfix is very important. text2workspace will only look
560  // for a term with the pdf name + "_norm". But the user might want to add
561  // even more terms to this total normalisation, so we give them the option
562  // of using some other suffix.
564  RooProduct morph_rate(morph_name + "_" + TString(norm_postfix), "",
565  rate_prod);
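  // For example, with a hypothetical bin "htt_mt_1_8TeV", process "ggH" and
  // norm_postfix = "norm", the objects imported below would be named
  // "htt_mt_1_8TeV_ggH_morph" and "htt_mt_1_8TeV_ggH_morph_norm" - exactly the
  // <pdf name> + "_norm" pairing that text2workspace searches for.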
567 
568  // Dump even more plots
569  if (file) MakeMorphDebugPlots(&morph_pdf, &mass_var, m_vec, file, &data_hist);
570 
571  // Load our pdf and norm objects into the workspace
572  ws.import(morph_pdf, RooFit::RecycleConflictNodes());
573  ws.import(morph_rate, RooFit::RecycleConflictNodes());
574 
575  if (!verbose) RooMsgService::instance().setGlobalKillBelow(backup_msg_level);
576 
577  // Now we can clean up the CH instance a bit
578  // We only need one entry for each Process or Systematic now, not one per mass point
579  // We'll modify the first mass point to house our new pdf and norm, and drop
580  // the rest.
581  std::string mass_min = m_str_vec.at(0);
582 
583  // Dump Process entries for other mass points
584  cb.FilterProcs([&](ch::Process const* p) {
585  return p->bin() == bin && p->process() == process && p->mass() != mass_min;
586  });
587  // Dump Systematic entries for the other mass points, and also drop any shape
588  // systematic or mass-dependent lnN at the remaining mass point - we've already
589  // built these uncertainties into our normalisation term. Constant lnN
590  // systematics can remain in the datacard and be added by text2workspace
591  cb.FilterSysts([&](ch::Systematic const* n) {
592  return (n->bin() == bin && n->process() == process) &&
593  ((n->mass() != mass_min) || (n->type() == "shape") ||
594  (lms_set.count(n->name())));
595  });
596  // With the remaining Process entry (should only be one if we did this right),
597  // make the mass generic ("*"), drop the TH1 and set the rate to 1.0, as this
598  // will now be read from our norm object
599  cb.ForEachProc([&](ch::Process * p) {
600  if (p->bin() == bin && p->process() == process) {
601  p->set_mass("*");
602  p->set_shape(nullptr, false);
603  p->set_rate(1.0);
604  }
605  });
606  // Just declare the mass to be generic for the remaining systematics
607  cb.ForEachSyst([&](ch::Systematic * n) {
608  if (n->bin() == bin && n->process() == process) {
609  n->set_mass("*");
610  }
611  });
612 
613  // And we're done, but note that we haven't populated the Process entry with
614  // the PDF or norm objects we created, as we assume the user has more work to
615  // do on their RooWorkspace before copying into the CH instance. Once they've
616  // imported the workspace:
617  //
618  // cb.AddWorkspace(ws);
619  //
620  // They can populate the Process entries, e.g. if all signals were replaced
621  // with Morphing PDFs:
622  //
623  // cb.cp().signals().ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph");
624  return std::string(morph_name);
625 }
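// A minimal usage sketch (illustrative only: the workspace name, the mass
// range and the assumption that every signal process should be morphed are
// not prescribed by this function; cb is assumed to be an already-populated
// CombineHarvester instance):
//
//   RooWorkspace ws("htt", "htt");
//   RooRealVar mh("MH", "MH", 90., 160.);
//   for (auto const& b : cb.cp().signals().bin_set()) {
//     for (auto const& p : cb.cp().signals().bin({b}).process_set()) {
//       ch::BuildRooMorphing(ws, cb, b, p, mh, "norm", true, true);
//     }
//   }
//   cb.AddWorkspace(ws);
//   cb.cp().signals().ExtractPdfs(cb, "htt", "$BIN_$PROCESS_morph");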
626 
627 void MakeMorphDebugPlots(RooMorphingPdf* pdf, RooAbsReal* mass,
628  std::vector<double> const& masses, TFile* f, TH1 *ref_bins) {
629  RooRealVar *rrv = dynamic_cast<RooRealVar*>(mass);
630  if (!rrv) return;
631  f->cd();
632  f->mkdir(pdf->GetName());
633  gDirectory->cd(pdf->GetName());
634  for (unsigned i = 0; i < masses.size(); ++i) {
635  rrv->setVal(masses[i]);
636  TH1 * h = pdf->createHistogram("CMS_th1x");
637  h->AddDirectory(false);
638  TH1 * h2 = nullptr;
639  if (ref_bins) {
640  h2 = (TH1*)ref_bins->Clone();
641  h2->Reset();
642  for (int x = 1; x <= h2->GetNbinsX(); ++x) {
643  h2->SetBinContent(x, h->GetBinContent(x));
644  }
645  } else {
646  h2 = h;
647  h = nullptr;
648  }
649  h2->AddDirectory(false);
650  h2->SetName(TString::Format("actual_point_%g", masses[i]));
651  gDirectory->WriteTObject(h2);
652  if (h) delete h;
653  if (h2) delete h2;
654  }
655  double m = masses.front();
656  double step = 1;
657  if (((masses.back() - masses.front()) / step) > 100.) step = step * 10.;
658  if (((masses.back() - masses.front()) / step) > 100.) step = step * 10.;
659  if (((masses.back() - masses.front()) / step) < 10.) step = step/10.;
660  while (m <= masses.back()) {
661  rrv->setVal(m);
662  TH1* hm = pdf->createHistogram("CMS_th1x");
663  hm->AddDirectory(false);
664  TH1 * hm2 = nullptr;
665  if (ref_bins) {
666  hm2 = (TH1*)ref_bins->Clone();
667  hm2->Reset();
668  for (int x = 1; x <= hm2->GetNbinsX(); ++x) {
669  hm2->SetBinContent(x, hm->GetBinContent(x));
670  }
671  } else {
672  hm2 = hm;
673  hm = nullptr;
674  }
675  hm2->AddDirectory(false);
676  hm2->SetName(TString::Format("morph_point_%g", m));
677  // m can pick up floating-point error and end up very close to, but not exactly, 0; force the name in that case
678  if(fabs(m) < 1E-5) hm2->SetName(TString::Format("morph_point_0"));
679  gDirectory->WriteTObject(hm2);
680  if (hm) delete hm;
681  if (hm2) delete hm2;
682  m += step;
683  }
684  f->cd();
685 }
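// The debug TFile written by BuildRooMorphing and this function can be
// inspected interactively, e.g. (sketch, assuming the caller opened the file
// as "morph_debug.root"):
//
//   TFile *f = TFile::Open("morph_debug.root");
//   f->ls();  // input shapes, "interp_rate_*" / "spline_*" graphs, plus one
//             // directory per pdf with "actual_point_*" and "morph_point_*"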
686 }