CombineHarvester
ValidationTools.cc
Go to the documentation of this file.
2 #include <iostream>
3 #include <vector>
4 #include <set>
5 #include <string>
6 #include <fstream>
7 #include <map>
8 #include "boost/format.hpp"
9 #include "RooFitResult.h"
10 #include "RooRealVar.h"
11 #include "RooDataHist.h"
12 #include "RooAbsReal.h"
13 #include "RooAbsData.h"
15 
16 namespace ch {
17 using json = nlohmann::json;
18 
20  std::cout<<"Systematic "<<syst->name()<<" for process "<<syst->process()<<" in region "<<syst->bin()<<" :";
21 }
22 
23 void PrintProc(ch::Process *proc){
24  std::cout<<"Process "<<proc->process()<<" in region "<<proc->bin()<<" :";
25 }
26 
27 
28 
30  cb.ForEachSyst([&](ch::Systematic *sys){
31  if(sys->type()=="shape" && ( (sys->value_u() > 1. && sys->value_d() > 1.) || (sys->value_u() < 1. && sys->value_d() < 1.))){
32  jsobj["uncertVarySameDirect"][sys->name()][sys->bin()][sys->process()]={{"value_u",sys->value_u()},{"value_d",sys->value_d()}};
33  }
34  });
35 }
36 
38  cb.ForEachSyst([&](ch::Systematic *sys){
39  if(sys->type()=="shape" && ( (sys->value_u() > 1. && sys->value_d() > 1.) || (sys->value_u() < 1. && sys->value_d() < 1.))){
40  PrintSystematic(sys);
41  std::cout<<" Up/Down normalisations go in the same direction: up variation: "<<sys->value_u()<<", down variation: "<<sys->value_d()<<std::endl;
42  }
43  });
44 }
45 
47  cb.ForEachSyst([&](ch::Systematic *sys){
48  const TH1* hist_u;
49  const TH1* hist_d;
50  if(sys->type()=="shape" && ( fabs(sys->value_u() - sys->value_d()) < 0.0000001)){
51  hist_u = sys->shape_u();
52  hist_d = sys->shape_d();
53  bool is_same=1;
54  for(int i=1;i<=hist_u->GetNbinsX();i++){
55  if(fabs(hist_u->GetBinContent(i))+fabs(hist_d->GetBinContent(i))>0){
56  if(2*double(fabs(hist_u->GetBinContent(i)-hist_d->GetBinContent(i)))/(fabs(hist_u->GetBinContent(i))+fabs(hist_d->GetBinContent(i)))>0.001) is_same = 0;
57  }
58  }
59  if(is_same){
60  jsobj["uncertTemplSame"][sys->name()][sys->bin()][sys->process()]={{"value_u",sys->value_u()},{"value_d",sys->value_d()}};
61  }
62  }
63  });
64 }
65 
67  cb.ForEachSyst([&](ch::Systematic *sys){
68  const TH1* hist_u;
69  const TH1* hist_d;
70  if(sys->type()=="shape" && ( fabs(sys->value_u() - sys->value_d()) < 0.0000001)){
71  hist_u = sys->shape_u();
72  hist_d = sys->shape_d();
73  bool is_same=1;
74  for(int i=1;i<=hist_u->GetNbinsX();i++){
75  if(fabs(hist_u->GetBinContent(i))+fabs(hist_d->GetBinContent(i))>0){
76  if(2*double(fabs(hist_u->GetBinContent(i)-hist_d->GetBinContent(i)))/(fabs(hist_u->GetBinContent(i))+fabs(hist_d->GetBinContent(i)))>0.001) is_same = 0;
77  }
78  }
79  if(is_same){
80  PrintSystematic(sys);
81  std::cout<<"Up/Down templates are identical: up variation: "<<sys->value_u()<<", down variation: "<<sys->value_d()<<std::endl;
82  }
83  }
84  });
85 }
86 
87 
88 
90  std::vector<ch::Process*> empty_procs;
91  auto bins = cb.bin_set();
92  cb.ForEachProc([&](ch::Process *proc){
93  if(proc->rate()==0.){
94  empty_procs.push_back(proc);
95  if (jsobj["emptyProcessShape"][proc->bin()] !=NULL){
96  jsobj["emptyProcessShape"][proc->bin()].push_back(proc->process());
97  } else {
98  jsobj["emptyProcessShape"][proc->bin()] = {proc->process()};
99  }
100  }
101  });
102  cb.ForEachSyst([&](ch::Systematic *sys){
103  bool no_check=0;
104  for( unsigned int i=0; i< empty_procs.size(); i++){
105  if ( MatchingProcess(*sys,*empty_procs.at(i)) ) no_check=1;
106  }
107  if(!no_check){
108  if(sys->type()=="shape" && (sys->value_u()==0. || sys->value_d()==0.)){
109  jsobj["emptySystematicShape"][sys->name()][sys->bin()][sys->process()]={{"value_u",sys->value_u()},{"value_d",sys->value_d()}};
110  }
111  }
112  });
113 }
114 
116  std::vector<ch::Process*> empty_procs;
117  cb.ForEachProc([&](ch::Process *proc){
118  if(proc->rate()==0.){
119  empty_procs.push_back(proc);
120  PrintProc(proc);
121  std::cout<<" has 0 yield"<<std::endl;
122  }
123  });
124  cb.ForEachSyst([&](ch::Systematic *sys){
125  bool no_check=0;
126  for( unsigned int i=0; i< empty_procs.size(); i++){
127  if ( MatchingProcess(*sys,*empty_procs.at(i)) ) no_check=1;
128  }
129  if(!no_check){
130  if(sys->type()=="shape" && (sys->value_u()==0. || sys->value_d()==0.)){
131  PrintSystematic(sys);
132  std::cout<<" At least one empty histogram: up variation: "<<sys->value_u()<<" Down variation: "<<sys->value_d()<<std::endl;
133  }
134  }
135  });
136 }
137 
138 void CheckNormEff(CombineHarvester& cb, double maxNormEff){
139  std::vector<ch::Process*> empty_procs;
140  cb.ForEachProc([&](ch::Process *proc){
141  if(proc->rate()==0.){
142  empty_procs.push_back(proc);
143  }
144  });
145  cb.ForEachSyst([&](ch::Systematic *sys){
146  bool no_check=0;
147  for( unsigned int i=0; i< empty_procs.size(); i++){
148  if ( MatchingProcess(*sys,*empty_procs.at(i)) ) no_check=1;
149  }
150  if(!no_check && ((sys->type()=="shape" && (sys->value_u()-1 > maxNormEff || sys->value_u()-1 < -maxNormEff || sys->value_d()-1>maxNormEff || sys->value_d()-1< -maxNormEff)) || (sys->type()=="lnN" && (sys->value_u()-1 > maxNormEff || sys->value_u()-1 < - maxNormEff) ))){
151  PrintSystematic(sys);
152  std::cout<<"Uncertainty has a large normalisation effect: up variation: "<<sys->value_u()<<" Down variation: "<<sys->value_d()<<std::endl;
153  }
154  });
155 }
156 
157 void CheckNormEff(CombineHarvester& cb, double maxNormEff, json& jsobj){
158  std::vector<ch::Process*> empty_procs;
159  cb.ForEachProc([&](ch::Process *proc){
160  if(proc->rate()==0.){
161  empty_procs.push_back(proc);
162  }
163  });
164  cb.ForEachSyst([&](ch::Systematic *sys){
165  bool no_check=0;
166  for( unsigned int i=0; i< empty_procs.size(); i++){
167  if ( MatchingProcess(*sys,*empty_procs.at(i)) ) no_check=1;
168  }
169  if(!no_check && ((sys->type()=="shape" && (std::abs(sys->value_u()-1) > maxNormEff || std::abs(sys->value_d()-1)>maxNormEff)) || (sys->type()=="lnN" && (std::abs(sys->value_u()-1) > maxNormEff) ))){
170  jsobj["largeNormEff"][sys->name()][sys->bin()][sys->process()]={{"value_u",sys->value_u()},{"value_d",sys->value_d()}};
171  }
172  });
173 }
174 
176  TH1F tothist;
177  auto bins = cb.bin_set();
178  for(auto b : bins){
179  auto cb_bin_backgrounds = cb.cp().bin({b}).backgrounds();
180  tothist = cb_bin_backgrounds.GetShape();
181  for(int i=1;i<=tothist.GetNbinsX();i++){
182  if(tothist.GetBinContent(i)<=0){
183  std::cout<<"Channel "<<b<<" bin "<<i<<" of the templates is empty in background"<<std::endl;
184  }
185  }
186  }
187 }
188 
190  TH1F tothist;
191  auto bins = cb.bin_set();
192  for(auto b : bins){
193  auto cb_bin_backgrounds = cb.cp().bin({b}).backgrounds();
194  tothist = cb_bin_backgrounds.GetShape();
195  for(int i=1;i<=tothist.GetNbinsX();i++){
196  if(tothist.GetBinContent(i)<=0){
197  if (jsobj["emptyBkgBin"][b] !=NULL){
198  jsobj["emptyBkgBin"][b].push_back(i);
199  } else {
200  jsobj["emptyBkgBin"][b] = {i};
201  }
202  }
203  }
204  }
205 }
206 
207 
209  double diff_lim=0.001;
210  cb.ForEachSyst([&](ch::Systematic *sys){
211  const TH1* hist_u;
212  const TH1* hist_d;
213  TH1F hist_nom;
214  if(sys->type()=="shape"){
215  hist_u = sys->shape_u();
216  hist_d = sys->shape_d();
217  hist_nom=cb.cp().bin({sys->bin()}).process({sys->process()}).GetShape();
218  hist_nom.Scale(1./hist_nom.Integral());
219  double up_diff=0;
220  double down_diff=0;
221  for(int i=1;i<=hist_u->GetNbinsX();i++){
222  if(fabs(hist_u->GetBinContent(i))+fabs(hist_nom.GetBinContent(i))>0){
223  up_diff+=2*double(fabs(hist_u->GetBinContent(i)-hist_nom.GetBinContent(i)))/(fabs(hist_u->GetBinContent(i))+fabs(hist_nom.GetBinContent(i)));
224  }
225  if(fabs(hist_d->GetBinContent(i))+fabs(hist_nom.GetBinContent(i))>0){
226  down_diff+=2*double(fabs(hist_d->GetBinContent(i)-hist_nom.GetBinContent(i)))/(fabs(hist_d->GetBinContent(i))+fabs(hist_nom.GetBinContent(i)));
227  }
228  }
229  if(up_diff<diff_lim && down_diff<diff_lim){
230  PrintSystematic(sys);
231  std::cout<<"Uncertainty probably has no genuine shape effect. Summed relative difference per bin between normalised nominal and up shape: "<<up_diff<<" between normalised nominal and down shape: "<<down_diff<<std::endl;
232  }
233  }
234  });
235 }
236 
237 
239  double diff_lim=0.001;
240  cb.ForEachSyst([&](ch::Systematic *sys){
241  const TH1* hist_u;
242  const TH1* hist_d;
243  TH1F hist_nom;
244  if(sys->type()=="shape"){
245  hist_u = sys->shape_u();
246  hist_d = sys->shape_d();
247  hist_nom=cb.cp().bin({sys->bin()}).process({sys->process()}).GetShape();
248  hist_nom.Scale(1./hist_nom.Integral());
249  double up_diff=0;
250  double down_diff=0;
251  for(int i=1;i<=hist_u->GetNbinsX();i++){
252  if(fabs(hist_u->GetBinContent(i))+fabs(hist_nom.GetBinContent(i))>0){
253  up_diff+=2*double(fabs(hist_u->GetBinContent(i)-hist_nom.GetBinContent(i)))/(fabs(hist_u->GetBinContent(i))+fabs(hist_nom.GetBinContent(i)));
254  }
255  if(fabs(hist_d->GetBinContent(i))+fabs(hist_nom.GetBinContent(i))>0){
256  down_diff+=2*double(fabs(hist_d->GetBinContent(i)-hist_nom.GetBinContent(i)))/(fabs(hist_d->GetBinContent(i))+fabs(hist_nom.GetBinContent(i)));
257  }
258  }
259  if(up_diff<diff_lim && down_diff<diff_lim) jsobj["smallShapeEff"][sys->name()][sys->bin()][sys->process()]={{"diff_u",up_diff},{"diff_d",down_diff}};
260  }
261  });
262 }
263 
264 void CheckSmallSignals(CombineHarvester& cb,double minSigFrac){
265  auto bins = cb.bin_set();
266  for(auto b : bins){
267  auto cb_bin_signals = cb.cp().bin({b}).signals();
268  auto cb_bin_backgrounds = cb.cp().bin({b}).backgrounds();
269  auto cb_bin = cb.cp().bin({b});
270  double sigrate = cb_bin_signals.GetRate();
271  for(auto p : cb_bin_signals.process_set()){
272  if(cb_bin_signals.cp().process({p}).GetRate() < minSigFrac*sigrate){
273  std::cout<<"Very small signal process. In bin "<<b<<" signal process "<<p<<" has yield "<<cb_bin_signals.cp().process({p}).GetRate()<<". Total signal rate in this bin is "<<sigrate<<std::endl;
274  }
275  }
276  }
277 }
278 
279 
280 void CheckSmallSignals(CombineHarvester& cb, double minSigFrac, json& jsobj){
281  auto bins = cb.bin_set();
282  for(auto b : bins){
283  auto cb_bin_signals = cb.cp().bin({b}).signals();
284  auto cb_bin_backgrounds = cb.cp().bin({b}).backgrounds();
285  auto cb_bin = cb.cp().bin({b});
286  double sigrate = cb_bin_signals.GetRate();
287  for(auto p : cb_bin_signals.process_set()){
288  if(cb_bin_signals.cp().process({p}).GetRate() < minSigFrac*sigrate){
289  jsobj["smallSignalProc"][b][p]={{"sigrate_tot",sigrate},{"procrate",cb_bin_signals.cp().process({p}).GetRate()}};
290  }
291  }
292  }
293 }
294 
295 
296 void ValidateCards(CombineHarvester& cb, std::string const& filename, double maxNormEff, double minSigFrac){
297  json output_js;
298  bool is_shape_card=1;
299  cb.ForEachProc([&](ch::Process *proc){
300  if(proc->pdf()||!(proc->shape())){
301  is_shape_card=0;
302  }
303  });
304  if(is_shape_card){
305  ValidateShapeUncertaintyDirection(cb, output_js);
306  CheckSizeOfShapeEffect(cb, output_js);
307  ValidateShapeTemplates(cb,output_js);
308  CheckEmptyBins(cb,output_js);
309  } else {
310  std::cout<<"Not a shape-based datacard / shape-based datacard using RooDataHist. Skipping checks on systematic shapes."<<std::endl;
311  }
312  CheckEmptyShapes(cb, output_js);
313  CheckNormEff(cb, maxNormEff, output_js);
314  CheckSmallSignals(cb,minSigFrac, output_js);
315  std::ofstream outfile(filename);
316  outfile <<std::setw(4)<<output_js<<std::endl;
317 }
318 
319 }
void ForEachSyst(Function func)
void ForEachProc(Function func)
CombineHarvester & bin(std::vector< std::string > const &vec, bool cond=true)
std::set< std::string > bin_set()
CombineHarvester cp()
Creates and returns a shallow copy of the CombineHarvester instance.
virtual std::string const & process() const
Definition: Object.h:20
virtual std::string const & bin() const
Definition: Object.h:17
RooAbsReal const * pdf() const
Definition: Process.h:60
double rate() const
Definition: Process.h:25
TH1 const * shape() const
Definition: Process.h:52
double value_u() const
Definition: Systematic.h:28
std::string const & type() const
Definition: Systematic.h:25
std::string const & name() const
Definition: Systematic.h:22
double value_d() const
Definition: Systematic.h:31
Definition: Algorithm.h:10
void ValidateShapeTemplates(CombineHarvester &cb, json &jsobj)
void ValidateShapeUncertaintyDirection(CombineHarvester &cb, json &jsobj)
void CheckNormEff(CombineHarvester &cb, double maxNormEff, json &jsobj)
void PrintProc(ch::Process *proc)
void CheckSizeOfShapeEffect(CombineHarvester &cb, json &jsobj)
void ValidateCards(CombineHarvester &cb, std::string const &filename, double maxNormEff, double minSigFrac)
void CheckEmptyBins(CombineHarvester &cb, json &jsobj)
void CheckEmptyShapes(CombineHarvester &cb, json &jsobj)
bool MatchingProcess(T const &first, U const &second)
Definition: Utilities.h:43
nlohmann::json json
void CheckSmallSignals(CombineHarvester &cb, double minSigFrac, json &jsobj)
void PrintSystematic(ch::Systematic *syst)