Source code for lumin.nn.metrics.class_eval
import numpy as np
from typing import List
from .eval_metric import EvalMetric
from ..data.fold_yielder import FoldYielder
from ...evaluation.ams import ams_scan_quick, ams_scan_slow
from ...utils.misc import to_binary_class
__all__ = ['AMS', 'MultiAMS']
[docs]class AMS(EvalMetric):
r'''
Class to compute maximum Approximate Median Significance (https://arxiv.org/abs/1007.1727) using classifier which directly predicts the class of data in a
binary classifiaction problem.
AMS is computed on a single fold of data provided by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and automatically reweights data by event
multiplicity to account missing weights.
Arguments:
n_total:total number of events in entire data set
wgt_name: name of weight group in fold file to use. N.B. if you have reweighted to balance classes, be sure to use the un-reweighted weights.
targ_name: name of target group in fold file
br: constant bias offset for background yield
syst_unc_b: fractional systematic uncertainty on background yield
use_quick_scan: whether to optimise AMS by the :meth:`~lumin.evaluation.ams.ams_scan_quick` method (fast but suffers floating point precision)
if False use :meth:`~lumin.evaluation.ams.ams_scan_slow` (slower but more accurate)
Examples::
>>> ams_metric = AMS(n_total=250000, br=10, wgt_name='gen_orig_weight')
>>>
>>> ams_metric = AMS(n_total=250000, syst_unc_b=0.1,
... wgt_name='gen_orig_weight', use_quick_scan=False)
'''
def __init__(self, n_total:int, wgt_name:str, targ_name:str='targets', br:float=0, syst_unc_b:float=0, use_quick_scan:bool=True):
super().__init__(targ_name=targ_name, wgt_name=wgt_name)
self.n_total,self.br,self.syst_unc_b,self.use_quick_scan,self.lower_metric_better = n_total,br,syst_unc_b,use_quick_scan,False
[docs] def evaluate(self, fy:FoldYielder, idx:int, y_pred:np.ndarray) -> float:
r'''
Compute maximum AMS on fold using provided predictions.
Arguments:
fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data
idx: fold index corresponding to fold for which y_pred was computed
y_pred: predictions for fold
Returns:
Maximum AMS computed on reweighted data from fold
Examples::
>>> ams = ams_metric.evaluate(train_fy, val_id, val_preds)
'''
df = self.get_df(fy, idx, y_pred)
if self.use_quick_scan: ams, _ = ams_scan_quick(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b)
else: ams, _ = ams_scan_slow(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b, show_prog=False)
return ams
[docs]class MultiAMS(AMS):
r'''
Class to compute maximum Approximate Median Significance (https://arxiv.org/abs/1007.1727) using classifier which predicts the class of data in a multiclass
classifiaction problem which can be reduced to a binary classification problem
AMS is computed on a single fold of data provided by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and automatically reweights data by event
multiplicity to account missing weights.
Arguments:
n_total:total number of events in entire data set
wgt_name: name of weight group in fold file to use. N.B. if you have reweighted to balance classes, be sure to use the un-reweighted weights.
targ_name: name of target group in fold file which indicates whether the event is signal or background
zero_preds: list of predicted classes which correspond to class 0 in the form pred_[i], where i is a NN output index
one_preds: list of predicted classes which correspond to class 1 in the form pred_[i], where i is a NN output index
br: constant bias offset for background yield
syst_unc_b: fractional systematic uncertainty on background yield
use_quick_scan: whether to optimise AMS by the :meth:`~lumin.evaluation.ams.ams_scan_quick` method (fast but suffers floating point precision)
if False use :meth:`~lumin.evaluation.ams.ams_scan_slow` (slower but more accurate)
Examples::
>>> ams_metric = MultiAMS(n_total=250000, br=10, targ_name='gen_target',
... wgt_name='gen_orig_weight',
... zero_preds=['pred_0', 'pred_1', 'pred_2'],
... one_preds=['pred_3'])
>>>
>>> ams_metric = MultiAMS(n_total=250000, syst_unc_b=0.1,
... targ_name='gen_target',
... wgt_name='gen_orig_weight',
... use_quick_scan=False,
... zero_preds=['pred_0', 'pred_1', 'pred_2'],
... one_preds=['pred_3'])
'''
def __init__(self, n_total:int, wgt_name:str, targ_name:str, zero_preds:List[str], one_preds:List[str], br:float=0, syst_unc_b:float=0,
use_quick_scan:bool=True):
super().__init__(n_total=n_total, br=br, syst_unc_b=syst_unc_b, use_quick_scan=use_quick_scan, targ_name=targ_name, wgt_name=wgt_name)
self.zero_preds,self.one_preds = zero_preds,one_preds
[docs] def evaluate(self, fy:FoldYielder, idx:int, y_pred:np.ndarray) -> float:
r'''
Compute maximum AMS on fold using provided predictions.
Arguments:
fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data
idx: fold index corresponding to fold for which y_pred was computed
y_pred: predictions for fold
Returns:
Maximum AMS computed on reweighted data from fold
Examples::
>>> ams = ams_metric.evaluate(train_fy, val_id, val_preds)
'''
# TODO: make the zero and one preds more user-friendly
df = self.get_df(fy, idx, y_pred)
to_binary_class(df, self.zero_preds, self.one_preds)
if self.use_quick_scan: ams, _ = ams_scan_quick(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b)
else: ams, _ = ams_scan_slow(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b, show_prog=False)
return ams