Shortcuts

Source code for lumin.nn.metrics.class_eval

import numpy as np
from typing import List

from .eval_metric import EvalMetric
from ..data.fold_yielder import FoldYielder
from ...evaluation.ams import ams_scan_quick, ams_scan_slow
from ...utils.misc import to_binary_class

__all__ = ['AMS', 'MultiAMS']


[docs]class AMS(EvalMetric): r''' Class to compute maximum Approximate Median Significance (https://arxiv.org/abs/1007.1727) using classifier which directly predicts the class of data in a binary classifiaction problem. AMS is computed on a single fold of data provided by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and automatically reweights data by event multiplicity to account missing weights. Arguments: n_total:total number of events in entire data set wgt_name: name of weight group in fold file to use. N.B. if you have reweighted to balance classes, be sure to use the un-reweighted weights. targ_name: name of target group in fold file br: constant bias offset for background yield syst_unc_b: fractional systematic uncertainty on background yield use_quick_scan: whether to optimise AMS by the :meth:`~lumin.evaluation.ams.ams_scan_quick` method (fast but suffers floating point precision) if False use :meth:`~lumin.evaluation.ams.ams_scan_slow` (slower but more accurate) Examples:: >>> ams_metric = AMS(n_total=250000, br=10, wgt_name='gen_orig_weight') >>> >>> ams_metric = AMS(n_total=250000, syst_unc_b=0.1, ... wgt_name='gen_orig_weight', use_quick_scan=False) ''' def __init__(self, n_total:int, wgt_name:str, targ_name:str='targets', br:float=0, syst_unc_b:float=0, use_quick_scan:bool=True): super().__init__(targ_name=targ_name, wgt_name=wgt_name) self.n_total,self.br,self.syst_unc_b,self.use_quick_scan,self.lower_metric_better = n_total,br,syst_unc_b,use_quick_scan,False
[docs] def evaluate(self, fy:FoldYielder, idx:int, y_pred:np.ndarray) -> float: r''' Compute maximum AMS on fold using provided predictions. Arguments: fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data idx: fold index corresponding to fold for which y_pred was computed y_pred: predictions for fold Returns: Maximum AMS computed on reweighted data from fold Examples:: >>> ams = ams_metric.evaluate(train_fy, val_id, val_preds) ''' df = self.get_df(fy, idx, y_pred) if self.use_quick_scan: ams, _ = ams_scan_quick(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b) else: ams, _ = ams_scan_slow(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b, show_prog=False) return ams
[docs]class MultiAMS(AMS): r''' Class to compute maximum Approximate Median Significance (https://arxiv.org/abs/1007.1727) using classifier which predicts the class of data in a multiclass classifiaction problem which can be reduced to a binary classification problem AMS is computed on a single fold of data provided by a :class:`~lumin.nn.data.fold_yielder.FoldYielder` and automatically reweights data by event multiplicity to account missing weights. Arguments: n_total:total number of events in entire data set wgt_name: name of weight group in fold file to use. N.B. if you have reweighted to balance classes, be sure to use the un-reweighted weights. targ_name: name of target group in fold file which indicates whether the event is signal or background zero_preds: list of predicted classes which correspond to class 0 in the form pred_[i], where i is a NN output index one_preds: list of predicted classes which correspond to class 1 in the form pred_[i], where i is a NN output index br: constant bias offset for background yield syst_unc_b: fractional systematic uncertainty on background yield use_quick_scan: whether to optimise AMS by the :meth:`~lumin.evaluation.ams.ams_scan_quick` method (fast but suffers floating point precision) if False use :meth:`~lumin.evaluation.ams.ams_scan_slow` (slower but more accurate) Examples:: >>> ams_metric = MultiAMS(n_total=250000, br=10, targ_name='gen_target', ... wgt_name='gen_orig_weight', ... zero_preds=['pred_0', 'pred_1', 'pred_2'], ... one_preds=['pred_3']) >>> >>> ams_metric = MultiAMS(n_total=250000, syst_unc_b=0.1, ... targ_name='gen_target', ... wgt_name='gen_orig_weight', ... use_quick_scan=False, ... zero_preds=['pred_0', 'pred_1', 'pred_2'], ... one_preds=['pred_3']) ''' def __init__(self, n_total:int, wgt_name:str, targ_name:str, zero_preds:List[str], one_preds:List[str], br:float=0, syst_unc_b:float=0, use_quick_scan:bool=True): super().__init__(n_total=n_total, br=br, syst_unc_b=syst_unc_b, use_quick_scan=use_quick_scan, targ_name=targ_name, wgt_name=wgt_name) self.zero_preds,self.one_preds = zero_preds,one_preds
[docs] def evaluate(self, fy:FoldYielder, idx:int, y_pred:np.ndarray) -> float: r''' Compute maximum AMS on fold using provided predictions. Arguments: fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data idx: fold index corresponding to fold for which y_pred was computed y_pred: predictions for fold Returns: Maximum AMS computed on reweighted data from fold Examples:: >>> ams = ams_metric.evaluate(train_fy, val_id, val_preds) ''' # TODO: make the zero and one preds more user-friendly df = self.get_df(fy, idx, y_pred) to_binary_class(df, self.zero_preds, self.one_preds) if self.use_quick_scan: ams, _ = ams_scan_quick(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b) else: ams, _ = ams_scan_slow(df, wgt_factor=self.n_total/len(y_pred), br=self.br, syst_unc_b=self.syst_unc_b, show_prog=False) return ams
Read the Docs v: v0.3.1
Versions
latest
stable
v0.3.2
v0.3.1
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials