Shortcuts

Source code for lumin.evaluation.ams

import numpy as np
import pandas as pd
from typing import Tuple
from fastprogress import progress_bar

import torch
from torch import Tensor

# Public API of this module: the names exported by a wildcard import
__all__ = ['calc_ams', 'calc_ams_torch', 'ams_scan_quick', 'ams_scan_slow']


def calc_ams(s:float, b:float, br:float=0, unc_b:float=0) -> float:
    r'''
    Compute Approximate Median Significance (https://arxiv.org/abs/1007.1727)

    Arguments:
        s: signal weight
        b: background weight
        br: background offset bias; only enters the calculation when `unc_b` is zero
        unc_b: fractional systematic uncertainty on background

    Returns:
        Approximate Median Significance, or -1 when no valid value exists: `b` is zero, the offset background
        `b+br` is not positive (case `unc_b` == 0), or the radicand is not positive
    '''

    if b == 0: return -1
    if not unc_b:
        b_off = b+br
        # A zero offset background would divide by zero (ZeroDivisionError for Python floats, inf for NumPy floats),
        # and a negative one mathematically cannot give a positive radicand, so report "no valid AMS" directly
        if b_off <= 0: return -1
        radicand = 2*((s+b+br)*np.log(1.0+s/b_off)-s)
    else:
        # Variance of the background estimate; br is not used in this branch
        sigma_b_2 = np.square(unc_b*b)
        radicand = 2*(((s+b)*np.log((s+b)*(b+sigma_b_2)/((b**2)+((s+b)*sigma_b_2))))-(((b**2)/sigma_b_2)*np.log(1+((sigma_b_2*s)/(b*(b+sigma_b_2))))))
    # NaN radicands fail the comparison and therefore also map to -1
    return np.sqrt(radicand) if radicand > 0 else -1
def calc_ams_torch(s:Tensor, b:Tensor, br:float=0, unc_b:float=0) -> Tensor:
    r'''
    Compute Approximate Median Significance (https://arxiv.org/abs/1007.1727) using Tensor inputs

    Arguments:
        s: signal weight
        b: background weight
        br: background offset bias; only enters the calculation when `unc_b` is zero
        unc_b: fractional systematic uncertainty on background

    Returns:
        Approximate Median Significance, or the negligible value 1e-18 * s when no valid value exists: `b` is zero,
        the offset background `b+br` is not positive (case `unc_b` == 0), or the radicand is not positive
    '''

    # The comparisons below are used as Python booleans, so s and b are expected to be single-element Tensors
    if b == 0: return 1e-18*s
    if not unc_b:
        b_off = b+br
        # A zero offset background would make the result infinite, and a negative one mathematically cannot give
        # a positive radicand, so use the same fallback as for b == 0
        if b_off <= 0: return 1e-18*s
        radicand = 2*((s+b+br)*torch.log(1.0+s/b_off)-s)
    else:
        # Variance of the background estimate; br is not used in this branch
        sigma_b_2 = torch.square(unc_b*b)
        radicand = 2*(((s+b)*torch.log((s+b)*(b+sigma_b_2)/((b**2)+((s+b)*sigma_b_2))))-(((b**2)/sigma_b_2)*torch.log(1+((sigma_b_2*s)/(b*(b+sigma_b_2))))))
    # NaN radicands fail the comparison and therefore also map to the fallback value
    return torch.sqrt(radicand) if radicand > 0 else 1e-18*s
def ams_scan_quick(df:pd.DataFrame, wgt_factor:float=1, br:float=0, syst_unc_b:float=0,
                   pred_name:str='pred', targ_name:str='gen_target', wgt_name:str='gen_weight') -> Tuple[float,float]:
    r'''
    Scan across a range of possible prediction thresholds in order to maximise the Approximate Median Significance
    (https://arxiv.org/abs/1007.1727).
    Note that whilst this method is quicker than :meth:`~lumin.evaluation.ams.ams_scan_slow`, it suffers from
    float precision, since the signal and background sums are updated by repeated subtraction.
    Not recommended for final evaluation.

    Arguments:
        df: DataFrame containing prediction data
        wgt_factor: factor to reweight signal and background weights
        br: background offset bias
        syst_unc_b: fractional systematic uncertainty on background
        pred_name: column to use as predictions
        targ_name: column to use as truth labels for signal and background
        wgt_name: column to use as weights for signal and background events

    Returns:
        maximum AMS
        prediction threshold corresponding to maximum AMS
    '''

    # Work in ascending prediction order so each step removes the event with the lowest remaining prediction
    ordered = df.sort_values(by=[pred_name])
    sig_left = np.sum(ordered.loc[(ordered[targ_name] == 1), wgt_name])
    bkg_left = np.sum(ordered.loc[(ordered[targ_name] == 0), wgt_name])
    labels, weights = ordered[targ_name].values, ordered[wgt_name].values

    best_ams, best_cut = 0, 0.0
    for cut, label, weight in zip(ordered[pred_name], labels, weights):
        # AMS for all events with prediction >= cut; sums are clipped at zero before scaling
        ams = calc_ams(max(0, sig_left*wgt_factor), max(0, bkg_left*wgt_factor), br, syst_unc_b)
        if ams > best_ams:
            best_ams, best_cut = ams, cut
        # Remove the current event from whichever running sum it belongs to
        if label:
            sig_left -= weight
        else:
            bkg_left -= weight
    return best_ams, best_cut
def ams_scan_slow(df:pd.DataFrame, wgt_factor:float=1, br:float=0, syst_unc_b:float=0,
                  use_stat_unc:bool=False, start_cut:float=0.9, min_events:int=10,
                  pred_name:str='pred', targ_name:str='gen_target', wgt_name:str='gen_weight',
                  show_prog:bool=True) -> Tuple[float,float]:
    r'''
    Scan across a range of possible prediction thresholds in order to maximise the Approximate Median Significance
    (https://arxiv.org/abs/1007.1727).
    Note that whilst this method is slower than :meth:`~lumin.evaluation.ams.ams_scan_quick`, it does not suffer
    as much from float precision, since signal and background are re-summed for every threshold.
    Additionally it allows one to account for statistical uncertainty in AMS calculation.

    Arguments:
        df: DataFrame containing prediction data
        wgt_factor: factor to reweight signal and background weights
        br: background offset bias
        syst_unc_b: fractional systematic uncertainty on background
        use_stat_unc: whether to account for the statistical uncertainty on the background
        start_cut: minimum prediction to consider; useful for speeding up scan
        min_events: minimum number of unscaled background events required to pass threshold;
            thresholds passing no background events are always skipped
        pred_name: column to use as predictions
        targ_name: column to use as truth labels for signal and background
        wgt_name: column to use as weights for signal and background events
        show_prog: whether to display progress and ETA of scan

    Returns:
        maximum AMS
        prediction threshold corresponding to maximum AMS
    '''

    max_ams, threshold = 0, 0.0
    sig, bkg = df[df[targ_name] == 1], df[df[targ_name] == 0]
    syst_unc_b2 = np.square(syst_unc_b)
    # Every prediction at or above start_cut is tried as a candidate threshold
    cuts = df.loc[df[pred_name] >= start_cut, pred_name].values

    for cut in progress_bar(cuts, display=show_prog, leave=show_prog):
        bkg_pass = bkg.loc[(bkg[pred_name] >= cut), wgt_name]
        n_bkg = len(bkg_pass)
        # With no background passing, 1/n_bkg below would raise ZeroDivisionError when min_events <= 0, and the
        # background sum is zero, for which calc_ams returns -1; such a threshold can never become the maximum
        if n_bkg < min_events or n_bkg == 0: continue

        s = np.sum(sig.loc[(sig[pred_name] >= cut), wgt_name])
        b = np.sum(bkg_pass)
        # Optionally add the relative statistical uncertainty (1/sqrt(n_bkg)) in quadrature
        unc_b = np.sqrt(syst_unc_b2+(1/n_bkg)) if use_stat_unc else syst_unc_b

        ams = calc_ams(s*wgt_factor, b*wgt_factor, br, unc_b)
        if ams > max_ams: max_ams, threshold = ams, cut
    return max_ams, threshold
Read the Docs v: v0.7.0
Versions
latest
stable
v0.7.0
v0.6.0
v0.5.1
v0.5.0
v0.4.0.1
v0.3.1
Downloads
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials