Shortcuts

Source code for lumin.evaluation.ams

from typing import Tuple

import numpy as np
import pandas as pd
import torch
from fastprogress import progress_bar
from torch import Tensor

# Public API of this module: the names exported by a star-import.
__all__ = ["calc_ams", "calc_ams_torch", "ams_scan_quick", "ams_scan_slow"]


def calc_ams(s: float, b: float, br: float = 0, unc_b: float = 0) -> float:
    r"""
    Evaluate the Approximate Median Significance (https://arxiv.org/abs/1007.1727) for a given signal and background yield.

    Arguments:
        s: signal weight
        b: background weight
        br: background offset bias; only enters the calculation when `unc_b` is zero
        unc_b: fractional systematic uncertainty on background

    Returns:
        Approximate Median Significance, or -1 if `b` is zero or the radicand is not positive
    """

    # Without any background the significance is flagged as invalid
    if b == 0:
        return -1

    if unc_b:
        # Form including an uncertainty on the background; `br` is not used here
        var_b = np.square(unc_b * b)
        n_tot = s + b
        first_term = n_tot * np.log(n_tot * (b + var_b) / ((b**2) + (n_tot * var_b)))
        second_term = ((b**2) / var_b) * np.log(1 + ((var_b * s) / (b * (b + var_b))))
        radicand = 2 * (first_term - second_term)
    else:
        # Form without background uncertainty, with the background shifted by `br`
        radicand = 2 * ((s + b + br) * np.log(1.0 + s / (b + br)) - s)

    # A non-positive (or NaN) radicand has no real square root: flag with -1
    if radicand > 0:
        return np.sqrt(radicand)
    return -1


def calc_ams_torch(s: Tensor, b: Tensor, br: float = 0, unc_b: float = 0) -> Tensor:
    r"""
    Evaluate the Approximate Median Significance (https://arxiv.org/abs/1007.1727) on Tensor inputs using torch operations.

    Arguments:
        s: signal weight
        b: background weight
        br: background offset bias; only enters the calculation when `unc_b` is zero
        unc_b: fractional systematic uncertainty on background

    Returns:
        Approximate Median Significance, or 1e-18 * s if `b` is zero or the radicand is not positive
    """

    # Without any background, fall back to a tiny multiple of the signal
    if b == 0:
        return 1e-18 * s

    if unc_b:
        # Form including an uncertainty on the background; `br` is not used here
        var_b = torch.square(unc_b * b)
        n_tot = s + b
        first_term = n_tot * torch.log(n_tot * (b + var_b) / ((b**2) + (n_tot * var_b)))
        second_term = ((b**2) / var_b) * torch.log(1 + ((var_b * s) / (b * (b + var_b))))
        radicand = 2 * (first_term - second_term)
    else:
        # Form without background uncertainty, with the background shifted by `br`
        radicand = 2 * ((s + b + br) * torch.log(1.0 + s / (b + br)) - s)

    # A non-positive (or NaN) radicand has no real square root: use the same fallback
    if radicand > 0:
        return torch.sqrt(radicand)
    return 1e-18 * s


def ams_scan_quick(
    df: pd.DataFrame,
    wgt_factor: float = 1,
    br: float = 0,
    syst_unc_b: float = 0,
    pred_name: str = "pred",
    targ_name: str = "gen_target",
    wgt_name: str = "gen_weight",
) -> Tuple[float, float]:
    r"""
    Scan across a range of possible prediction thresholds in order to maximise the Approximate Median Significance
    (https://arxiv.org/abs/1007.1727). Note that whilst this method is quicker than
    :meth:`~lumin.evaluation.ams.ams_scan_slow`, it suffers from float precision, since the signal and background
    sums are updated by repeated subtraction. Not recommended for final evaluation.

    Arguments:
        df: DataFrame containing prediction data
        wgt_factor: factor to reweight signal and background weights
        br: background offset bias
        syst_unc_b: fractional systematic uncertainty on background
        pred_name: column to use as predictions
        targ_name: column to use as truth labels for signal and background
        wgt_name: column to use as weights for signal and background events

    Returns:
        maximum AMS
        prediction threshold corresponding to maximum AMS
    """

    max_ams, threshold = 0, 0.0
    df = df.sort_values(by=[pred_name])

    # Start from the total signal and background weights (every event passes the lowest cut)
    s = np.sum(df.loc[(df[targ_name] == 1), wgt_name])
    b = np.sum(df.loc[(df[targ_name] == 0), wgt_name])

    # Pull the per-event columns out once instead of re-extracting them on every iteration
    targets, weights = df[targ_name].values, df[wgt_name].values

    for cut, is_signal, weight in zip(df[pred_name], targets, weights):
        # Running sums are clamped at zero to absorb negative drift from the repeated subtraction
        ams = calc_ams(max(0, s * wgt_factor), max(0, b * wgt_factor), br, syst_unc_b)
        if ams > max_ams:
            max_ams, threshold = ams, cut

        # The current event fails the next (higher) cut, so remove its weight from its class
        if is_signal:
            s -= weight
        else:
            b -= weight

    return max_ams, threshold


def ams_scan_slow(
    df: pd.DataFrame,
    wgt_factor: float = 1,
    br: float = 0,
    syst_unc_b: float = 0,
    use_stat_unc: bool = False,
    start_cut: float = 0.9,
    min_events: int = 10,
    pred_name: str = "pred",
    targ_name: str = "gen_target",
    wgt_name: str = "gen_weight",
    show_prog: bool = True,
) -> Tuple[float, float]:
    r"""
    Scan across a range of possible prediction thresholds in order to maximise the Approximate Median Significance
    (https://arxiv.org/abs/1007.1727). Note that whilst this method is slower than
    :meth:`~lumin.evaluation.ams.ams_scan_quick`, it does not suffer as much from float precision, since the signal
    and background sums are recomputed from scratch for every threshold. Additionally it allows one to account for
    statistical uncertainty in AMS calculation.

    Arguments:
        df: DataFrame containing prediction data
        wgt_factor: factor to reweight signal and background weights
        br: background offset bias
        syst_unc_b: fractional systematic uncertainty on background
        use_stat_unc: whether to account for the statistical uncertainty on the background
        start_cut: minimum prediction to consider; useful for speeding up scan
        min_events: minimum number of background unscaled events required to pass threshold. Thresholds passing no
            background events at all are always skipped.
        pred_name: column to use as predictions
        targ_name: column to use as truth labels for signal and background
        wgt_name: column to use as weights for signal and background events
        show_prog: whether to display progress and ETA of scan

    Returns:
        maximum AMS
        prediction threshold corresponding to maximum AMS
    """

    max_ams, threshold = 0, 0.0
    sig, bkg = df[df[targ_name] == 1], df[df[targ_name] == 0]
    # Loop-invariant prediction columns, looked up once rather than for every cut
    sig_pred, bkg_pred = sig[pred_name], bkg[pred_name]
    syst_unc_b2 = np.square(syst_unc_b)

    # Every prediction value at or above start_cut is tried as a candidate threshold
    cuts = df.loc[df[pred_name] >= start_cut, pred_name].values
    for cut in progress_bar(cuts, display=show_prog, leave=show_prog):
        bkg_pass = bkg.loc[(bkg_pred >= cut), wgt_name]
        n_bkg = len(bkg_pass)
        # A cut with no background can never win (calc_ams returns -1 for b == 0) and would
        # divide by zero in the statistical-uncertainty term below, so skip it explicitly.
        if n_bkg < min_events or n_bkg == 0:
            continue

        s = np.sum(sig.loc[(sig_pred >= cut), wgt_name])
        b = np.sum(bkg_pass)

        if use_stat_unc:
            # Combine systematic and 1/sqrt(N) statistical fractional uncertainties in quadrature
            unc_b = np.sqrt(syst_unc_b2 + (1 / n_bkg))
        else:
            unc_b = syst_unc_b

        ams = calc_ams(s * wgt_factor, b * wgt_factor, br, unc_b)
        if ams > max_ams:
            max_ams, threshold = ams, cut

    return max_ams, threshold

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials