Shortcuts

Source code for lumin.data_processing.hep_proc

import numpy as np
import pandas as pd
from typing import List, Dict, Tuple, Union, Optional, Set
import warnings

# Names exported by `from lumin.data_processing.hep_proc import *`
__all__ = ['to_cartesian', 'to_pt_eta_phi', 'delta_phi', 'twist', 'add_abs_mom', 'add_mass', 'add_energy', 'add_mt', 'get_vecs', 'fix_event_phi', 'fix_event_z',
           'fix_event_y', 'event_to_cartesian', 'proc_event', 'calc_pair_mass']

'''
Todo:
- Add non-inplace versions/options
'''


def to_cartesian(df:pd.DataFrame, vec:str, drop:bool=False) -> None:
    r'''
    Vectorised, in-place conversion of a momentum vector from (pT, eta, phi) to Cartesian (px, py, pz) components.
    The pz component is only produced when an eta column exists for the vector.

    Arguments:
        df: DataFrame to alter
        vec: column prefix of the vector to convert, e.g. 'muon' for columns ['muon_pt', 'muon_phi', 'muon_eta']
        drop: if True, the source pT, phi (and eta) columns are removed once the new columns have been added
    '''

    eta_col, phi_col = f'{vec}_eta', f'{vec}_phi'
    has_eta = eta_col in df.columns
    # Both capitalisations of the transverse momentum are accepted; '_pT' takes precedence
    pt_col = f'{vec}_pT' if f'{vec}_pT' in df.columns else f'{vec}_pt'
    pt = df[pt_col]
    phi = df[phi_col]

    df[f'{vec}_px'] = pt*np.cos(phi)
    df[f'{vec}_py'] = pt*np.sin(phi)
    stale = [pt_col, phi_col]
    if has_eta:
        df[f'{vec}_pz'] = pt*np.sinh(df[eta_col])
        stale.append(eta_col)
    if drop: df.drop(columns=stale, inplace=True)
def to_pt_eta_phi(df:pd.DataFrame, vec:str, eta:Optional[bool]=None, drop:bool=False) -> None:
    r'''
    Vectorised, in-place conversion of a momentum vector from Cartesian (px, py, pz) to (pT, eta, phi) components.
    The eta component is only produced when a pz column exists for the vector.

    .. Attention:: eta is deprecated as it is now inferred from `df`. Will be removed in `V0.4`

    Arguments:
        df: DataFrame to alter
        vec: column prefix of the vector to convert, e.g. 'muon' for columns ['muon_px', 'muon_py', 'muon_pz']
        eta: deprecated and ignored; passing any non-None value only triggers a warning
        drop: if True, the source px, py (and pz) columns are removed once the new columns have been added
    '''

    # XXX Remove in v0.4
    if eta is not None:
        warnings.warn('Passing eta is now no longer required, it is infered from DF. '
                      'The eta argument is now depreciated and will be removed in v0.4')

    px_col, py_col, pz_col = f"{vec}_px", f"{vec}_py", f"{vec}_pz"
    pt_col, eta_col, phi_col = f'{vec}_pT', f'{vec}_eta', f'{vec}_phi'
    has_z = pz_col in df.columns
    px, py = df[px_col], df[py_col]

    df[pt_col] = np.sqrt(np.square(px) + np.square(py))
    if has_z: df[eta_col] = np.arcsinh(df[pz_col]/df[pt_col])

    # arcsin only covers [-pi/2, pi/2], so the px < 0 half-plane is corrected quadrant by quadrant
    df[phi_col] = np.arcsin(py/df[pt_col])
    backward = px < 0
    upper_left = backward & (py > 0)
    lower_left = backward & (py < 0)
    neg_x_axis = backward & (py == 0)
    df.loc[upper_left, phi_col] = np.pi-df.loc[upper_left, phi_col]
    df.loc[lower_left, phi_col] = -(np.pi+df.loc[lower_left, phi_col])
    # Exactly on the negative x-axis +pi and -pi are equivalent; the sign is drawn at random
    df.loc[neg_x_axis, phi_col] = np.random.choice((-np.pi, np.pi), df[neg_x_axis].shape[0])

    if drop:
        stale = [px_col, py_col]
        if has_z: stale.append(pz_col)
        df.drop(columns=stale, inplace=True)
def delta_phi(arr_a:Union[float,np.ndarray], arr_b:Union[float,np.ndarray]) -> Union[float,np.ndarray]:
    r'''
    Vectorised computation of the modulo-2pi angular separation of angles b from angles a, in range [-pi,pi].
    Differences already inside [-pi,pi] are returned untouched. Non-finite differences (inf, NaN) cannot be wrapped and
    are passed through unchanged.

    Arguments:
        arr_a: reference angles
        arr_b: final angles

    Returns:
        angular separation: a float when the inputs are scalars, otherwise an np.ndarray of the same shape as `arr_b-arr_a`
    '''

    two_pi = 2*np.pi
    dphi = np.array(arr_b-arr_a, dtype=float)
    # Non-finite entries are excluded from wrapping: no multiple of 2pi can bring them into range
    finite = np.isfinite(dphi)
    above = finite & (dphi > np.pi)
    below = finite & (dphi < -np.pi)
    with np.errstate(invalid='ignore'):  # inf-inf inside the unused branches of np.where would otherwise warn
        # Number of whole turns needed is computed in closed form rather than by repeated subtraction
        wrapped_down = dphi-two_pi*np.ceil((dphi-np.pi)/two_pi)
        wrapped_up = dphi+two_pi*np.ceil((-np.pi-dphi)/two_pi)
    wrapped = np.where(above, wrapped_down, np.where(below, wrapped_up, dphi))
    return float(wrapped) if wrapped.ndim == 0 else wrapped
def twist(dphi:Union[float,np.ndarray], deta:Union[float,np.ndarray]) -> Union[float,np.ndarray]:
    r'''
    Vectorised computation of twist between vectors (https://arxiv.org/abs/1010.3698), defined as arctan(|dphi/deta|).
    A zero `deta` with non-zero `dphi` gives the limiting value pi/2, for both scalar and array inputs.
    If both separations are zero the result is NaN.

    Arguments:
        dphi: delta phi separations
        deta: delta eta separations

    Returns:
        twist angle in range [0,pi/2] as float or np.array
    '''

    # np.true_divide follows IEEE rules (x/0 -> inf) even for Python floats, where `/` would raise ZeroDivisionError
    with np.errstate(divide='ignore'):
        ratio = np.true_divide(dphi, deta)
    return np.arctan(np.abs(ratio))
def add_abs_mom(df:pd.DataFrame, vec:str, z:bool=True) -> None:
    r'''
    Vectorised computation of the magnitude of a momentum vector, stored in place as a new `{vec}_absp` column.
    Currently only works for Cartesian vectors.

    Arguments:
        df: DataFrame to alter
        vec: column prefix of vector components, e.g. 'muon' for columns ['muon_px', 'muon_py', 'muon_pz']
        z: whether to include the z-component; if False only the transverse magnitude is computed
    '''

    # TODO extend to work on pT, eta, phi vectors
    p_squared = np.square(df[f'{vec}_px'])+np.square(df[f'{vec}_py'])
    if z: p_squared = p_squared+np.square(df[f'{vec}_pz'])
    df[f'{vec}_absp'] = np.sqrt(p_squared)
def add_mass(df:pd.DataFrame, vec:str) -> None:
    r'''
    Vectorised computation of the mass of a 4-vector from its energy and momentum magnitude, stored in place as a new
    `{vec}_mass` column. The `{vec}_absp` column is computed first if it does not already exist.

    Arguments:
        df: DataFrame to alter; must contain a `{vec}_E` column
        vec: column prefix of vector components, e.g. 'muon' for columns ['muon_px', 'muon_py', 'muon_pz']
    '''

    absp_col = f'{vec}_absp'
    if absp_col not in df.columns: add_abs_mom(df, vec)
    energy_sq = np.square(df[f'{vec}_E'])
    mom_sq = np.square(df[absp_col])
    df[f'{vec}_mass'] = np.sqrt(energy_sq-mom_sq)
def add_energy(df:pd.DataFrame, vec:str) -> None:
    r'''
    Vectorised computation of the energy of a 4-vector from its mass and momentum magnitude, stored in place as a new
    `{vec}_E` column. The `{vec}_absp` column is computed first if it does not already exist.

    Arguments:
        df: DataFrame to alter; must contain a `{vec}_mass` column
        vec: column prefix of vector components, e.g. 'muon' for columns ['muon_px', 'muon_py', 'muon_pz']
    '''

    absp_col = f'{vec}_absp'
    if absp_col not in df.columns: add_abs_mom(df, vec)
    mass_sq = np.square(df[f'{vec}_mass'])
    mom_sq = np.square(df[absp_col])
    df[f'{vec}_E'] = np.sqrt(mass_sq+mom_sq)
def add_mt(df:pd.DataFrame, vec:str, mpt_name:str='mpt') -> None:
    r'''
    Vectorised computation of transverse mass of 4-vector with respect to missing transverse momenta, adding new column in place.
    Currently only works for pT, eta, phi vectors.
    The transverse-momentum column of each vector is looked up independently as either `_pT` or `_pt` (`_pT` preferred),
    so the two vectors may use different capitalisation. The new column is named `{vec}_mT` when the vector uses `_pT`
    and `{vec}_mt` when it uses `_pt`.

    Arguments:
        df: DataFrame to alter
        vec: column prefix of vector components, e.g. 'muon' for columns ['muon_pT', 'muon_phi']
        mpt_name: column prefix of vector of missing transverse momenta components, e.g. 'mpt' for columns ['mpt_pT', 'mpt_phi']

    Raises:
        KeyError: if a transverse-momentum or phi column is missing for either vector
    '''

    # TODO: extend to work on Cartesian coordinates
    pt_cols = []
    for prefix in (vec, mpt_name):
        upper, lower = f'{prefix}_pT', f'{prefix}_pt'
        if upper in df.columns: pt_cols.append(upper)
        elif lower in df.columns: pt_cols.append(lower)
        else: raise KeyError(f'Neither {upper} nor {lower} found in DataFrame')
    vec_pt, mpt_pt = pt_cols
    out_col = f'{vec}_mT' if vec_pt.endswith('_pT') else f'{vec}_mt'

    # cos is 2pi-periodic, so the raw phi difference can be used without first wrapping it into [-pi,pi]
    dphi = df[f'{mpt_name}_phi']-df[f'{vec}_phi']
    df[out_col] = np.sqrt(2*df[vec_pt]*df[mpt_pt]*(1-np.cos(dphi)))
def get_vecs(feats:List[str], strict:bool=True) -> Set[str]:
    r'''
    Filter list of features to get the prefixes of the momentum vectors defined in the list.
    Works for both pT, eta, phi and Cartesian coordinates. Suffix matching is case-insensitive.
    If strict, only prefixes with both transverse components present (pT and phi, or px and py) are returned.

    Arguments:
        feats: list of features to filter
        strict: whether to require both transverse components of a vector to be present in the list

    Returns:
        set of unique vector prefixes, with the capitalisation used in `feats`
    '''

    feats = list(feats)  # materialise once so one-shot iterables can be scanned twice
    suffixes = ('_pt', '_phi', '_eta', '_px', '_py', '_pz')
    lowered = {f.lower() for f in feats}  # set gives O(1) membership tests below
    prefixes = {f[:f.rfind('_')] for f in feats if f.lower().endswith(suffixes)}
    if not strict: return prefixes

    def _has(prefix:str, *comps:str) -> bool:
        # True if every requested component of the vector is present, ignoring case
        return all(f'{prefix}_{c}'.lower() in lowered for c in comps)

    return {p for p in prefixes if _has(p, 'pt', 'phi') or _has(p, 'px', 'py')}
def fix_event_phi(df:pd.DataFrame, ref_vec:str) -> None:
    r'''
    Rotate each event in phi so that ref_vec sits at phi == 0. Performed in place.
    Every other vector found by `get_vecs` has its phi replaced by its separation from ref_vec.
    Currently only works on vectors defined in pT, eta, phi.

    Arguments:
        df: DataFrame to alter
        ref_vec: column prefix of vector components to use as reference, e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
    '''

    # TODO: extend to work on Cartesian coordinates
    ref_phi_col = f'{ref_vec}_phi'
    others = [v for v in get_vecs(df.columns) if v != ref_vec]
    for v in others:
        phi_col = f'{v}_phi'
        df[phi_col] = delta_phi(df[ref_phi_col], df[phi_col])
    # The reference is zeroed last because every other vector is measured against its original phi
    df[ref_phi_col] = 0
def fix_event_z(df:pd.DataFrame, ref_vec:str) -> None:
    r'''
    Flip events along the z-axis so that ref_vec points in the positive z-direction. Performed in place.
    Uses eta if ref_vec has an eta column, otherwise pz; the same component is then flipped for every vector
    found by `get_vecs`. Vectors lacking that component are reported via print and skipped.

    Arguments:
        df: DataFrame to alter
        ref_vec: column prefix of vector components to use as reference, e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
    '''

    comp = 'eta' if f'{ref_vec}_eta' in df.columns else 'pz'
    # Events to flip are fixed up front, before the reference vector itself is altered
    cut = (df[f'{ref_vec}_{comp}'] < 0)
    for v in get_vecs(df.columns):
        col = f'{v}_{comp}'
        try: df.loc[cut, col] = -df.loc[cut, col]
        except KeyError: print(f'{comp} component of {v} not found')
def fix_event_y(df:pd.DataFrame, ref_vec_0:str, ref_vec_1:str) -> None:
    r'''
    Flip events along the y-axis wherever ref_vec_1 has negative phi (or negative py if it has no phi column).
    Performed in place. The sign of that component is reversed for every vector found by `get_vecs` except ref_vec_0,
    which is left untouched.

    Arguments:
        df: DataFrame to alter
        ref_vec_0: column prefix of the vector to leave unflipped, e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
        ref_vec_1: column prefix of the vector whose sign selects the events to flip, e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
    '''

    comp = 'phi' if f'{ref_vec_1}_phi' in df.columns else 'py'
    # Events to flip are fixed up front, before ref_vec_1 itself is altered
    cut = (df[f'{ref_vec_1}_{comp}'] < 0)
    for v in get_vecs(df.columns):
        if v == ref_vec_0: continue
        col = f'{v}_{comp}'
        df.loc[cut, col] = -df.loc[cut, col]
def event_to_cartesian(df:pd.DataFrame, drop:bool=False, ignore:Optional[List[str]]=None) -> None:
    r'''
    Convert entire event to Cartesian coordinates, except vectors listed in ignore.
    Optionally, drop old pT,eta,phi features. Performed in place.
    Vectors without both a transverse-momentum (`_pT` or `_pt`) and a `_phi` column, e.g. those already stored
    only as px, py, pz, are left untouched, so the function can safely be called more than once on the same DataFrame.

    Arguments:
        df: DataFrame to alter
        drop: whether to drop old coordinates
        ignore: vectors to ignore when converting
    '''

    for v in get_vecs(df.columns):
        if ignore is not None and v in ignore: continue
        has_pt = f'{v}_pT' in df.columns or f'{v}_pt' in df.columns
        # get_vecs also returns Cartesian-only vectors; with no pT/phi source columns there is nothing to convert
        if not (has_pt and f'{v}_phi' in df.columns): continue
        to_cartesian(df, v, drop=drop)
def proc_event(df:pd.DataFrame, fix_phi:bool=False, fix_y=False, fix_z=False, use_cartesian=False,
               ref_vec_0:str=None, ref_vec_1:str=None, keep_feats:Optional[List[str]]=None, default_vals:Optional[List[str]]=None) -> None:
    r'''
    Process event: pass data in place through the requested conversions and drop columns made redundant by them.
    Data expected to consist of vectors defined in pT, eta, phi.

    Arguments:
        df: DataFrame to alter
        fix_phi: whether to rotate events using :meth:`~lumin.data_processing.hep_proc.fix_event_phi`
        fix_y: whether to flip events using :meth:`~lumin.data_processing.hep_proc.fix_event_y`
        fix_z: whether to flip events using :meth:`~lumin.data_processing.hep_proc.fix_event_z`
        use_cartesian: whether to convert vectors to Cartesian coordinates (the old pT, eta, phi columns are dropped)
        ref_vec_0: column prefix of vector components to use as reference (0) for :meth:`~lumin.data_processing.hep_proc.fix_event_phi`,
            :meth:`~lumin.data_processing.hep_proc.fix_event_y`, and :meth:`~lumin.data_processing.hep_proc.fix_event_z`,
            e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
        ref_vec_1: column prefix of vector components to use as reference 1 for :meth:`~lumin.data_processing.hep_proc.fix_event_y`,
            e.g. 'muon' for columns ['muon_pT', 'muon_eta', 'muon_phi']
        keep_feats: columns whose values (after missing-value replacement) are restored at the end, even if a conversion altered or dropped them
        default_vals: list of default values which might be used to represent missing vector components.
            These, along with +/- infinity, are replaced with np.nan.
    '''

    missing = [np.inf, -np.inf]
    if default_vals is not None: missing = missing+default_vals
    df.replace(missing, np.nan, inplace=True)

    # Protected columns are stashed under a temporary '<name>keep' column and restored once processing is done
    protected = keep_feats if keep_feats is not None else []
    for f in protected: df[f'{f}keep'] = df[f'{f}']

    if fix_phi:
        print(f'Setting {ref_vec_0} to phi = 0')
        fix_event_phi(df, ref_vec_0)
    if fix_y:
        print(f'Setting {ref_vec_1} to positve phi')
        fix_event_y(df, ref_vec_0, ref_vec_1)
    if fix_z:
        print(f'Setting {ref_vec_0} to positive eta')
        fix_event_z(df, ref_vec_0)
    if use_cartesian:
        print("Converting to use_cartesian coordinates")
        event_to_cartesian(df, drop=True)

    if fix_phi:
        # After the rotation the reference phi is identically zero (hence so is its py), so the column is dropped
        redundant = f"{ref_vec_0}_py" if use_cartesian else f"{ref_vec_0}_phi"
        df.drop(columns=[redundant], inplace=True)

    for f in protected:
        df[f'{f}'] = df[f'{f}keep']
        df.drop(columns=[f'{f}keep'], inplace=True)
def calc_pair_mass(df:pd.DataFrame, masses:Union[Tuple[float,float],Tuple[np.ndarray,np.ndarray]], feat_map:Dict[str,str]) -> np.ndarray:
    r'''
    Vectorised computation of the invariant mass of a pair of particles with given masses, using their 3-momenta.
    Each particle's energy is built from its mass and momentum; the pair mass then follows from the summed 4-vector.
    Only works for vectors defined in Cartesian coordinates.

    Arguments:
        df: DataFrame holding the vector components
        masses: tuple of masses of the two particles (either constants or one array of masses per particle)
        feat_map: dictionary mapping the keys '0_px', '0_py', '0_pz', '1_px', '1_py', '1_pz' to the matching column names in df

    Returns:
        np.array of invariant masses
    '''

    # TODO: add inplace option
    # TODO: extend to work on pT, eta, phi coordinates
    axes = ('px', 'py', 'pz')
    mom = {f'{i}_{a}': df.loc[:, feat_map[f'{i}_{a}']] for i in (0, 1) for a in axes}
    energy = [np.sqrt((masses[i]**2)+np.square(mom[f'{i}_px'])+np.square(mom[f'{i}_py'])+np.square(mom[f'{i}_pz'])) for i in (0, 1)]

    parent_e = energy[0]+energy[1]
    parent_px, parent_py, parent_pz = (mom[f'0_{a}']+mom[f'1_{a}'] for a in axes)
    parent_p2 = np.square(parent_px)+np.square(parent_py)+np.square(parent_pz)
    return np.sqrt(np.square(parent_e)-parent_p2).values
Read the Docs v: v0.3.1
Versions
latest
stable
v0.3.2
v0.3.1
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials