Source code for lumin.nn.callbacks.data_callbacks

from abc import ABCMeta, abstractmethod
from typing import List, Optional, Tuple, Union

import numpy as np
from fastcore.all import is_listy, store_attr

import torch
from torch import Tensor

from ..data.batch_yielder import BatchYielder
from .callback import Callback

__all__ = ['BinaryLabelSmooth', 'BootstrapResample', 'ParametrisedPrediction', 'TargReplace']

class BinaryLabelSmooth(Callback):
    r'''
    Callback for applying label smoothing to binary classes, based on arXiv:1512.00567.
    Applies smoothing during training only.

    Arguments:
        coefs: Smoothing coefficients: 0->coef[0] 1->1-coef[1]. If passed a float, coef[0]=coef[1]

    Examples::
        >>> lbl_smooth = BinaryLabelSmooth(0.1)
        >>>
        >>> lbl_smooth = BinaryLabelSmooth((0.1, 0.02))
    '''

    def __init__(self, coefs:Union[float,Tuple[float,float]]=0):
        super().__init__()
        # Normalise to a (zero-class, one-class) coefficient pair
        self.coefs = coefs if isinstance(coefs, tuple) else (coefs, coefs)

    def on_fold_begin(self) -> None:
        r'''
        Apply smoothing to the targets of the current fold (training folds only)
        '''
        if self.model.fit_params.state != 'train': return
        # NOTE(review): reconstructed from a garbled extraction — assumes targets is a
        # tensor of 0/1 labels living at fit_params.by.targets; confirm against BatchYielder
        y = self.model.fit_params.by.targets
        m = y == 0
        y[m] = self.coefs[0]      # 0 -> coefs[0]
        y[~m] = 1-self.coefs[1]   # 1 -> 1-coefs[1]
class BootstrapResample(Callback):
    r'''
    Callback for bootstrap sampling new training datasets from original training data during (ensemble) training.

    Arguments:
        n_folds: the number of folds present in training data
        bag_each_time: whether to sample a new set for each sub-epoch or to use the same sample each time
        reweight: whether to reweight the sampled data to match the weight sum (per class) of the original data

    Examples::
        >>> bs_resample = BootstrapResample(n_folds=len(train_fy))
    '''

    def __init__(self, n_folds:int, bag_each_time:bool=False, reweight:bool=True):
        super().__init__()
        # One fold is held out for validation at any time, leaving n_folds-1 training folds
        self.n_trn_flds,self.bag_each_time,self.reweight = n_folds-1,bag_each_time,reweight

    def _get_sample(self, length:int) -> np.ndarray:
        # Bootstrap: draw `length` indices with replacement
        return np.random.choice(range(length), length, replace=True)

    def _resample(self, sample:np.ndarray, by:BatchYielder) -> None:
        # Record weight sums (per class for classification) before resampling
        if by.weights is not None and self.reweight:
            if 'class' in self.model.objective:
                weight_sum = {}
                for c in torch.unique(by.targets.squeeze()):
                    weight_sum[c] = torch.sum(by.weights[by.targets.squeeze() == c])
            else:
                weight_sum = torch.sum(by.weights)

        # Resample in place
        by.inputs[...] = by.inputs[sample]
        by.targets[...] = by.targets[sample]
        if by.weights is not None:
            by.weights[...] = by.weights[sample]
            # Reweight so the (per-class) weight sum matches the original data.
            # Guarded by the weights check above: previously this branch could run with
            # by.weights = None, raising on the in-place multiply.
            if self.reweight:
                if 'class' in self.model.objective:
                    for c in weight_sum:
                        by.weights[by.targets.squeeze() == c] *= weight_sum[c]/torch.sum(by.weights[by.targets.squeeze() == c])
                else:
                    by.weights *= weight_sum/torch.sum(by.weights)

    def on_train_begin(self) -> None:
        r'''
        Resets internal parameters to prepare for a new training
        '''
        super().on_train_begin()
        self.iter,self.samples = 0,[]
        np.random.seed()  # Is this necessary?

    def on_fold_begin(self) -> None:
        r'''
        Resamples training data for the new sub-epoch
        '''
        if self.model.fit_params.state != 'train': return
        if self.bag_each_time or self.iter < self.n_trn_flds:
            # First pass over folds (or bagging every time): draw a fresh sample
            sample = self._get_sample(len(self.model.fit_params.by))
            if not self.bag_each_time: self.samples.append(sample)
        else:
            # Reuse the sample drawn for this fold on the first pass
            sample = self.samples[self.iter % self.n_trn_flds]
        self.iter += 1
        self._resample(sample, self.model.fit_params.by)
class ParametrisedPrediction(Callback):
    r'''
    Callback for running predictions for a parameterised network; one which has been trained using one or more inputs
    which represent e.g. different hypotheses for the classes, such as an unknown mass of some new particle.
    In such a scenario, multiple signal datasets could be used for training, with background receiving a random mass.
    During prediction one then needs to set these parametrisation features all to the same values to evaluate the
    model's response for that hypothesis.
    This callback can be passed to the predict method of the model/ensemble to adjust the parametrisation features
    to the desired values.

    Arguments:
        feats: list of feature names used during training (in the same order)
        param_feat: the feature name which is to be adjusted, or a list of features to adjust
        param_val: the value to which to set the parametrisation feature, or the list of values to set the
            parametrisation features to

    Examples::
        >>> mass_param = ParametrisedPrediction(train_feats, 'res_mass', 300)
        >>> model.predict(fold_yielder, pred_name=f'pred_mass_300', callbacks=[mass_param])
        >>>
        >>> mass_param = ParametrisedPrediction(train_feats, 'res_mass', 300)
        >>> spin_param = ParametrisedPrediction(train_feats, 'spin', 1)
        >>> model.predict(fold_yielder, pred_name=f'pred_mass_300', callbacks=[mass_param, spin_param])
    '''

    def __init__(self, feats:List[str], param_feat:Union[List[str],str], param_val:Union[List[float],float]):
        super().__init__()
        if not is_listy(param_feat): param_feat = [param_feat]
        if not is_listy(param_val):  param_val  = [param_val]
        self.param_val = list(param_val)
        # Map feature names to column indices once, up front (raises ValueError on unknown names)
        self.param_idx = [feats.index(f) for f in param_feat]

    def on_pred_begin(self) -> None:
        r'''
        Adjusts the data to be passed to the model by setting in place the parametrisation features to the preset values
        '''
        # NOTE(review): reconstructed from a garbled extraction — assumes inputs is a
        # 2D (row, feature) array-like at fit_params.by.inputs; confirm against BatchYielder
        for f, v in zip(self.param_idx, self.param_val): self.model.fit_params.by.inputs[:, f] = v
class TargReplace(Callback):
    r'''
    Callback to replace target data with requested data from the foldfile, allowing one to e.g. train two models
    simultaneously with the same inputs but different targets for e.g. adversarial training.
    At the end of validation epochs, the target data is swapped back to the original target data, to allow for the
    correct computation of any metrics.

    Arguments:
        targ_feats: list of column names in the foldfile to get and horizontally stack to replace target data in the
            current batch yielder

    Examples::
        >>> targ_replace = TargReplace(['is_fake'])
        >>> targ_replace = TargReplace(['class', 'is_fake'])
    '''

    def __init__(self, targ_feats:List[str]):
        store_attr()
        super().__init__()
        # Wrap a bare string in a list rather than list()-ing it, which would split it
        # into single characters; matches ParametrisedPrediction.__init__
        if not is_listy(self.targ_feats): self.targ_feats = [self.targ_feats]

    def on_fold_begin(self) -> None:
        r'''
        Stack new target datasets and replace target data in the current batch yielder
        '''
        targs = []
        # Training and validation read different fold indices
        idx = self.model.fit_params.trn_idx if self.model.fit_params.state == 'train' else self.model.fit_params.val_idx
        for t in self.targ_feats:
            targs.append(self.model.fit_params.fy.get_column(t, n_folds=1, fold_idx=idx, add_newaxis=True))
        self.model.fit_params.by.targets = np.hstack(targs)

    def on_epoch_end(self) -> None:
        r'''
        Swap original target data back in at the end of validation epochs
        '''
        if self.model.fit_params.state != 'valid': return
        self.model.fit_params.by.targets = self.model.fit_params.fy.get_column('targets', n_folds=1,
                                                                               fold_idx=self.model.fit_params.val_idx,
                                                                               add_newaxis=True)
class AbsWeightData(Callback, metaclass=ABCMeta): r''' Callback to weight folds of data accoridng to a function of the inputs or targets. Inherit and override the `weight_func` method according to your task. Arguments: on_eval: if true, also weight data during validation and testing ''' def __init__(self, on_eval:bool): super().__init__() self.on_eval = on_eval @abstractmethod def weight_func(self, x:Union[np.ndarray,Tensor], mx:Optional[Union[np.ndarray,Tensor]], y:Union[np.ndarray,Tensor], w:Union[np.ndarray,Tensor]) \ -> Union[np.ndarray,Tensor]: pass def on_fold_begin(self) -> None: r''' Weight all data in fold. ''' if self.model.fit_params.state != 'train' and not self.on_eval: return = self.weight_func(,,,
Read the Docs v: stable
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.


Access comprehensive developer and user documentation for LUMIN

View Docs


Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials