Shortcuts

Source code for lumin.nn.metrics.reg_eval

import numpy as np
from typing import Optional, Callable
import pandas as pd
from statsmodels.stats.weightstats import DescrStatsW
from fastcore.all import store_attr

from ...utils.statistics import bootstrap_stats
from .eval_metric import EvalMetric

__all__ = ['RegPull', 'RegAsProxyPull']


[docs]class RegPull(EvalMetric): r''' Compute mean or standard deviation of delta or pull of some feature which is being directly regressed to. Optionally, use bootstrap resampling on validation data. Arguments: return_mean: whether to return the mean or the standard deviation use_bootstrap: whether to bootstrap resamples validation fold when computing statisitic use_pull: whether to return the pull (differences / targets) or delta (differences) name: optional name for metric, otherwise will be inferred from `use_pull` main_metric: whether this metic should be treated as the primary metric for SaveBest and EarlyStopping Will automatically set the first EvalMetric to be main if multiple primary metrics are submitted Examples:: >>> mean_pull = RegPull(return_mean=True, use_bootstrap=True, ... use_pull=True) >>> >>> std_delta = RegPull(return_mean=False, use_bootstrap=True, ... use_pull=False) >>> >>> mean_pull = RegPull(return_mean=True, use_bootstrap=False, ... use_pull=True, wgt_name='weights') ''' # TODO: Check how this handels multi-target regression, may need to adjust averaging axis & DescrStatsW may not handle multi-dimensional data well. def __init__(self, return_mean:bool, use_bootstrap:bool=False, use_pull:bool=True, name:Optional[str]=None, main_metric:bool=True): if name is None: name = 'pull' if use_pull else 'delta' super().__init__(name=name, lower_metric_better=True, main_metric=main_metric) store_attr(but=['name', 'main_metric']) def _compute(self, preds:np.ndarray, targets:np.ndarray, weights:Optional[np.ndarray]=None) -> float: delta = preds-targets if self.use_pull: delta /= targets if weights is not None: weights = weights.astype('float64') weights = weights/weights.sum() if self.use_bootstrap: bs = bootstrap_stats({'data':delta, 'mean':True, 'std':True, 'n':100, 'weights':weights}) return np.mean(bs['_mean']) if self.return_mean else np.mean(bs['_std']) else: if self.return_mean: return np.average(delta, weights=weights) else: return DescrStatsW(delta, ddof=1, weights=weights*len(weights) if weights is not None else None).std
[docs] def evaluate(self) -> float: r''' Compute mean or width of regression error. Returns: Mean or width of regression error ''' return self._compute(self.preds, self.targets, self.weights)
[docs]class RegAsProxyPull(RegPull): r''' Compute mean or standard deviation of delta or pull of some feature which is being indirectly regressed to via a proxy function. Optionally, use bootstrap resampling on validation data. Arguments: proxy_func: function which acts on regression predictions and adds pred and gen_target columns to the Pandas DataFrame it is passed which contains prediction columns pred_{i} return_mean: whether to return the mean or the standard deviation use_bootstrap: whether to bootstrap resamples validation fold when computing statisitic use_weights: whether to actually use weights if wgt_name is set use_pull: whether to return the pull (differences / targets) or delta (differences) targ_name: optional name of group in fold file containing regression targets name: optional name for metric, otherwise will be inferred from `use_pull` main_metric: whether this metic should be treated as the primary metric for SaveBest and EarlyStopping Will automatically set the first EvalMetric to be main if multiple primary metrics are submitted Examples:: >>> def reg_proxy_func(df): >>> df['pred'] = calc_pair_mass(df, (1.77682, 1.77682), ... {targ[targ.find('_t')+3:]: ... f'pred_{i}' for i, targ ... in enumerate(targ_feats)}) >>> df['gen_target'] = 125 >>> >>> std_delta = RegAsProxyPull(proxy_func=reg_proxy_func, ... return_mean=False, use_pull=False) ''' def __init__(self, proxy_func:Callable[[pd.DataFrame],None], return_mean:bool, targ_name:Optional[str]=None, use_bootstrap:bool=False, use_pull:bool=True, name:Optional[str]=None, main_metric:bool=True): if name is None: name = 'pull' if use_pull else 'delta' super().__init__(use_bootstrap=use_bootstrap, return_mean=use_bootstrap, use_pull=use_pull, main_metric=main_metric) store_attr(but=['use_bootstrap', 'use_bootstrap', 'use_pull', 'main_metric'])
[docs] def evaluate(self) -> float: r''' Compute statisitic on fold using provided predictions. Arguments: fy: :class:`~lumin.nn.data.fold_yielder.FoldYielder` interfacing to data idx: fold index corresponding to fold for which y_pred was computed y_pred: predictions for fold Returns: Statistic set in initialisation computed on the chsoen fold Examples:: >>> mean = mean_pull.evaluate(train_fy, val_id, val_preds) ''' df = self.get_df() self.proxy_func(df) return self._compute(df['pred'].values, df['gen_target'].values, df['gen_weight'].values if 'gen_weight' in df.columns else None)
Read the Docs v: stable
Versions
latest
stable
v0.8.0
v0.7.2
v0.7.1
v0.7.0
v0.6.0
v0.5.1
v0.5.0
v0.4.0.1
v0.3.1
Downloads
pdf
html
epub
On Read the Docs
Project Home
Builds

Free document hosting provided by Read the Docs.

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials