Source code for lumin.optimisation.hyper_param

from typing import Tuple, Dict, List, Optional, Union
from fastprogress import master_bar, progress_bar
import numpy as np
from collections import OrderedDict
import timeit
from functools import partial
from fastcore.all import is_listy

from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

from import FoldYielder
from ..nn.models.model_builder import ModelBuilder
from ..nn.models.model import Model
from ..nn.callbacks.opt_callbacks import LRFinder
from import plot_lr_finders
from ..plotting.plot_settings import PlotSettings

import matplotlib.pyplot as plt

__all__ = ['get_opt_rf_params', 'lr_find']

def get_opt_rf_params(x_trn:np.ndarray, y_trn:np.ndarray, x_val:np.ndarray, y_val:np.ndarray, objective:str,
                      w_trn:Optional[np.ndarray]=None, w_val:Optional[np.ndarray]=None,
                      params:Optional[OrderedDict]=None, n_estimators:int=40, verbose:bool=True) \
        -> Tuple[Dict[str,float],Union[RandomForestRegressor,RandomForestClassifier]]:
    r'''
    Use an ordered parameter-scan to roughly optimise Random Forest hyper-parameters.

    Parameters are scanned one at a time, in the iteration order of `params`. For each candidate value a new forest is
    trained on the training data and scored on the validation data. A value is only adopted if its score is strictly
    greater than the best score seen so far in the whole scan (i.e. including scores from previously scanned parameters);
    adopted values are then held fixed while the remaining parameters are scanned.

    Arguments:
        x_trn: training input data
        y_trn: training target data
        x_val: validation input data
        y_val: validation target data
        objective: string representation of objective: either 'classification' or 'regression'.
            A classifier is used if the string contains 'class' (case-insensitive), otherwise a regressor is used
        w_trn: training weights
        w_val: validation weights
        params: ordered dictionary mapping parameters to optimise to list of values to consider.
            If None, scans `min_samples_leaf` over [1,3,5,10,25,50,100] and then `max_features` over [0.3,0.5,0.7,0.9]
        n_estimators: number of trees to use in each forest
        verbose: Print extra information and show a live plot of model performance

    Returns:
        params: dictionary mapping parameters to their optimised values
        rf: best performing Random Forest

    Raises:
        ValueError: if `params` is empty, meaning no model was ever trained
    '''

    if params is None:
        params = OrderedDict({'min_samples_leaf': [1,3,5,10,25,50,100], 'max_features': [0.3,0.5,0.7,0.9]})
    rf = RandomForestClassifier if 'class' in objective.lower() else RandomForestRegressor
    # Starting configuration; entries are overwritten whenever a scanned value improves the validation score
    best_params = {'n_estimators': n_estimators, 'n_jobs': -1, 'max_features': 'sqrt'}
    best_scores = []  # running best score after each trained model
    scores = []  # raw score of each trained model
    best_m = None

    mb = master_bar(params)
    mb.names = ['Best', 'Scores']
    if verbose: mb.update_graph([[[],[]], [[],[]]])
    for param in mb:
        values = params[param]
        pb = progress_bar(values, parent=mb)
        pb.comment = f'{param} = {values[0]}'
        for i, value in enumerate(pb):
            # Comment is set to the value following the current one, clamped to the last entry
            pb.comment = f'{param} = {values[min(i+1, len(values)-1)]}'
            # Candidate model: best configuration found so far, with the scanned parameter overridden
            m = rf(**{**best_params, param: value})
            m.fit(X=x_trn, y=y_trn, sample_weight=w_trn)
            scores.append(m.score(X=x_val, y=y_val, sample_weight=w_val))
            if not best_scores or scores[-1] > best_scores[-1]:
                best_scores.append(scores[-1])
                best_params[param] = value
                if verbose: print(f'Better score schieved: {param} @ {value} = {best_scores[-1]:.4f}')
                best_m = m
            else:
                # No improvement: carry the previous best forward so both curves stay the same length
                best_scores.append(best_scores[-1])
            if verbose: mb.update_graph([[range(len(best_scores)), best_scores], [range(len(scores)), scores]])

    if verbose:
        # NOTE(review): presumably only plotting-capable bars create `fig`; guard so others do not raise AttributeError
        if hasattr(mb, 'fig'): delattr(mb, 'fig')
        plt.clf()
    if best_m is None:
        raise ValueError('No models were trained: `params` must contain at least one parameter with values to scan')
    return best_params, best_m
def lr_find(fy:FoldYielder, model_builder:ModelBuilder, bs:int, n_epochs:int=1, train_on_weights:bool=True, n_repeats:int=-1,
            lr_bounds:Tuple[float,float]=[1e-5, 10], cb_partials:Optional[List[partial]]=None,
            plot_settings:PlotSettings=PlotSettings(), bulk_move:bool=True, plot_savename:Optional[str]=None) -> List[LRFinder]:
    r'''
    Wrapper function for training using :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder`, which runs a Smith LR range test
    using the folds in the `FoldYielder`.
    A new model is built for each repeat and trained with the LR interpolated between the set bounds.
    In each repeat, the fold whose index matches the repeat number is left out of training.
    The resulting LR finders are passed to `plot_lr_finders` for plotting.

    Arguments:
        fy: `FoldYielder` providing training data
        model_builder: :class:`~lumin.nn.models.model_builder.ModelBuilder` providing networks and optimisers
        bs: batch size
        n_epochs: number of epochs to train per fold
        train_on_weights: If weights are present, whether to use them for training
        n_repeats: if >= 1, will train at most `n_repeats` models (capped at the number of folds),
            otherwise will train one model per fold
        lr_bounds: starting and ending LR values
        cb_partials: optional list of functools.partial, each of which will instantiate a
            :class:`~lumin.nn.callbacks.callback.Callback` when called. A single non-list value is wrapped in a list
        plot_settings: :class:`~lumin.plotting.plot_settings.PlotSettings` class to control figure appearance
        bulk_move: forwarded unchanged to the model's `fit` call
        plot_savename: Optional name of file to which to save the plot

    Returns:
        List of :class:`~lumin.nn.callbacks.opt_callbacks.LRFinder` which were used for each model trained
    '''

    if cb_partials is None: cb_partials = []
    if not is_listy(cb_partials): cb_partials = [cb_partials]

    # Value handed to LRFinder as `nb`: epochs x number of training folds x data count of fold 0, over batch size.
    # NOTE(review): presumably the number of batches per training; uses fold 0's size for every fold - confirm
    nb = n_epochs*(fy.n_folds-1)*fy.get_data_count(0)//bs
    n_models = fy.n_folds if n_repeats < 1 else min(n_repeats, fy.n_folds)

    lr_finders = []
    tmr = timeit.default_timer()
    mb = master_bar(range(n_models))
    for idx in mb:
        model = Model(model_builder)
        cbs = [c() for c in cb_partials]  # fresh callback instances for every model
        lrf = LRFinder(lr_bounds=lr_bounds, nb=nb)
        trn_idxs = list(range(fy.n_folds))
        trn_idxs.remove(idx)  # train on every fold except fold `idx`
        # NOTE(review): this call was truncated in the extracted source; `n_epochs=n_epochs` is reconstructed - confirm
        # against the signature of Model.fit
        model.fit(n_epochs=n_epochs, fy=fy, bs=bs, bulk_move=bulk_move, train_on_weights=train_on_weights, trn_idxs=trn_idxs,
                  cbs=cbs+[lrf], model_bar=mb)
        lr_finders.append(lrf)
        del model

    print("LR finder took {:.3f}s ".format(timeit.default_timer()-tmr))
    plot_lr_finders(lr_finders, loss_range='auto', settings=plot_settings,
                    log_y='auto' if 'regress' in model_builder.objective.lower() else False, savename=plot_savename)
    return lr_finders
Read the Docs v: latest
On Read the Docs
Project Home

Free document hosting provided by Read the Docs.


Access comprehensive developer and user documentation for LUMIN

View Docs


Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials