Shortcuts

Source code for lumin.utils.statistics

import math
import multiprocessing as mp
from typing import Any, Dict, Optional, Tuple, Union

import numpy as np
from statsmodels.nonparametric.kde import KDEUnivariate

__all__ = ["bootstrap_stats", "get_moments", "uncert_round"]


[docs]def bootstrap_stats(args: Dict[str, Any], out_q: Optional[mp.Queue] = None) -> Union[None, Dict[str, Any]]: r""" Computes statistics and KDEs of data via sampling with replacement Arguments: args: dictionary of arguments. Possible keys are: data - data to resample name - name prepended to returned keys in result dict weights - array of weights matching length of data to use for weighted resampling n - number of times to resample data x - points at which to compute the kde values of resample data kde - whether to compute the kde values at x-points for resampled data mean - whether to compute the means of the resampled data std - whether to compute standard deviation of resampled data c68 - whether to compute the width of the absolute central 68.2 percentile of the resampled data out_q: if using multiporcessing can place result dictionary in provided queue Returns: Result dictionary if `out_q` is `None` else `None`. """ out_dict, mean, median, std, c68, boot = {}, [], [], [], [], [] name = "" if "name" not in args else args["name"] weights = None if "weights" not in args else args["weights"] if "n" not in args: args["n"] = 100 if "kde" not in args: args["kde"] = False if "mean" not in args: args["mean"] = False if "median" not in args: args["median"] = False if "std" not in args: args["std"] = False if "c68" not in args: args["c68"] = False if args["kde"] and args["data"].dtype != "float64": data = np.array(args["data"], dtype="float64") else: data = args["data"] len_d = len(data) np.random.seed() for i in range(args["n"]): points = np.random.choice(data, len_d, replace=True, p=weights) if args["kde"]: kde = KDEUnivariate(points) kde.fit() boot.append([kde.evaluate(x) for x in args["x"]]) if args["mean"]: mean.append(np.mean(points)) if args["median"]: median.append(np.median(points)) if args["std"]: std.append(np.std(points, ddof=1)) if args["c68"]: c68.append((np.percentile(points, 84.135) - np.percentile(points, 15.865)) / 2) if args["kde"]: out_dict[f"{name}_kde"] = boot if args["mean"]: out_dict[f"{name}_mean"] = mean if args["median"]: out_dict[f"{name}_median"] = median if args["std"]: out_dict[f"{name}_std"] = std if args["c68"]: out_dict[f"{name}_c68"] = c68 if out_q is not None: out_q.put(out_dict) else: return out_dict
[docs]def get_moments(arr: np.ndarray) -> Tuple[float, float, float, float]: r""" Computes mean and std of data, and their associated uncertainties Arguments: arr: univariate data Returns: - mean - statistical uncertainty of mean - standard deviation - statistical uncertainty of standard deviation """ n = len(arr) m = np.mean(arr) m_4 = np.mean((arr - m) ** 4) s = np.std(arr, ddof=1) s4 = s**4 se_s2 = ((m_4 - (s4 * (n - 3) / (n - 1))) / n) ** 0.25 se_s = se_s2 / (2 * s) return m, s / np.sqrt(n), s, se_s
[docs]def uncert_round(value: float, uncert: float) -> Tuple[float, float]: r""" Round value according to given uncertainty using one significant figure of the uncertainty Arguments: value: value to round uncert: uncertainty of value Returns: - rounded value - rounded uncertainty """ if uncert == math.inf: return value, uncert uncert = np.nan_to_num(uncert) if uncert == 0: return value, uncert factor = 1.0 while uncert / factor > 1: factor *= 10.0 value /= factor uncert /= factor i = 0 while uncert * (10**i) <= 1: i += 1 round_uncert = factor * round(uncert, i) round_value = factor * round(value, i) if int(round_uncert) == round_uncert: round_uncert = int(round_uncert) round_value = int(round_value) return round_value, round_uncert

Docs

Access comprehensive developer and user documentation for LUMIN

View Docs

Tutorials

Get tutorials for beginner and advanced researchers demonstrating many of the features of LUMIN

View Tutorials