Source code for tolerance

"""
Functional Tolerance Bounds using SRSF

moduleauthor:: J. Derek Tucker <jdtuck@sandia.gov>

"""

import numpy as np
import fdasrsf as fs
import fdasrsf.utility_functions as uf
import fdasrsf.fPCA as fpca
from scipy.stats import chi2
from numpy.linalg import eig
from fdasrsf.boxplots import ampbox, phbox



[docs]
def bootTB(f, time, a=0.05, p=0.99, B=500, no=5, parallel=True):
    """
    This function computes tolerance bounds for functional data containing
    phase and amplitude variation using bootstrap sampling

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param a: confidence level of tolerance bound (default = 0.05)
    :param p: coverage level of tolerance bound (default = 0.99)
    :param B: number of bootstrap samples (default = 500)
    :param no: number of principal components (default = 5)
    :param parallel: enable parallel processing (default = T)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of boxplot objects
    :return amp: amplitude tolerance bounds
    :rtype out_med: ampbox object
    :return ph: phase tolerance bounds
    :rtype out_med: phbox object
    :return out_med: alignment results
    :rtype out_med: fdawarp object

    """
    eps = np.finfo(np.double).eps
    (M, N) = f.shape

    # Align Data
    out_med = fs.fdawarp(f, time)
    out_med.srsf_align(method="median", parallel=parallel)

    # Calculate CI
    # a% tolerance bound with p%
    fn = out_med.fn
    qn = out_med.qn
    gam = out_med.gam
    q0 = out_med.q0
    print("Bootstrap Sampling")
    bootlwr_amp = np.zeros((M, B))
    bootupr_amp = np.zeros((M, B))
    bootlwr_ph = np.zeros((M, B))
    bootupr_ph = np.zeros((M, B))
    for k in range(B):
        out_med.joint_gauss_model(n=100, no=no)
        obja = ampbox(out_med)
        obja.construct_boxplot(1 - p, 0.3)
        objp = phbox(out_med)
        objp.construct_boxplot(1 - p, 0.3)
        bootlwr_amp[:, k] = obja.Q1a
        bootupr_amp[:, k] = obja.Q3a
        bootlwr_ph[:, k] = objp.Q1a
        bootupr_ph[:, k] = objp.Q3a

    # tolerance bounds
    boot_amp = np.hstack((bootlwr_amp, bootupr_amp))
    f, g, g2 = uf.gradient_spline(time, boot_amp, False)
    boot_amp_q = g / np.sqrt(abs(g) + eps)
    boot_ph = np.hstack((bootlwr_ph, bootupr_ph))
    boot_out = out_med
    boot_out.fn = boot_amp
    boot_out.qn = boot_amp_q
    boot_out.gam = boot_ph
    boot_out.rsamps = False
    amp = ampbox(boot_out)
    amp.construct_boxplot(a, 0.3)
    ph = phbox(boot_out)
    ph.construct_boxplot(a, 0.3)

    return amp, ph, out_med




[docs]
def pcaTB(f, time, a=0.5, p=0.99, no=5, parallel=True):
    """
    This function computes tolerance bounds for functional data containing
    phase and amplitude variation using fPCA

    :param f: numpy ndarray of shape (M,N) of N functions with M samples
    :param time: vector of size M describing the sample points
    :param a: confidence level of tolerance bound (default = 0.05)
    :param p: coverage level of tolerance bound (default = 0.99)
    :param no: number of principal components (default = 5)
    :param parallel: enable parallel processing (default = T)
    :type f: np.ndarray
    :type time: np.ndarray

    :rtype: tuple of boxplot objects
    :return warp: alignment data from time_warping
    :return pca: functional pca from jointFPCA
    :return tol: tolerance factor

    """

    # Align Data
    out_warp = fs.fdawarp(f, time)
    out_warp.srsf_align(method="median", parallel=parallel)

    # Calculate pca
    out_pca = fpca.fdajpca(out_warp)
    out_pca.calc_fpca(no)

    # Calculate TB
    tol = mvtol_region(out_pca.coef, a, p, 100000)

    return out_warp, out_pca, tol




[docs]
def mvtol_region(x, alpha, P, B):
    """
    Computes tolerance factor for multivariate normal

    Krishnamoorthy, K. and Mondal, S. (2006), Improved Tolerance Factors for Multivariate Normal
    Distributions, Communications in Statistics - Simulation and Computation, 35, 461–478.

    :param x: (M,N) matrix defining N variables of M samples
    :param alpha: confidence level
    :param P: coverage level
    :param B: number of bootstrap samples

    :rtype: double
    :return tol: tolerance factor

    """
    n, p = x.shape

    q_squared = chi2.rvs(1, size=(p, B)) / n
    L = np.zeros((p, B))
    for k in range(B):
        L[:, k] = eig(rwishart(n - 1, p))[0]

    c1 = (1 + q_squared) / L
    c1 = c1.sum()
    c2 = (1 + 2 * q_squared) / (L**2)
    c2 = c2.sum()
    c3 = (1 + 3 * q_squared) / (L**3)
    c3 = c3.sum()
    a = (c2**3) / (c3**2)
    T = (n - 1) * (np.sqrt(c2 / a) * (chi2.ppf(P, a) - a) + c1)
    tol = np.quantile(T, 1 - alpha)

    return tol




[docs]
def rwishart(df, p):
    """
    Computes a random wishart matrix

    :param df: degree of freedom
    :param p: number of dimensions

    :rtype: double
    :return R: matrix

    """
    R = np.zeros((p, p))
    R = R.flatten()
    R[:: p + 1] = np.sqrt(chi2.rvs(np.arange(df, df - p, -1), size=p))
    if p > 1:
        pseq = np.arange(0, p)
        tmp = [np.arange(0, x + 1) for x in np.arange(0, p - 1)]
        R[np.repeat(p * pseq, pseq) + np.concatenate(tmp).ravel()] = np.random.randn(
            int(p * (p - 1) / 2)
        )

    R = R.reshape((p, p))
    R = np.matmul(R.T, R)

    return R