Source code for experiment_design.covariance_modification

import numpy as np
from scipy.linalg import solve_triangular



[docs]
def iman_connover_transformation(
    doe: np.ndarray,
    target_correlation: np.ndarray,
    means: np.ndarray | None = None,
    standard_deviations: np.ndarray | None = None,
) -> np.ndarray:
    """
    Rearrange the values of doe to reduce correlation error while adhering to any marginal constraints of the values
    such as an |LHS|

    :param doe: Array with shape (n_sample, n_dim) representing the initial |DoE| with arbitrary
        correlation.
    :param target_correlation: Symmetric positive definite correlation matrix with shape (n_dim, n_dim) representing
        the desired correlation between variables
    :param means: Array with shape (n_dim,) representing the means of the marginal distributions. If None, it will be
        inferred from doe
    :param standard_deviations: Array with shape (n_dim,) representing the standard deviations of the marginal
        distributions. If None, it will be inferred from doe
    :return: New |DoE| with the same shape and values as doe but smaller correlation error wrt. target_correlation

    References
    ----------
    R.L. Iman and W.J. Conover (1982). “`A distribution-free approach to inducing rank correlation among input variables
    <https://www.researchgate.net/publication/243048186_A_Distribution-Free_Approach_to_Inducing_Rank_Correlation_Among_Input_Variates>`_”


    C. Bogoclu (2022). "`Local Latin Hypercube Refinement for Uncertainty Quantification and Optimization
    <https://hss-opus.ub.ruhr-uni-bochum.de/opus4/frontdoor/deliver/index/docId/9143/file/diss.pdf>`_" Chapter 4.3.2

    Examples
    --------
    >>> from experiment_design.covariance_modification import iman_connover_transformation
    >>> import numpy as np
    >>> from scipy import stats
    >>> np.random.seed(1337)
    >>> samples = stats.randint(0, 100).rvs((30, 2))
    >>> correlation_error = np.max(np.abs(np.corrcoef(samples, rowvar=False) - np.eye(2)))
    >>> new_samples = iman_connover_transformation(samples, np.eye(2))
    >>> np.max(np.abs(np.corrcoef(new_samples, rowvar=False) - np.eye(2))) < correlation_error
    True
    >>> sorted(samples[:, 0]) == sorted(new_samples[:, 0])
    True
    >>> sorted(samples[:, 1]) == sorted(new_samples[:, 1])
    True
    """
    transformed = second_moment_transformation(
        doe, target_correlation, means, standard_deviations
    )
    order = np.argsort(np.argsort(transformed, axis=0), axis=0)
    return np.take_along_axis(np.sort(doe, axis=0), order, axis=0)




[docs]
def second_moment_transformation(
    doe: np.ndarray,
    target_correlation: np.ndarray,
    means: np.ndarray | None = None,
    standard_deviations: np.ndarray | None = None,
    jitter: float = 1e-6,
) -> np.ndarray:
    """
    Second-moment transformation for achieving the target covariance

    :param doe: Array with shape (n_sample, n_dim) representing the initial design of experiment with arbitrary
        correlation.
    :param target_correlation: Symmetric positive definite correlation matrix with shape (n_dim, n_dim) representing
        the desired correlation between variables
    :param means: Array with shape (n_dim,) representing the means of the marginal distributions. If None, it will be
        inferred from doe
    :param standard_deviations: Array with shape (n_dim,) representing the standard deviations of the marginal
        distributions. If None, it will be inferred from doe
    :param jitter: A small positive constant that will be added to the diagonal of the covariance matrix in case
        it is positive semi-definite to enable Cholesky decomposition.
    :return: New |DoE| with the same shape but different values as doe, that matches the target_correlation exactly.


    Examples
    --------
    >>> from experiment_design.covariance_modification import iman_connover_transformation
    >>> import numpy as np
    >>> from scipy import stats
    >>> np.random.seed(1337)
    >>> samples = stats.norm.rvs(size=(50, 2))
    >>> new_samples = iman_connover_transformation(samples, np.eye(2))
    >>> correlation_error = np.max(np.abs(np.corrcoef(new_samples, rowvar=False) - np.eye(2)))
    >>> bool(np.isclose(correlation_error, 0, atol=1e-6))
    True

    """
    if means is None:
        means = np.mean(doe, axis=0)
    if standard_deviations is None:
        standard_deviations = np.std(doe, axis=0, keepdims=True)
        standard_deviations = standard_deviations.reshape((1, -1))
    target_covariance = (
        standard_deviations.T.dot(standard_deviations) * target_correlation
    )  # convert to covariance before Cholesky
    try:
        target_cov_upper = np.linalg.cholesky(target_covariance).T
    except np.linalg.LinAlgError:
        target_cov_upper = np.linalg.cholesky(
            target_covariance + np.eye(target_covariance.shape[0]) * jitter
        ).T
    cur_cov = np.cov(doe, rowvar=False)
    try:
        cur_cov_upper = np.linalg.cholesky(cur_cov).T
    except np.linalg.LinAlgError:
        cur_cov_upper = np.linalg.cholesky(
            cur_cov + np.eye(cur_cov.shape[0]) * jitter
        ).T

    inv_cov_upper = solve_triangular(cur_cov_upper, target_cov_upper)
    return (doe - means).dot(inv_cov_upper) + means