# Source code for experiment_design.variable.variable

from dataclasses import dataclass
from typing import Any, Callable, Protocol

import numpy as np
from scipy.stats import rv_continuous, rv_discrete, uniform

# The following import is ugly, but scipy does not publicly expose rv_frozen
# noinspection PyProtectedMember
from scipy.stats._distn_infrastructure import rv_frozen


def _is_frozen_discrete(dist: Any) -> bool:
    """Tell whether ``dist`` is a frozen distribution wrapping an ``rv_discrete``."""
    # Anything that is not a frozen distribution has no ``.dist`` to inspect.
    if not isinstance(dist, rv_frozen):
        return False
    return isinstance(dist.dist, rv_discrete)


def _is_frozen_continuous(dist: Any) -> bool:
    """Tell whether ``dist`` is a frozen distribution wrapping an ``rv_continuous``."""
    # Anything that is not a frozen distribution has no ``.dist`` to inspect.
    if not isinstance(dist, rv_frozen):
        return False
    return isinstance(dist.dist, rv_continuous)


def _change_field_representation(
    dataclass_instance: dataclass, representations_to_change: dict[str, Any]
) -> str:
    """Render a dataclass instance like ``ClassName(field=value, ...)`` with overrides.

    Fields declared with ``repr=False`` are left out. For every remaining field,
    the text taken from ``representations_to_change`` is used when the field name
    is a key of that mapping; otherwise the current attribute value is formatted.

    :param dataclass_instance: Instance of a dataclass to describe
    :param representations_to_change: Replacement values keyed by field name
    :return: The assembled representation string
    """
    visible_names = [
        spec.name
        for spec in dataclass_instance.__dataclass_fields__.values()
        if spec.repr
    ]
    rendered = []
    for field_name in visible_names:
        actual = dataclass_instance.__getattribute__(field_name)
        shown = representations_to_change.get(field_name, actual)
        rendered.append(f"{field_name}={shown}")
    joined = ", ".join(rendered)
    return f"{dataclass_instance.__class__.__name__}({joined})"


def _create_distribution_representation(distribution: rv_frozen) -> str:
    """Describe a frozen distribution as ``name(args, key=value, ...)``.

    Positional arguments come first, followed by keyword arguments, using the
    values stored on the frozen distribution.
    """
    groups = (
        ", ".join(map(str, distribution.args)),
        ", ".join(f"{key}={val}" for key, val in distribution.kwds.items()),
    )
    # Drop an empty group so no dangling separator is produced.
    inner = ", ".join(filter(None, groups))
    return f"{distribution.dist.name}({inner})"


[docs] @dataclass class ContinuousVariable: """ A variable with continuous distribution :param distribution: rv_frozen instance representing the distribution. If None (default), it will be set to uniform between the passed lower_bound and upper_bound :param lower_bound: Lower bound for the variable. If None (default), left support boundary of the distribution will be used in case the distribution is bounded. Otherwise, distribution.ppf(infinite_bound_probability_tolerance) will be used. :param upper_bound: Upper bound for the variable. If None (default), right support boundary of the distribution will be used in case the distribution is bounded. Otherwise, distribution.ppf(1 - infinite_bound_probability_tolerance) will be used. """ distribution: rv_frozen | None = None lower_bound: float | None = None upper_bound: float | None = None def __post_init__(self) -> None: if self.distribution is None and None in [self.lower_bound, self.upper_bound]: raise ValueError( "Either the distribution or both " "lower_bound and upper_bound have to be set." ) if self.distribution is None: self.distribution = uniform( self.lower_bound, self.upper_bound - self.lower_bound ) if ( None not in [self.lower_bound, self.upper_bound] and self.lower_bound >= self.upper_bound ): raise ValueError("lower_bound has to be smaller than upper_bound") if not _is_frozen_continuous(self.distribution): raise ValueError("Only frozen continuous distributions are supported.")
[docs] def value_of(self, probability: float | np.ndarray) -> float | np.ndarray: """Given a probability or an array of probabilities return the corresponding value(s) using the inverse |CDF|.""" values = self.distribution.ppf(probability) if self.upper_bound is not None or self.lower_bound is not None: return np.clip(values, self.lower_bound, self.upper_bound) return values
[docs] def cdf_of(self, value: float | np.ndarray) -> float | np.ndarray: """Given a value or an array of values return the probability using the |CDF|.""" return self.distribution.cdf(value)
[docs] def finite_lower_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """ Provide a finite lower bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """ if self.lower_bound is not None: return self.lower_bound value = self.value_of(0.0) if np.isfinite(value): return value return self.value_of(infinite_bound_probability_tolerance)
[docs] def finite_upper_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """Provide a finite upper bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """ if self.upper_bound is not None: return self.upper_bound value = self.value_of(1.0) if np.isfinite(value): return value return self.value_of(1 - infinite_bound_probability_tolerance)
def __repr__(self) -> str: distribution_representation = _create_distribution_representation( self.distribution ) return _change_field_representation( self, {"distribution": distribution_representation} )
[docs] @dataclass class DiscreteVariable: """ A variable with discrete distribution :param distribution: rv_frozen instance representing the distribution. If None (default), it will be set to uniform between the passed lower_bound and upper_bound :param value_mapper: Given an integer, i.e. an ordinal encoding, this is expected to return the corresponding discrete value of the underlying set of possible values. (Default: lambda x: x) :param inverse_value_mapper: Given a discrete value, this is expected to return the corresponding integer value, i.e. ordinal encoding. (Default: lambda x: x) """ distribution: rv_frozen value_mapper: Callable[[float], float | int] = lambda x: x inverse_value_mapper: Callable[[float, int], float] = lambda x: x def __post_init__(self) -> None: if not _is_frozen_discrete(self.distribution): raise ValueError("Only frozen discrete distributions are supported.") self.value_mapper = np.vectorize(self.value_mapper) self.inverse_value_mapper = np.vectorize(self.inverse_value_mapper)
[docs] def value_of(self, probability: float | np.ndarray) -> float | np.ndarray: """Given a probability or an array of probabilities return the corresponding value(s) using the inverse cdf.""" values = self.distribution.ppf(probability) return self.value_mapper(values)
[docs] def cdf_of(self, values: float | np.ndarray) -> float | np.ndarray: """Given a value or an array of values return the probability using the cdf.""" return self.distribution.cdf(self.inverse_value_mapper(values))
[docs] def finite_lower_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """ Provide a finite lower bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """ support = self.distribution.support() if np.isfinite(support[0]): return self.value_mapper(support[0]) return self.value_of(infinite_bound_probability_tolerance)
[docs] def finite_upper_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """ Provide a finite upper bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """ support = self.distribution.support() if np.isfinite(support[-1]): return self.value_mapper(support[1]) return self.value_of(1 - infinite_bound_probability_tolerance)
def __repr__(self) -> str: distribution_representation = _create_distribution_representation( self.distribution ) return _change_field_representation( self, {"distribution": distribution_representation} )
[docs] class Variable(Protocol): """A protocol to represent the expected methods of valid Variable objects""" @property def distribution(self) -> rv_frozen: """Distribution of the variable"""
[docs] def value_of(self, probability: float | np.ndarray) -> float | np.ndarray: """Given a probability or an array of probabilities return the corresponding value(s) using the inverse cdf."""
[docs] def cdf_of(self, value: float | np.ndarray) -> float | np.ndarray: """Given a value or an array of values return the probability using the cdf."""
[docs] def finite_lower_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """ Provide a finite upper bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """
[docs] def finite_upper_bound( self, infinite_bound_probability_tolerance: float = 1e-6 ) -> float: """ Provide a finite upper bound of the variable even if it was not provided by the user. :param infinite_bound_probability_tolerance: If the variable is unbounded and no explicit lower_bound was passed, this will be used to extract finite bounds as described in lower_bound and upper_bound descriptions. (Default: 1e-6) """
def create_variables_from_distributions(
    distributions: list[rv_frozen],
) -> list[ContinuousVariable | DiscreteVariable]:
    """
    Given a list of distributions, create the corresponding continuous or
    discrete variables.

    :param distributions: Frozen scipy distributions each representing a marginal
        variable
    :return: List of variables according to the passed distributions, in the same
        order
    :raises ValueError: If an entry is neither a frozen discrete nor a frozen
        continuous distribution
    """
    variables = []
    for dist in distributions:
        if _is_frozen_discrete(dist):
            variables.append(DiscreteVariable(distribution=dist))
        elif _is_frozen_continuous(dist):
            variables.append(ContinuousVariable(distribution=dist))
        else:
            raise ValueError(
                f"Each distribution must be a frozen discrete or continuous type, got {type(dist)}"
            )
    return variables