Skip to content

QuestVar

questvar._api.QuestVar

Configurable QuEStVar analysis object.

Parameters:

Name Type Description Default
config TestConfig, dict, or None

Configuration object or dict. If None, uses defaults.

None
**kwargs Any

Override individual config fields (cv_thr, p_thr, etc.).

{}
Source code in src/questvar/_api.py
class QuestVar:
    """Configurable QuEStVar analysis object.

    Holds a ``TestConfig`` in ``self.config`` and exposes pairwise testing,
    all-pairs testing and a power-analysis entry point.

    Parameters
    ----------
    config : TestConfig, dict, or None
        Configuration object or dict. If None, a ``TestConfig`` is built
        directly from ``**kwargs`` (or from defaults when none are given).
    **kwargs
        Override individual config fields (cv_thr, p_thr, etc.).
    """

    def __init__(self, config: TestConfig | dict[str, Any] | None = None, **kwargs: Any) -> None:
        if config is None:
            # The keyword arguments already seed the new config, so there is
            # no need to apply them a second time as overrides.
            self.config = TestConfig(**kwargs)
            return

        base = TestConfig.from_dict(config) if isinstance(config, dict) else config
        self.config = base.replace(**kwargs) if kwargs else base

    @classmethod
    def from_yaml(cls, path: str) -> QuestVar:
        """Load config from a YAML file and return a QuestVar instance.

        Parameters
        ----------
        path : str
            Path to a YAML config file.

        Returns
        -------
        QuestVar
        """
        return cls(TestConfig.from_yaml(path))

    def test(
        self,
        data: pl.DataFrame | np.ndarray,
        cond_1: list[str] | list[int],
        cond_2: list[str] | list[int],
        **overrides: Any,
    ) -> TestResults:
        """Run a pairwise equivalence and difference test.

        Features are first screened by coefficient of variation (CV): only
        rows whose selection indicator is positive in *both* conditions are
        passed on to the paired/unpaired test.

        Parameters
        ----------
        data : pl.DataFrame or np.ndarray
            Input data. Polars DataFrame with sample columns, or numpy array.
        cond_1 : list of str or list of int
            Column names (DataFrame) or indices (ndarray) for condition 1.
        cond_2 : list of str or list of int
            Column names (DataFrame) or indices (ndarray) for condition 2.
        **overrides
            Override any config field for this call only (cv_thr, p_thr, etc.).

        Returns
        -------
        TestResults
            ``data`` holds one row per feature that passed the CV screen
            (an empty frame when none passed). ``info`` holds one row per
            input feature with the per-condition CV indicators and the final
            ``status``; ``status`` is NaN for features removed by the screen.

        Raises
        ------
        ValueError
            If cond_1 or cond_2 have fewer than 2 columns, share columns,
            reference missing columns, or if the data contains non-numeric
            columns. Also raised for paired analysis with unequal replicate
            counts or asymmetric missing-value patterns.
        TypeError
            If data is not a pl.DataFrame or np.ndarray.
        """
        config = replace(self.config, **overrides) if overrides else self.config

        s1_arr, s2_arr, feature_ids, c1, c2, _meta = validate_and_extract(
            data,
            cond_1,
            cond_2,
            config.cv_thr,
            is_paired=config.is_paired,
        )

        s1_cv = cv_numpy(
            _to_raw_scale_for_cv(s1_arr, is_log2=config.is_log2),
            ignore_nan=config.allow_missing,
        )
        s2_cv = cv_numpy(
            _to_raw_scale_for_cv(s2_arr, is_log2=config.is_log2),
            ignore_nan=config.allow_missing,
        )
        s1_ps = make_selection_indicator(s1_cv, config.cv_thr)
        s2_ps = make_selection_indicator(s2_cv, config.cv_thr)
        keep = (s1_ps > 0) & (s2_ps > 0)

        # One slot per input feature; stays NaN for features that are screened out.
        status_all = np.full(s1_arr.shape[0], np.nan)

        def build_info() -> pl.DataFrame:
            # Built on demand so the frame captures ``status_all`` *after* it
            # has been filled in; building it earlier would freeze the
            # all-NaN placeholder into the ``status`` column.
            return pl.DataFrame(
                {
                    "feature_id": pl.Series("feature_id", feature_ids),
                    "s1_cv_status": s1_ps,
                    "s2_cv_status": s2_ps,
                    "status": status_all,
                }
            )

        if not keep.any():
            return TestResults(
                data=_empty_test_results_frame(),
                config=config,
                cond_1=c1,
                cond_2=c2,
                info=build_info(),
            )

        s1_ready = s1_arr[keep]
        s2_ready = s2_arr[keep]

        if not config.is_log2:
            # Clamp to a tiny positive floor so log2 never sees zero.
            s1_ready = np.log2(np.maximum(s1_ready, 1e-300))
            s2_ready = np.log2(np.maximum(s2_ready, 1e-300))

        if config.is_paired:
            result_arr = run_paired(
                s1_ready,
                s2_ready,
                eq_thr=config.eq_thr,
                df_thr=config.df_thr,
                p_thr=config.p_thr,
                correction=config.correction,
            )
        else:
            result_arr = run_unpaired(
                s1_ready,
                s2_ready,
                eq_thr=config.eq_thr,
                df_thr=config.df_thr,
                p_thr=config.p_thr,
                correction=config.correction,
                equal_var=config.var_equal,
            )

        results_df = pl.DataFrame(
            {
                "feature_id": pl.Series("feature_id", feature_ids[keep]),
                "n1": result_arr[:, COL_N1],
                "n2": result_arr[:, COL_N2],
                "log2fc": result_arr[:, COL_LOG2FC],
                "average": result_arr[:, COL_AVERAGE],
                "df_p": result_arr[:, COL_DF_P],
                "df_adjp": result_arr[:, COL_DF_ADJP],
                "eq_p": result_arr[:, COL_EQ_P],
                "eq_adjp": result_arr[:, COL_EQ_ADJP],
                "comb_p": result_arr[:, COL_COMB_P],
                "comb_adjp": result_arr[:, COL_COMB_ADJP],
                "log10_pval": result_arr[:, COL_LOG10_P],
                "log10_adj_pval": result_arr[:, COL_LOG10_ADJP],
                "status": pl.Series("status", result_arr[:, COL_STATUS].astype(np.int8)),
            }
        )

        # Propagate the per-feature outcome back to the full-length vector
        # before the info frame is materialised.
        status_all[keep] = result_arr[:, COL_STATUS]

        return TestResults(
            data=results_df,
            config=config,
            cond_1=c1,
            cond_2=c2,
            info=build_info(),
        )

    def compare_all_pairs(
        self,
        data: pl.DataFrame,
        condition_map: dict[str, list[str]],
        **overrides: Any,
    ) -> dict[tuple[str, str], TestResults]:
        """Run every pairwise combination from a condition map.

        Pairs are generated with ``itertools.combinations``, so each
        unordered pair is tested once, in the insertion order of
        ``condition_map``.

        Parameters
        ----------
        data : pl.DataFrame
            Input data with sample columns.
        condition_map : dict of str to list of str
            Map from condition name to list of column names.
        **overrides
            Override config fields for all comparisons.

        Returns
        -------
        dict of (str, str) to TestResults
            One TestResults per pair, keyed by (condition_1, condition_2).
            Empty when fewer than two conditions are supplied.
        """
        from itertools import combinations

        return {
            (c1, c2): self.test(data, s1, s2, **overrides)
            for (c1, s1), (c2, s2) in combinations(condition_map.items(), 2)
        }

    def power_analysis(
        self,
        target_sei: float = 0.8,
        eq_boundaries: np.ndarray | None = None,
        n_reps_list: list[int] | None = None,
        cv_mean_list: list[float] | None = None,
        cv_thr_list: list[float] | None = None,
        n_prts_list: list[int] | None = None,
        random_seed: int | None = None,
        n_prts: int = 10000,
        n_iterations: int = 10,
        target_power: float = 0.8,
        p_thr: float = 0.05,
        df_thr: float = 1.0,
        cv_thr: float = 1.0,
        correction: str | None = "fdr",
        int_mu: float = 18.0,
        int_sd: float = 1.0,
        cv_k: float = 2.0,
        cv_theta: float = 0.5,
        n_jobs: int | None = None,
    ) -> PowerResults:
        """Run a power analysis sweep. Delegates to run_power_analysis().

        All arguments are forwarded unchanged; the instance's own ``config``
        is not consulted by this method.

        Parameters
        ----------
        target_sei : float
            Target Stable Equivalence Index. Default 0.8.
        eq_boundaries : ndarray, optional
            Equivalence boundaries to sweep.
        n_reps_list : list of int, optional
            Replicate counts to sweep.
        cv_mean_list : list of float, optional
            Mean CV values to sweep.
        cv_thr_list : list of float, optional
            CV thresholds to sweep.
        n_prts_list : list of int, optional
            Feature counts to sweep.
        random_seed : int, optional
            Base random seed for deterministic simulation.
        n_prts : int
            Features per Monte Carlo iteration. Default 10000.
        n_iterations : int
            Iterations per design point. Default 10.
        target_power : float
            Minimum power for design search. Default 0.8.
        p_thr : float
            Adjusted p-value threshold. Default 0.05.
        df_thr : float
            Difference boundary. Default 1.0.
        cv_thr : float
            CV threshold for feature selection. Default 1.0.
        correction : str or None
            Multiple testing correction method. Default "fdr".
        int_mu : float
            Mean log-intensity for simulator. Default 18.0.
        int_sd : float
            Log-intensity standard deviation. Default 1.0.
        cv_k : float
            Gamma shape for CV distribution. Default 2.0.
        cv_theta : float
            Gamma scale for CV distribution. Default 0.5.
        n_jobs : int, optional
            Parallel workers. Default uses half of CPU cores.

        Returns
        -------
        PowerResults
        """
        from questvar.power.run import run_power_analysis

        return run_power_analysis(
            target_sei=target_sei,
            eq_boundaries=eq_boundaries,
            n_reps_list=n_reps_list,
            cv_mean_list=cv_mean_list,
            cv_thr_list=cv_thr_list,
            n_prts_list=n_prts_list,
            random_seed=random_seed,
            n_prts=n_prts,
            n_iterations=n_iterations,
            target_power=target_power,
            p_thr=p_thr,
            df_thr=df_thr,
            cv_thr=cv_thr,
            correction=correction,
            int_mu=int_mu,
            int_sd=int_sd,
            cv_k=cv_k,
            cv_theta=cv_theta,
            n_jobs=n_jobs,
        )

Functions

from_yaml classmethod

from_yaml(path)

Load config from a YAML file and return a QuestVar instance.

Parameters:

Name Type Description Default
path str

Path to a YAML config file.

required

Returns:

Type Description
QuestVar
Source code in src/questvar/_api.py
@classmethod
def from_yaml(cls, path: str) -> QuestVar:
    """Build an instance whose configuration is read from a YAML file.

    The file is parsed by ``TestConfig.from_yaml`` and the resulting
    configuration is handed to the class constructor.

    Parameters
    ----------
    path : str
        Path to a YAML config file.

    Returns
    -------
    QuestVar
    """
    loaded_config = TestConfig.from_yaml(path)
    return cls(loaded_config)

test

test(data, cond_1, cond_2, **overrides)

Run a pairwise equivalence and difference test.

Parameters:

Name Type Description Default
data DataFrame or ndarray

Input data. Polars DataFrame with sample columns, or numpy array.

required
cond_1 list of str or list of int

Column names (DataFrame) or indices (ndarray) for condition 1.

required
cond_2 list of str or list of int

Column names (DataFrame) or indices (ndarray) for condition 2.

required
**overrides Any

Override any config field for this call only (cv_thr, p_thr, etc.).

{}

Returns:

Type Description
TestResults

Raises:

Type Description
ValueError

If cond_1 or cond_2 have fewer than 2 columns, share columns, reference missing columns, or if the data contains non-numeric columns. Also raised for paired analysis with unequal replicate counts or asymmetric missing-value patterns.

TypeError

If data is not a pl.DataFrame or np.ndarray.

Source code in src/questvar/_api.py
def test(
    self,
    data: pl.DataFrame | np.ndarray,
    cond_1: list[str] | list[int],
    cond_2: list[str] | list[int],
    **overrides: Any,
) -> TestResults:
    """Run a pairwise equivalence and difference test.

    Features are first screened by coefficient of variation (CV): only
    rows whose selection indicator is positive in *both* conditions are
    passed on to the paired/unpaired test.

    Parameters
    ----------
    data : pl.DataFrame or np.ndarray
        Input data. Polars DataFrame with sample columns, or numpy array.
    cond_1 : list of str or list of int
        Column names (DataFrame) or indices (ndarray) for condition 1.
    cond_2 : list of str or list of int
        Column names (DataFrame) or indices (ndarray) for condition 2.
    **overrides
        Override any config field for this call only (cv_thr, p_thr, etc.).

    Returns
    -------
    TestResults
        ``data`` holds one row per feature that passed the CV screen
        (an empty frame when none passed). ``info`` holds one row per
        input feature with the per-condition CV indicators and the final
        ``status``; ``status`` is NaN for features removed by the screen.

    Raises
    ------
    ValueError
        If cond_1 or cond_2 have fewer than 2 columns, share columns,
        reference missing columns, or if the data contains non-numeric
        columns. Also raised for paired analysis with unequal replicate
        counts or asymmetric missing-value patterns.
    TypeError
        If data is not a pl.DataFrame or np.ndarray.
    """
    config = replace(self.config, **overrides) if overrides else self.config

    s1_arr, s2_arr, feature_ids, c1, c2, _meta = validate_and_extract(
        data,
        cond_1,
        cond_2,
        config.cv_thr,
        is_paired=config.is_paired,
    )

    s1_cv = cv_numpy(
        _to_raw_scale_for_cv(s1_arr, is_log2=config.is_log2),
        ignore_nan=config.allow_missing,
    )
    s2_cv = cv_numpy(
        _to_raw_scale_for_cv(s2_arr, is_log2=config.is_log2),
        ignore_nan=config.allow_missing,
    )
    s1_ps = make_selection_indicator(s1_cv, config.cv_thr)
    s2_ps = make_selection_indicator(s2_cv, config.cv_thr)
    keep = (s1_ps > 0) & (s2_ps > 0)

    # One slot per input feature; stays NaN for features that are screened out.
    status_all = np.full(s1_arr.shape[0], np.nan)

    def build_info() -> pl.DataFrame:
        # Built on demand so the frame captures ``status_all`` *after* it
        # has been filled in; building it earlier would freeze the
        # all-NaN placeholder into the ``status`` column.
        return pl.DataFrame(
            {
                "feature_id": pl.Series("feature_id", feature_ids),
                "s1_cv_status": s1_ps,
                "s2_cv_status": s2_ps,
                "status": status_all,
            }
        )

    if not keep.any():
        return TestResults(
            data=_empty_test_results_frame(),
            config=config,
            cond_1=c1,
            cond_2=c2,
            info=build_info(),
        )

    s1_ready = s1_arr[keep]
    s2_ready = s2_arr[keep]

    if not config.is_log2:
        # Clamp to a tiny positive floor so log2 never sees zero.
        s1_ready = np.log2(np.maximum(s1_ready, 1e-300))
        s2_ready = np.log2(np.maximum(s2_ready, 1e-300))

    if config.is_paired:
        result_arr = run_paired(
            s1_ready,
            s2_ready,
            eq_thr=config.eq_thr,
            df_thr=config.df_thr,
            p_thr=config.p_thr,
            correction=config.correction,
        )
    else:
        result_arr = run_unpaired(
            s1_ready,
            s2_ready,
            eq_thr=config.eq_thr,
            df_thr=config.df_thr,
            p_thr=config.p_thr,
            correction=config.correction,
            equal_var=config.var_equal,
        )

    results_df = pl.DataFrame(
        {
            "feature_id": pl.Series("feature_id", feature_ids[keep]),
            "n1": result_arr[:, COL_N1],
            "n2": result_arr[:, COL_N2],
            "log2fc": result_arr[:, COL_LOG2FC],
            "average": result_arr[:, COL_AVERAGE],
            "df_p": result_arr[:, COL_DF_P],
            "df_adjp": result_arr[:, COL_DF_ADJP],
            "eq_p": result_arr[:, COL_EQ_P],
            "eq_adjp": result_arr[:, COL_EQ_ADJP],
            "comb_p": result_arr[:, COL_COMB_P],
            "comb_adjp": result_arr[:, COL_COMB_ADJP],
            "log10_pval": result_arr[:, COL_LOG10_P],
            "log10_adj_pval": result_arr[:, COL_LOG10_ADJP],
            "status": pl.Series("status", result_arr[:, COL_STATUS].astype(np.int8)),
        }
    )

    # Propagate the per-feature outcome back to the full-length vector
    # before the info frame is materialised.
    status_all[keep] = result_arr[:, COL_STATUS]

    return TestResults(
        data=results_df,
        config=config,
        cond_1=c1,
        cond_2=c2,
        info=build_info(),
    )

compare_all_pairs

compare_all_pairs(data, condition_map, **overrides)

Run every pairwise combination from a condition map.

Parameters:

Name Type Description Default
data DataFrame

Input data with sample columns.

required
condition_map dict of str to list of str

Map from condition name to list of column names.

required
**overrides Any

Override config fields for all comparisons.

{}

Returns:

Type Description
dict of (str, str) to TestResults

One TestResults per pair, keyed by (condition_1, condition_2).

Source code in src/questvar/_api.py
def compare_all_pairs(
    self,
    data: pl.DataFrame,
    condition_map: dict[str, list[str]],
    **overrides: Any,
) -> dict[tuple[str, str], TestResults]:
    """Test each unordered pair of conditions found in ``condition_map``.

    Pairs come from ``itertools.combinations`` over the map's items, so
    every pair is evaluated once and in the map's insertion order. Each
    pair is delegated to ``self.test``.

    Parameters
    ----------
    data : pl.DataFrame
        Input data with sample columns.
    condition_map : dict of str to list of str
        Map from condition name to list of column names.
    **overrides
        Override config fields for all comparisons.

    Returns
    -------
    dict of (str, str) to TestResults
        One TestResults per pair, keyed by (condition_1, condition_2).
    """
    from itertools import combinations

    pairwise_results: dict[tuple[str, str], TestResults] = {}
    for first, second in combinations(condition_map.items(), 2):
        name_a, columns_a = first
        name_b, columns_b = second
        pairwise_results[(name_a, name_b)] = self.test(data, columns_a, columns_b, **overrides)
    return pairwise_results

power_analysis

power_analysis(
    target_sei=0.8,
    eq_boundaries=None,
    n_reps_list=None,
    cv_mean_list=None,
    cv_thr_list=None,
    n_prts_list=None,
    random_seed=None,
    n_prts=10000,
    n_iterations=10,
    target_power=0.8,
    p_thr=0.05,
    df_thr=1.0,
    cv_thr=1.0,
    correction="fdr",
    int_mu=18.0,
    int_sd=1.0,
    cv_k=2.0,
    cv_theta=0.5,
    n_jobs=None,
)

Run a power analysis sweep. Delegates to run_power_analysis().

Parameters:

Name Type Description Default
target_sei float

Target Stable Equivalence Index. Default 0.8.

0.8
eq_boundaries ndarray

Equivalence boundaries to sweep.

None
n_reps_list list of int

Replicate counts to sweep.

None
cv_mean_list list of float

Mean CV values to sweep.

None
cv_thr_list list of float

CV thresholds to sweep.

None
n_prts_list list of int

Feature counts to sweep.

None
random_seed int

Base random seed for deterministic simulation.

None
n_prts int

Features per Monte Carlo iteration. Default 10000.

10000
n_iterations int

Iterations per design point. Default 10.

10
target_power float

Minimum power for design search. Default 0.8.

0.8
p_thr float

Adjusted p-value threshold. Default 0.05.

0.05
df_thr float

Difference boundary. Default 1.0.

1.0
cv_thr float

CV threshold for feature selection. Default 1.0.

1.0
correction str or None

Multiple testing correction method. Default "fdr".

'fdr'
int_mu float

Mean log-intensity for simulator. Default 18.0.

18.0
int_sd float

Log-intensity standard deviation. Default 1.0.

1.0
cv_k float

Gamma shape for CV distribution. Default 2.0.

2.0
cv_theta float

Gamma scale for CV distribution. Default 0.5.

0.5
n_jobs int

Parallel workers. Default uses half of CPU cores.

None

Returns:

Type Description
PowerResults
Source code in src/questvar/_api.py
def power_analysis(
    self,
    target_sei: float = 0.8,
    eq_boundaries: np.ndarray | None = None,
    n_reps_list: list[int] | None = None,
    cv_mean_list: list[float] | None = None,
    cv_thr_list: list[float] | None = None,
    n_prts_list: list[int] | None = None,
    random_seed: int | None = None,
    n_prts: int = 10000,
    n_iterations: int = 10,
    target_power: float = 0.8,
    p_thr: float = 0.05,
    df_thr: float = 1.0,
    cv_thr: float = 1.0,
    correction: str | None = "fdr",
    int_mu: float = 18.0,
    int_sd: float = 1.0,
    cv_k: float = 2.0,
    cv_theta: float = 0.5,
    n_jobs: int | None = None,
) -> PowerResults:
    """Forward a power-analysis sweep to ``run_power_analysis()``.

    Every argument is passed through by keyword, unchanged; the
    instance's own ``config`` is not read here.

    Parameters
    ----------
    target_sei : float
        Target Stable Equivalence Index. Default 0.8.
    eq_boundaries : ndarray, optional
        Equivalence boundaries to sweep.
    n_reps_list : list of int, optional
        Replicate counts to sweep.
    cv_mean_list : list of float, optional
        Mean CV values to sweep.
    cv_thr_list : list of float, optional
        CV thresholds to sweep.
    n_prts_list : list of int, optional
        Feature counts to sweep.
    random_seed : int, optional
        Base random seed for deterministic simulation.
    n_prts : int
        Features per Monte Carlo iteration. Default 10000.
    n_iterations : int
        Iterations per design point. Default 10.
    target_power : float
        Minimum power for design search. Default 0.8.
    p_thr : float
        Adjusted p-value threshold. Default 0.05.
    df_thr : float
        Difference boundary. Default 1.0.
    cv_thr : float
        CV threshold for feature selection. Default 1.0.
    correction : str or None
        Multiple testing correction method. Default "fdr".
    int_mu : float
        Mean log-intensity for simulator. Default 18.0.
    int_sd : float
        Log-intensity standard deviation. Default 1.0.
    cv_k : float
        Gamma shape for CV distribution. Default 2.0.
    cv_theta : float
        Gamma scale for CV distribution. Default 0.5.
    n_jobs : int, optional
        Parallel workers. Default uses half of CPU cores.

    Returns
    -------
    PowerResults
    """
    from questvar.power.run import run_power_analysis

    # Collect the pass-through arguments once, grouped by role.
    sweep_settings: dict[str, Any] = {
        # Sweep targets and grids.
        "target_sei": target_sei,
        "eq_boundaries": eq_boundaries,
        "n_reps_list": n_reps_list,
        "cv_mean_list": cv_mean_list,
        "cv_thr_list": cv_thr_list,
        "n_prts_list": n_prts_list,
        # Simulation control.
        "random_seed": random_seed,
        "n_prts": n_prts,
        "n_iterations": n_iterations,
        "target_power": target_power,
        # Test thresholds.
        "p_thr": p_thr,
        "df_thr": df_thr,
        "cv_thr": cv_thr,
        "correction": correction,
        # Simulator distribution parameters.
        "int_mu": int_mu,
        "int_sd": int_sd,
        "cv_k": cv_k,
        "cv_theta": cv_theta,
        # Parallelism.
        "n_jobs": n_jobs,
    }
    return run_power_analysis(**sweep_settings)