# Source code for causalpy.experiments.sc_results

#   Copyright 2025 - 2026 The PyMC Labs Developers
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
"""
Result classes for Synthetic Control experiment design methods.

Contains ``DressRehearsalResult``, ``PowerCurveResult``, and
``DonorPoolQualityResult`` — returned by ``SyntheticControl.validate_design()``,
``.power_analysis()``, and ``.donor_pool_quality()`` respectively.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import TYPE_CHECKING

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

if TYPE_CHECKING:
    from causalpy.checks.base import CheckResult


@dataclass
class DressRehearsalResult:
    """Outcome of a dress-rehearsal validation of a synthetic-control design.

    Returned by :meth:`SyntheticControl.validate_design`. A known effect is
    injected into a pseudo-post window and re-estimated; this object records
    both the truth and the recovered posterior so the two can be compared.

    Attributes
    ----------
    injected_effect : float
        The effect that was injected into the pseudo-post window.
    effect_type : str
        Whether the injected effect was ``"relative"`` or ``"absolute"``.
    recovered_effect_mean : float
        Posterior mean of the cumulative impact in the pseudo-post window.
    recovered_effect_hdi : tuple[float, float]
        94% HDI of the cumulative impact posterior.
    hdi_covers_truth : bool
        Whether the HDI interval contains the injected truth.
    posterior_samples : xr.DataArray
        Raw posterior draws of cumulative impact.
    injected_truth : float
        The actual numeric value injected (after conversion from relative
        to absolute if applicable).
    """

    injected_effect: float
    effect_type: str
    recovered_effect_mean: float
    recovered_effect_hdi: tuple[float, float]
    hdi_covers_truth: bool
    posterior_samples: xr.DataArray
    injected_truth: float

    def plot(self) -> tuple[plt.Figure, plt.Axes]:
        """Plot injected vs recovered effect with HDI band."""
        fig, ax = plt.subplots(figsize=(7, 4))
        # Histogram of the raw posterior draws of cumulative impact.
        draws = self.posterior_samples.values.flatten()
        ax.hist(
            draws,
            bins=40,
            density=True,
            alpha=0.6,
            color="C0",
            label="Posterior of cumulative impact",
        )
        # Vertical markers: injected truth (red, dashed) vs posterior mean.
        truth_label = f"Injected truth = {self.injected_truth:.3f}"
        ax.axvline(self.injected_truth, color="C3", lw=2, ls="--", label=truth_label)
        mean_label = f"Posterior mean = {self.recovered_effect_mean:.3f}"
        ax.axvline(self.recovered_effect_mean, color="C0", lw=2, label=mean_label)
        lower, upper = self.recovered_effect_hdi
        ax.axvspan(lower, upper, alpha=0.2, color="C0", label="94% HDI")
        if self.hdi_covers_truth:
            status = "covers truth"
        else:
            status = "misses truth"
        ax.set(
            title=f"Dress Rehearsal: HDI {status}",
            xlabel="Cumulative impact",
            ylabel="Density",
        )
        ax.legend(fontsize=9)
        fig.tight_layout()
        return fig, ax

    def summary(self) -> pd.DataFrame:
        """Return a one-row DataFrame with recovery statistics."""
        bias = self.recovered_effect_mean - self.injected_truth
        # Relative bias is undefined for a zero truth; report NaN instead.
        if self.injected_truth != 0:
            relative_bias = bias / self.injected_truth
        else:
            relative_bias = np.nan
        row = {
            "injected_effect": self.injected_effect,
            "effect_type": self.effect_type,
            "injected_truth": self.injected_truth,
            "recovered_mean": self.recovered_effect_mean,
            "recovered_hdi_lower": self.recovered_effect_hdi[0],
            "recovered_hdi_upper": self.recovered_effect_hdi[1],
            "hdi_covers_truth": self.hdi_covers_truth,
            "bias": bias,
            "relative_bias": relative_bias,
        }
        return pd.DataFrame([row])

    def to_check_result(self) -> CheckResult:
        """Convert to a ``CheckResult`` for sensitivity pipeline integration."""
        # Imported here (not at module level) to avoid a circular import.
        from causalpy.checks.base import CheckResult

        lo, hi = self.recovered_effect_hdi
        verdict = "covers" if self.hdi_covers_truth else "misses"
        text = (
            f"Dress rehearsal with {self.effect_type} effect "
            f"{self.injected_effect}: recovered mean "
            f"{self.recovered_effect_mean:.3f} "
            f"(HDI [{lo:.3f}, {hi:.3f}]). "
            f"HDI {verdict} "
            f"the injected truth ({self.injected_truth:.3f})."
        )
        return CheckResult(
            check_name="DressRehearsal",
            passed=self.hdi_covers_truth,
            table=self.summary(),
            text=text,
            figures=[self.plot()[0]],
            metadata={"dress_rehearsal_result": self},
        )
[docs] @dataclass class PowerCurveResult: """Result of a simulation-based Bayesian power analysis. Produced by :meth:`SyntheticControl.power_analysis`. Attributes ---------- effect_sizes : list[float] Candidate effect sizes evaluated. detection_rates : list[float] Fraction of simulations where the criterion was met, per effect size. criterion : str The detection criterion used. raw_results : list[list[DressRehearsalResult]] Nested list: per effect size, per simulation. """ effect_sizes: list[float] detection_rates: list[float] criterion: str raw_results: list[list[DressRehearsalResult]] = field(repr=False)
[docs] def plot(self) -> tuple[plt.Figure, plt.Axes]: """Power curve: effect size vs detection rate.""" fig, ax = plt.subplots(figsize=(7, 4)) ax.plot( self.effect_sizes, self.detection_rates, "o-", color="C0", lw=2, markersize=8, ) ax.axhline(0.8, color="C3", ls="--", alpha=0.7, label="80% detection") ax.set_xlabel("Effect size") ax.set_ylabel("Detection rate") ax.set_title(f"Bayesian Power Curve (criterion: {self.criterion})") ax.set_ylim(-0.05, 1.05) ax.legend(fontsize=9) ax.grid(True, alpha=0.3) fig.tight_layout() return fig, ax
[docs] def summary(self) -> pd.DataFrame: """DataFrame with per-effect-size summary statistics.""" rows = [] for es, dr, sim_results in zip( self.effect_sizes, self.detection_rates, self.raw_results, strict=True ): means = [r.recovered_effect_mean for r in sim_results] rows.append( { "effect_size": es, "detection_rate": dr, "mean_recovery": np.mean(means), "median_recovery": np.median(means), "n_simulations": len(sim_results), } ) return pd.DataFrame(rows)
@dataclass
class DonorPoolQualityResult:
    """Result of a donor pool quality assessment.

    Produced by :meth:`SyntheticControl.donor_pool_quality`.

    Attributes
    ----------
    correlation_score : float
        Mean pairwise correlation between treated and control units.
    convex_hull_coverage : float
        Fraction of pre-period time points where treated is within the
        donor envelope.
    weight_concentration : float
        Effective number of donors (1 / sum(w_i^2)), measuring how
        concentrated the Dirichlet weights are.
    per_donor_details : pd.DataFrame
        Per-donor statistics: correlation, mean weight, etc.
    """

    correlation_score: float
    convex_hull_coverage: float
    weight_concentration: float
    per_donor_details: pd.DataFrame

    @staticmethod
    def _grade(value: float, good: float, acceptable: float) -> str:
        """Map a metric value to a qualitative grade via two lower thresholds.

        Returns ``"good"`` if ``value > good``, ``"acceptable"`` if
        ``value > acceptable``, otherwise ``"poor"``.
        """
        if value > good:
            return "good"
        if value > acceptable:
            return "acceptable"
        return "poor"

    def _metric_grades(self) -> dict[str, str]:
        """Qualitative grade for each quality metric.

        Single source of truth for the thresholds, shared by ``summary`` and
        ``_overall_quality`` so the two can never disagree.
        """
        return {
            "correlation": self._grade(self.correlation_score, 0.8, 0.5),
            "coverage": self._grade(self.convex_hull_coverage, 0.95, 0.80),
            "effective_donors": self._grade(self.weight_concentration, 3, 1.5),
        }

    def summary(self) -> pd.DataFrame:
        """Formatted summary with per-metric scores and qualitative assessment."""
        grades = self._metric_grades()
        quality = self._overall_quality()
        return pd.DataFrame(
            [
                {
                    "metric": "Mean donor correlation",
                    "value": f"{self.correlation_score:.3f}",
                    "assessment": grades["correlation"],
                },
                {
                    "metric": "Convex hull coverage",
                    "value": f"{self.convex_hull_coverage:.1%}",
                    "assessment": grades["coverage"],
                },
                {
                    "metric": "Effective number of donors",
                    "value": f"{self.weight_concentration:.2f}",
                    "assessment": grades["effective_donors"],
                },
                {
                    "metric": "Overall quality",
                    "value": quality,
                    "assessment": quality,
                },
            ]
        )

    def _overall_quality(self) -> str:
        """Compute an overall qualitative assessment.

        Worst grade dominates: any ``"poor"`` metric makes the whole pool
        poor; all-``"good"`` is good; anything else is acceptable.
        """
        grades = list(self._metric_grades().values())
        if "poor" in grades:
            return "poor"
        if all(g == "good" for g in grades):
            return "good"
        return "acceptable"