Source code for master_thesis_code.bayesian_inference.bayesian_inference

"""Pipeline A — self-contained MWE for Hubble constant inference (dev cross-check).

This module provides :class:`BayesianInference`, a minimal working example that
estimates the Hubble constant posterior from synthetic EMRI detections and a
:class:`~master_thesis_code.datamodels.galaxy.GalaxyCatalog`.  It uses:

- An erf-based analytic GW detection probability,
- A hardcoded 10 % fractional σ(d_L) (see ``FRACTIONAL_LUMINOSITY_ERROR``),
- A synthetic galaxy catalog generated by :class:`GalaxyCatalog`.

**This pipeline is NOT invoked by ``--evaluate``.**  It exists for rapid
prototyping and cross-checking.  The production (science-grade) pipeline is
:class:`~master_thesis_code.bayesian_inference.bayesian_statistics.BayesianStatistics`
(Pipeline B), which uses the real GLADE galaxy catalog, KDE-based detection
probability, full Fisher-matrix covariance, and multiprocessing.

The ``__main__`` entry point in
:mod:`~master_thesis_code.bayesian_inference.bayesian_inference_mwe` runs
this pipeline standalone.
"""

from dataclasses import dataclass, field
from statistics import NormalDist

import numpy as np
import numpy.typing as npt
from scipy.special import erf

from master_thesis_code.constants import (
    FRACTIONAL_LUMINOSITY_ERROR,
    FRACTIONAL_MEASURED_MASS_ERROR,
    LUMINOSITY_DISTANCE_THRESHOLD_GPC,
    OMEGA_DE,
    OMEGA_M,
    SKY_LOCALIZATION_ERROR,
    H,
)
from master_thesis_code.datamodels.emri_detection import EMRIDetection
from master_thesis_code.datamodels.galaxy import GalaxyCatalog
from master_thesis_code.physical_relations import (
    dist,
    redshifted_mass_inverse,
)
from master_thesis_code.physical_relations import (
    dist_vectorized as _dist_vectorized,
)



[docs]
def dist_array(
    redshifts: npt.NDArray[np.float64],
    h: float = H,
    Omega_m: float = OMEGA_M,
    Omega_de: float = OMEGA_DE,
) -> npt.NDArray[np.float64]:
    """Vectorized luminosity distance in Gpc over an array of redshifts.

    Delegates to physical_relations.dist_vectorized for a canonical, unit-consistent
    implementation.  Returns Gpc (same unit as the scalar dist()).
    """
    return np.asarray(_dist_vectorized(redshifts, h=h, Omega_m=Omega_m, Omega_de=Omega_de))




[docs]
@dataclass
class BayesianInference:
    """Bayesian estimator for the Hubble constant H₀ from EMRI detections.

    Evaluates :math:`p(H_0 \\mid \\{d_i\\})` by marginalizing each detection over
    the galaxy-catalog redshift distribution, weighted by the GW likelihood and
    sky-localization overlap.

    Attributes:
        galaxy_catalog: Galaxy catalog providing redshift and mass distributions
            :math:`p(z)` and :math:`p(M)`.
        emri_detections: List of EMRI detections with measured luminosity distances
            and sky positions.
        luminosity_distance_threshold: LISA detection horizon in Gpc.
        number_of_redshift_steps: Number of quadrature points for redshift integrals.
        redshift_values: Redshift grid used for all integrals, set in ``__post_init__``.
        galaxy_distribution_at_redshifts: Pre-computed :math:`p(z_i \\mid \\mathrm{catalog})`
            evaluated on ``redshift_values``.
        use_bh_mass: If ``True``, include BH mass information in the likelihood.
        use_selection_effects_correction: If ``True``, divide likelihood by the
            selection-function denominator :math:`\\int p_\\mathrm{det}(z) p(z) dz`.
    """

    galaxy_catalog: GalaxyCatalog
    emri_detections: list[EMRIDetection]

    luminosity_distance_threshold: float = LUMINOSITY_DISTANCE_THRESHOLD_GPC  # Gpc
    number_of_redshift_steps: int = 1000
    redshift_values: npt.NDArray[np.float64] = field(default_factory=lambda: np.array([]))
    galaxy_distribution_at_redshifts: npt.NDArray[np.float64] = field(
        default_factory=lambda: np.array([])
    )
    galaxy_detection_mass_distribution_at_redshifts: list = field(default_factory=list)
    detection_skylocalization_weight_by_galaxy: list = field(default_factory=list)
    use_bh_mass: bool = False
    use_selection_effects_correction: bool = True

    def __post_init__(self) -> None:
        self.redshift_values = np.linspace(
            self.galaxy_catalog.redshift_lower_limit,
            self.galaxy_catalog.redshift_upper_limit,
            self.number_of_redshift_steps,
        )
        self.galaxy_distribution_at_redshifts = np.array(
            [
                self.galaxy_catalog.evaluate_galaxy_distribution(redshift)
                for redshift in self.redshift_values
            ]
        )
        self.comoving_volume_element_at_redshifts = np.array(
            [
                self.galaxy_catalog.comoving_volume_element(redshift)
                for redshift in self.redshift_values
            ]
        )
        self.galaxy_detection_mass_distribution_at_redshifts = [
            np.array(
                [
                    self.galaxy_catalog.evaluate_galaxy_mass_distribution(
                        redshifted_mass_inverse(
                            redshifted_mass=emri_detection.measured_redshifted_mass,
                            redshift=redshift,
                        )
                    )
                    for redshift in self.redshift_values
                ]
            )
            for emri_detection in self.emri_detections
        ]

        self.detection_skylocalization_weight_by_galaxy = [
            np.array(
                [
                    NormalDist(
                        mu=emri_detection.measured_right_ascension,
                        sigma=SKY_LOCALIZATION_ERROR,
                    ).pdf(galaxy.right_ascension)
                    * NormalDist(
                        mu=emri_detection.measured_declination,
                        sigma=SKY_LOCALIZATION_ERROR,
                    ).pdf(galaxy.declination)
                    for galaxy in self.galaxy_catalog.catalog
                ]
            )
            for emri_detection in self.emri_detections
        ]


[docs]
    def gw_detection_probability(self, redshift: float, hubble_constant: float) -> float:
        """Probability that a source at *redshift* is detected by LISA.

        Models the detector selection function as a cumulative normal:

        .. math::

            p_\\mathrm{det}(z) = \\frac{1}{2}\\left[1 + \\mathrm{erf}\\!
            \\left(\\frac{D_\\mathrm{thr} - d_L(z)}{\\sqrt{2}\\,\\sigma_{d_L}(z)}\\right)\\right]

        where :math:`\\sigma_{d_L} = f_\\sigma \\cdot d_L(z)` and
        :math:`f_\\sigma` is ``FRACTIONAL_LUMINOSITY_ERROR``.

        Args:
            redshift: Source redshift.
            hubble_constant: Dimensionless Hubble parameter :math:`h`.

        Returns:
            Detection probability in :math:`[0, 1]`.
        """
        return float(
            (
                1
                + erf(
                    (self.luminosity_distance_threshold - dist(redshift, hubble_constant))
                    / np.sqrt(2)
                    / FRACTIONAL_LUMINOSITY_ERROR
                    / dist(redshift, hubble_constant)
                )
            )
            / 2
        )



[docs]
    def gw_likelihood(
        self,
        measured_luminosity_distance: float,
        redshift: float,
        hubble_constant: float,
    ) -> float:
        """GW likelihood: probability of measuring *measured_luminosity_distance* given *redshift*.

        Assumes Gaussian measurement noise:

        .. math::

            \\mathcal{L}(\\hat{d}_L \\mid z, h) =
            \\mathcal{N}\\!\\left(\\hat{d}_L;\\, d_L(z, h),\\,
            f_\\sigma \\cdot d_L(z, h)\\right)

        Args:
            measured_luminosity_distance: Measured luminosity distance :math:`\\hat{d}_L` in Gpc.
            redshift: True source redshift.
            hubble_constant: Dimensionless Hubble parameter :math:`h`.

        Returns:
            Likelihood value (probability density).
        """
        mu_luminosity_distance = dist(redshift, hubble_constant)
        sigma_luminosity_distance = FRACTIONAL_LUMINOSITY_ERROR * mu_luminosity_distance

        distribution = NormalDist(mu=mu_luminosity_distance, sigma=sigma_luminosity_distance)

        return distribution.pdf(measured_luminosity_distance)



[docs]
    def likelihood(
        self,
        hubble_constant: float,
        measured_luminosity_distance: float,
        measured_redshifted_mass: float,
        detection_index: int,
    ) -> float:
        """Marginalized likelihood :math:`p(\\hat{d}_L \\mid H_0)` for one EMRI detection.

        Integrates the GW likelihood over the galaxy-catalog redshift distribution,
        weighted by sky-localization overlap and — optionally — BH mass information.
        A selection-effects correction divides by the denominator integral when
        ``use_selection_effects_correction`` is ``True``:

        .. math::

            \\mathcal{L}(H_0) = \\frac{\\int p_\\mathrm{GW}(\\hat{d}_L \\mid z, H_0)\\,
            p_\\mathrm{det}(z, H_0)\\, p(z \\mid \\mathrm{catalog})\\, dz}
            {\\int p_\\mathrm{det}(z, H_0)\\, p(z \\mid \\mathrm{catalog})\\, dz}

        Args:
            hubble_constant: Dimensionless Hubble parameter :math:`h` being evaluated.
            measured_luminosity_distance: Measured :math:`\\hat{d}_L` in Gpc.
            measured_redshifted_mass: Measured redshifted BH mass :math:`M_z` in solar masses.
            detection_index: Index into ``emri_detections`` and pre-computed weight arrays.

        Returns:
            Likelihood value (positive real number; not normalized over :math:`H_0`).
        """
        # Compute all luminosity distances at once — replaces 2000+ scalar dist() calls.
        mu_d = dist_array(self.redshift_values, hubble_constant)

        # GW detection probability: P_det(z) = (1 + erf(x)) / 2
        # where x = (D_threshold - mu_d) / (sqrt(2) * sigma_d)
        p_det_array = (
            1.0
            + erf(
                (self.luminosity_distance_threshold - mu_d)
                / (np.sqrt(2.0) * FRACTIONAL_LUMINOSITY_ERROR * mu_d)
            )
        ) / 2.0

        # GW likelihood: Gaussian PDF with mu=mu_d, sigma=sigma_d
        sigma_d = FRACTIONAL_LUMINOSITY_ERROR * mu_d
        gw_likelihood_array = np.exp(
            -0.5 * ((measured_luminosity_distance - mu_d) / sigma_d) ** 2
        ) / (sigma_d * np.sqrt(2.0 * np.pi))

        # Galaxy sky-localisation weight per redshift bin: matrix-vector product
        # galaxy_distribution_at_redshifts: (n_z, n_galaxies); weights: (n_galaxies,)
        galaxy_skylocalization_weights = (
            self.galaxy_distribution_at_redshifts

[docs]
            @ self.detection_skylocalization_weight_by_galaxy[detection_index]
        )

        if not self.use_bh_mass:
            nominator = np.trapezoid(
                gw_likelihood_array * p_det_array * galaxy_skylocalization_weights,
                self.redshift_values,
            )
        else:
            nominator = np.trapezoid(
                gw_likelihood_array
                * p_det_array
                * NormalDist(
                    mu=measured_redshifted_mass,
                    sigma=FRACTIONAL_MEASURED_MASS_ERROR * measured_redshifted_mass,
                ).pdf(measured_redshifted_mass)
                * np.array(
                    [
                        np.sum(
                            const_redshift_distribution
                            * const_redshift_mass_distribution
                            * self.detection_skylocalization_weight_by_galaxy[detection_index]
                        )
                        for const_redshift_distribution, const_redshift_mass_distribution in zip(
                            self.galaxy_distribution_at_redshifts,
                            self.galaxy_detection_mass_distribution_at_redshifts[detection_index],
                        )
                    ]
                ),
                self.redshift_values,
            )

        denominator = np.trapezoid(
            p_det_array * galaxy_skylocalization_weights,
            self.redshift_values,
        )
        if not self.use_selection_effects_correction:
            denominator = 1.0
        return float(nominator / denominator)


    def posterior(self, hubble_constant: float) -> list[float]:
        return [
            self.likelihood(
                hubble_constant=hubble_constant,
                measured_luminosity_distance=emri_detection.measured_luminosity_distance,
                measured_redshifted_mass=emri_detection.measured_redshifted_mass,
                detection_index=index,
            )
            for index, emri_detection in enumerate(self.emri_detections)
        ]