Source code for master_thesis_code.bayesian_inference.bayesian_inference

"""Pipeline A — self-contained MWE for Hubble constant inference (dev cross-check).

This module provides :class:`BayesianInference`, a minimal working example that
estimates the Hubble constant posterior from synthetic EMRI detections and a
:class:`~master_thesis_code.datamodels.galaxy.GalaxyCatalog`.  It uses:

- An erf-based analytic GW detection probability,
- A hardcoded 10 % fractional σ(d_L) (see ``FRACTIONAL_LUMINOSITY_ERROR``),
- A synthetic galaxy catalog generated by :class:`GalaxyCatalog`.

**This pipeline is NOT invoked by ``--evaluate``.**  It exists for rapid
prototyping and cross-checking.  The production (science-grade) pipeline is
:class:`~master_thesis_code.bayesian_inference.bayesian_statistics.BayesianStatistics`
(Pipeline B), which uses the real GLADE galaxy catalog, KDE-based detection
probability, full Fisher-matrix covariance, and multiprocessing.

The ``__main__`` entry point in
:mod:`~master_thesis_code.bayesian_inference.bayesian_inference_mwe` runs
this pipeline standalone.
"""

from dataclasses import dataclass, field
from statistics import NormalDist

import numpy as np
import numpy.typing as npt
from scipy.special import erf

from master_thesis_code.constants import (
    FRACTIONAL_LUMINOSITY_ERROR,
    FRACTIONAL_MEASURED_MASS_ERROR,
    LUMINOSITY_DISTANCE_THRESHOLD_GPC,
    OMEGA_DE,
    OMEGA_M,
    SKY_LOCALIZATION_ERROR,
    H,
)
from master_thesis_code.datamodels.emri_detection import EMRIDetection
from master_thesis_code.datamodels.galaxy import GalaxyCatalog
from master_thesis_code.physical_relations import (
    dist,
    redshifted_mass_inverse,
)
from master_thesis_code.physical_relations import (
    dist_vectorized as _dist_vectorized,
)


[docs] def dist_array( redshifts: npt.NDArray[np.float64], h: float = H, Omega_m: float = OMEGA_M, Omega_de: float = OMEGA_DE, ) -> npt.NDArray[np.float64]: """Vectorized luminosity distance in Gpc over an array of redshifts. Delegates to physical_relations.dist_vectorized for a canonical, unit-consistent implementation. Returns Gpc (same unit as the scalar dist()). """ return np.asarray(_dist_vectorized(redshifts, h=h, Omega_m=Omega_m, Omega_de=Omega_de))
[docs] @dataclass class BayesianInference: """Bayesian estimator for the Hubble constant H₀ from EMRI detections. Evaluates :math:`p(H_0 \\mid \\{d_i\\})` by marginalizing each detection over the galaxy-catalog redshift distribution, weighted by the GW likelihood and sky-localization overlap. Attributes: galaxy_catalog: Galaxy catalog providing redshift and mass distributions :math:`p(z)` and :math:`p(M)`. emri_detections: List of EMRI detections with measured luminosity distances and sky positions. luminosity_distance_threshold: LISA detection horizon in Gpc. number_of_redshift_steps: Number of quadrature points for redshift integrals. redshift_values: Redshift grid used for all integrals, set in ``__post_init__``. galaxy_distribution_at_redshifts: Pre-computed :math:`p(z_i \\mid \\mathrm{catalog})` evaluated on ``redshift_values``. use_bh_mass: If ``True``, include BH mass information in the likelihood. use_selection_effects_correction: If ``True``, divide likelihood by the selection-function denominator :math:`\\int p_\\mathrm{det}(z) p(z) dz`. """ galaxy_catalog: GalaxyCatalog emri_detections: list[EMRIDetection] luminosity_distance_threshold: float = LUMINOSITY_DISTANCE_THRESHOLD_GPC # Gpc number_of_redshift_steps: int = 1000 redshift_values: npt.NDArray[np.float64] = field(default_factory=lambda: np.array([])) galaxy_distribution_at_redshifts: npt.NDArray[np.float64] = field( default_factory=lambda: np.array([]) ) galaxy_detection_mass_distribution_at_redshifts: list = field(default_factory=list) detection_skylocalization_weight_by_galaxy: list = field(default_factory=list) use_bh_mass: bool = False use_selection_effects_correction: bool = True def __post_init__(self) -> None: self.redshift_values = np.linspace( self.galaxy_catalog.redshift_lower_limit, self.galaxy_catalog.redshift_upper_limit, self.number_of_redshift_steps, ) self.galaxy_distribution_at_redshifts = np.array( [ self.galaxy_catalog.evaluate_galaxy_distribution(redshift) for redshift in self.redshift_values ] ) self.comoving_volume_element_at_redshifts = np.array( [ self.galaxy_catalog.comoving_volume_element(redshift) for redshift in self.redshift_values ] ) self.galaxy_detection_mass_distribution_at_redshifts = [ np.array( [ self.galaxy_catalog.evaluate_galaxy_mass_distribution( redshifted_mass_inverse( redshifted_mass=emri_detection.measured_redshifted_mass, redshift=redshift, ) ) for redshift in self.redshift_values ] ) for emri_detection in self.emri_detections ] self.detection_skylocalization_weight_by_galaxy = [ np.array( [ NormalDist( mu=emri_detection.measured_right_ascension, sigma=SKY_LOCALIZATION_ERROR, ).pdf(galaxy.right_ascension) * NormalDist( mu=emri_detection.measured_declination, sigma=SKY_LOCALIZATION_ERROR, ).pdf(galaxy.declination) for galaxy in self.galaxy_catalog.catalog ] ) for emri_detection in self.emri_detections ]
[docs] def gw_detection_probability(self, redshift: float, hubble_constant: float) -> float: """Probability that a source at *redshift* is detected by LISA. Models the detector selection function as a cumulative normal: .. math:: p_\\mathrm{det}(z) = \\frac{1}{2}\\left[1 + \\mathrm{erf}\\! \\left(\\frac{D_\\mathrm{thr} - d_L(z)}{\\sqrt{2}\\,\\sigma_{d_L}(z)}\\right)\\right] where :math:`\\sigma_{d_L} = f_\\sigma \\cdot d_L(z)` and :math:`f_\\sigma` is ``FRACTIONAL_LUMINOSITY_ERROR``. Args: redshift: Source redshift. hubble_constant: Dimensionless Hubble parameter :math:`h`. Returns: Detection probability in :math:`[0, 1]`. """ return float( ( 1 + erf( (self.luminosity_distance_threshold - dist(redshift, hubble_constant)) / np.sqrt(2) / FRACTIONAL_LUMINOSITY_ERROR / dist(redshift, hubble_constant) ) ) / 2 )
[docs] def gw_likelihood( self, measured_luminosity_distance: float, redshift: float, hubble_constant: float, ) -> float: """GW likelihood: probability of measuring *measured_luminosity_distance* given *redshift*. Assumes Gaussian measurement noise: .. math:: \\mathcal{L}(\\hat{d}_L \\mid z, h) = \\mathcal{N}\\!\\left(\\hat{d}_L;\\, d_L(z, h),\\, f_\\sigma \\cdot d_L(z, h)\\right) Args: measured_luminosity_distance: Measured luminosity distance :math:`\\hat{d}_L` in Gpc. redshift: True source redshift. hubble_constant: Dimensionless Hubble parameter :math:`h`. Returns: Likelihood value (probability density). """ mu_luminosity_distance = dist(redshift, hubble_constant) sigma_luminosity_distance = FRACTIONAL_LUMINOSITY_ERROR * mu_luminosity_distance distribution = NormalDist(mu=mu_luminosity_distance, sigma=sigma_luminosity_distance) return distribution.pdf(measured_luminosity_distance)
[docs] def likelihood( self, hubble_constant: float, measured_luminosity_distance: float, measured_redshifted_mass: float, detection_index: int, ) -> float: """Marginalized likelihood :math:`p(\\hat{d}_L \\mid H_0)` for one EMRI detection. Integrates the GW likelihood over the galaxy-catalog redshift distribution, weighted by sky-localization overlap and — optionally — BH mass information. A selection-effects correction divides by the denominator integral when ``use_selection_effects_correction`` is ``True``: .. math:: \\mathcal{L}(H_0) = \\frac{\\int p_\\mathrm{GW}(\\hat{d}_L \\mid z, H_0)\\, p_\\mathrm{det}(z, H_0)\\, p(z \\mid \\mathrm{catalog})\\, dz} {\\int p_\\mathrm{det}(z, H_0)\\, p(z \\mid \\mathrm{catalog})\\, dz} Args: hubble_constant: Dimensionless Hubble parameter :math:`h` being evaluated. measured_luminosity_distance: Measured :math:`\\hat{d}_L` in Gpc. measured_redshifted_mass: Measured redshifted BH mass :math:`M_z` in solar masses. detection_index: Index into ``emri_detections`` and pre-computed weight arrays. Returns: Likelihood value (positive real number; not normalized over :math:`H_0`). """ # Compute all luminosity distances at once — replaces 2000+ scalar dist() calls. mu_d = dist_array(self.redshift_values, hubble_constant) # GW detection probability: P_det(z) = (1 + erf(x)) / 2 # where x = (D_threshold - mu_d) / (sqrt(2) * sigma_d) p_det_array = ( 1.0 + erf( (self.luminosity_distance_threshold - mu_d) / (np.sqrt(2.0) * FRACTIONAL_LUMINOSITY_ERROR * mu_d) ) ) / 2.0 # GW likelihood: Gaussian PDF with mu=mu_d, sigma=sigma_d sigma_d = FRACTIONAL_LUMINOSITY_ERROR * mu_d gw_likelihood_array = np.exp( -0.5 * ((measured_luminosity_distance - mu_d) / sigma_d) ** 2 ) / (sigma_d * np.sqrt(2.0 * np.pi)) # Galaxy sky-localisation weight per redshift bin: matrix-vector product # galaxy_distribution_at_redshifts: (n_z, n_galaxies); weights: (n_galaxies,) galaxy_skylocalization_weights = ( self.galaxy_distribution_at_redshifts
[docs] @ self.detection_skylocalization_weight_by_galaxy[detection_index] ) if not self.use_bh_mass: nominator = np.trapezoid( gw_likelihood_array * p_det_array * galaxy_skylocalization_weights, self.redshift_values, ) else: nominator = np.trapezoid( gw_likelihood_array * p_det_array * NormalDist( mu=measured_redshifted_mass, sigma=FRACTIONAL_MEASURED_MASS_ERROR * measured_redshifted_mass, ).pdf(measured_redshifted_mass) * np.array( [ np.sum( const_redshift_distribution * const_redshift_mass_distribution * self.detection_skylocalization_weight_by_galaxy[detection_index] ) for const_redshift_distribution, const_redshift_mass_distribution in zip( self.galaxy_distribution_at_redshifts, self.galaxy_detection_mass_distribution_at_redshifts[detection_index], ) ] ), self.redshift_values, ) denominator = np.trapezoid( p_det_array * galaxy_skylocalization_weights, self.redshift_values, ) if not self.use_selection_effects_correction: denominator = 1.0 return float(nominator / denominator)
def posterior(self, hubble_constant: float) -> list[float]: return [ self.likelihood( hubble_constant=hubble_constant, measured_luminosity_distance=emri_detection.measured_luminosity_distance, measured_redshifted_mass=emri_detection.measured_redshifted_mass, detection_index=index, ) for index, emri_detection in enumerate(self.emri_detections) ]