Source code for rail.plotting.data_extraction_funcs

"""A set of utility functions to extract data for plotting from rail files"""

from __future__ import annotations

from typing import Any

import numpy as np
import qp
import tables_io

from rail.utils.catalog_utils import CatalogConfigBase

from rail.projects import RailProject, path_funcs
from . import utility_functions



[docs]
def extract_z_true(
    filepath: str,
    colname: str = "redshift",
) -> np.ndarray:
    """Extract the true redshifts from a file

    Parameters
    ----------
    filepath: str
        Path to file with tabular data

    colname: str
        Name of the column with redshfits ['redshift']

    Returns
    -------
    redshifts: np.ndarray
        Redshifts in question

    Notes
    -----
    This assumes the redshifts are in a file that can be read by tables_io
    """
    truth_table = tables_io.read(filepath)
    return truth_table[colname]




[docs]
def extract_z_point(
    filepath: str,
    colname: str = "zmode",
) -> np.ndarray:
    """Extract the point estimates of redshifts from a file

    Parameters
    ----------
    filepath: str
        Path to file with tabular data

    colname: str
        Name of the column with point estimates ['zmode']

    Returns
    -------
    z_estimates: np.ndarray
        Redshift estimates in question

    Notes
    -----
    This assumes the point estimates are in a qp file
    """
    qp_ens = qp.read(filepath)
    z_estimates = np.squeeze(qp_ens.ancil[colname])
    return z_estimates




[docs]
def extract_mag(
    filepath: str,
    colname: str = "LSST_obs_i",
) -> np.ndarray:
    """Extract the i-mag from a file

    Parameters
    ----------
    filepath: str
        Path to file with tabular data

    colname: str
        Name of the column with redshfits ['redshift']

    Returns
    -------
    magnitude: np.ndarray
        Magnitude in question

    Notes
    -----
    This assumes the magnitude are in a file that can be read by tables_io
    """
    magnitude_table = tables_io.read(filepath)
    return magnitude_table[colname]




[docs]
def extract_magnitudes(
    filepath: str,
    template: str,
    bands: list[str],
) -> np.ndarray:
    """Extract the magntidues from a file

    Parameters
    ----------
    filepath: str
        Path to file with tabular data

    template:
        Template to make the names

    bands:
        List of the bands to apply to the template

    Returns
    -------
    magnitudes: np.ndarray
        Magnitudes in question

    Notes
    -----
    This assumes the magnitude are in a file that can be read by tables_io
    """
    magnitude_table = tables_io.read(filepath)
    magnitudes = utility_functions.get_band_values(magnitude_table, template, bands)
    return magnitudes




[docs]
def extract_z_pdf(
    filepath: str,
) -> qp.ensemble:
    """Extract the pdf estimates of redshifts from a file

    Parameters
    ----------
    filepath: str
        Path to file with tabular data

    Returns
    -------
    z_pdf: qp.ensemble
        Redshift pdf in question

    Notes
    -----
    This assumes the point estimates are in a qp file
    """
    z_pdf = qp.read(filepath)
    return z_pdf




[docs]
def extract_multiple_z_point(
    filepaths: dict[str, str],
    colname: str = "zmode",
) -> dict[str, np.ndarray]:  # pragma: no cover
    """Extract the point estimates of redshifts from several files

    Parameters
    ----------
    filepaths: dict[str, str]
        Path to file with tabular data, keys will be associatd with the various
        extracted point estimates

    colname: str
        Name of the column with point estimates ['zmode']

    Returns
    -------
    z_estimates: dict[str, np.ndarray]
        Redshift estimates in question, key by the key from input argument

    Notes
    -----
    This assumes the point estimates are in a qp file
    """
    ret_dict = {key: extract_z_point(val, colname) for key, val in filepaths.items()}
    return ret_dict




[docs]
def make_z_true_z_point_dict(
    z_true: np.ndarray,
    z_estimate: np.ndarray,
    mags: np.ndarray,
) -> dict[str, np.ndarray]:
    """Build a dictionary with true redshifts and a point_estimates

    Parameters
    ----------
    z_true:
        True Redshifts

    z_estimate:
        Point estimates

    mags:
        Magnitdues

    Returns
    -------
    out_dict: dict[str, np.ndarray]
        Dictionary with true redshift and a point estimate of the redshift
    """
    out_dict: dict[str, Any] = dict(
        truth=z_true,
        pointEstimate=z_estimate,
        magnitude=mags,
    )
    return out_dict




[docs]
def make_z_true_multi_z_point_dict(
    z_true: np.ndarray,
    z_estimates: dict[str, np.ndarray],
) -> dict[str, Any]:
    """Build a single dictionary with true redshifts and several point_estimates

    Parameters
    ----------
    z_true: np.ndarray
        True Redshifts

    z_estimates: dict[str, np.ndarray]
        Point estimates

    Returns
    -------
    out_dict: dict[str, Any]
        Dictionary with true redshift and all the point estimate of the redshift
    """
    out_dict: dict[str, Any] = dict(
        truth=z_true,
        pointEstimates=z_estimates,
    )
    return out_dict




[docs]
def get_pz_point_estimate_data(
    project: RailProject,
    selection: str,
    flavor: str,
    tag: str,
    algo: str,
) -> dict[str, np.ndarray] | None:
    """Get the true redshifts and point estimates
    for a particualar analysis selection and flavor

    Parameters
    ----------
    project: RailProject
        Object with information about the structure of the current project

    selection: str
        Data selection in question, e.g., 'gold', or 'blended'

    flavor: str
        Analysis flavor in question, e.g., 'baseline' or 'zCosmos'

    algo: str
        Algorithm we want the estimates for, e.g., 'knn', 'bpz'], etc...

    tag: str
        File tag, e.g., 'test' or 'train', or 'train_zCosmos'

    Returns
    -------
    pz_data: dict[str, np.ndarray] | None
        Data in question or None if a file is missing
    """
    z_true_path = path_funcs.get_z_true_path(project, selection, flavor, tag)
    z_estimate_path = path_funcs.get_ceci_pz_output_path(
        project, selection, flavor, algo
    )
    if z_estimate_path is None:  # pragma: no cover
        return None
    z_true_data = extract_z_true(z_true_path)
    z_estimate_data = extract_z_point(z_estimate_path)
    flavor_info = project.get_flavor(flavor)
    catalog_tag = flavor_info["catalog_tag"]
    CatalogConfigBase.apply(catalog_tag)
    catalog_class = CatalogConfigBase.active_class()
    ref_band = catalog_class.band_template.format(band=catalog_class.ref_band)
    mag_data = extract_mag(z_true_path, colname=ref_band)
    pz_data = make_z_true_z_point_dict(z_true_data, z_estimate_data, mag_data)
    return pz_data




[docs]
def get_ztrue_and_magntidues(
    project: RailProject,
    selection: str,
    flavor: str,
    tag: str,
) -> dict[str, np.ndarray] | None:
    """Get the true redshifts and observed magntidues
    for a particualar analysis selection and flavor

    Parameters
    ----------
    project: RailProject
        Object with information about the structure of the current project

    selection: str
        Data selection in question, e.g., 'gold', or 'blended'

    flavor: str
        Analysis flavor in question, e.g., 'baseline' or 'zCosmos'

    tag: str
        File tag, e.g., 'test' or 'train', or 'train_zCosmos'

    Returns
    -------
    out_data: dict[str, np.ndarray] | None
        Data in question or None if a file is missing
    """
    flavor_info = project.get_flavor(flavor)
    catalog_tag = flavor_info["catalog_tag"]
    CatalogConfigBase.apply(catalog_tag)
    catalog_class = CatalogConfigBase.active_class()

    z_true_path = path_funcs.get_z_true_path(project, selection, flavor, tag)
    z_true_data = extract_z_true(z_true_path, catalog_class.redshift_col)
    mag_data = extract_magnitudes(
        z_true_path, catalog_class.band_template, catalog_class.bandlist
    )
    out_data = dict(
        truth=z_true_data,
        magnitudes=mag_data,
        bands=catalog_class.bandlist,
    )
    return out_data




[docs]
def get_multi_pz_point_estimate_data(
    point_estimate_infos: dict[str, dict[str, Any]],
) -> dict[str, Any] | None:
    """Get the true redshifts and point estimates

    for several analysis variants

    This checks that they all have the same redshifts
    Parameters
    ----------
    point_estimate_infos: dict[str, dict[str, Any]]
        Information about how to get point estimates

    Returns
    -------
    pz_data: dict[str, Any] | None
        Data in question or None
    """
    point_estimates: dict[str, np.ndarray] = {}
    ztrue_data: np.ndarray | None = None
    ztrue_key: str | None = None
    for key, val in point_estimate_infos.items():
        the_data = get_pz_point_estimate_data(**val)
        if the_data is None:  # pragma: no cover
            continue
        if ztrue_data is None:
            ztrue_data = the_data["truth"]
            ztrue_key = key
        else:
            if not np.allclose(ztrue_data, the_data["truth"]):  # pragma: no cover
                raise ValueError(
                    f"Mismatch in truth data. data({key}) != data({ztrue_key})"
                )
        point_estimates[key] = the_data["pointEstimate"]
    if ztrue_data is None:  # pragma: no cover
        return None
    pz_data = make_z_true_multi_z_point_dict(ztrue_data, point_estimates)
    return pz_data




[docs]
def get_tomo_bins_nz_estimate_data(
    project: RailProject,
    selection: str,
    flavor: str,
    algo: str,
    classifier: str,
    summarizer: str,
) -> qp.Ensemble:
    """Get the tomographic bin n(z) estimates

    Parameters
    ----------
    project: RailProject
        Object with information about the structure of the current project

    selection: str
        Data selection in question, e.g., 'gold', or 'blended'

    flavor: str
        Analysis flavor in question, e.g., 'baseline' or 'zCosmos'

    algo: str
        Algorithm we want the estimates for, e.g., 'knn', 'bpz'], etc...

    classifier: str
        Algorithm we use to make tomograpic bin

    summarizer: str
        Algorithm we use to go from p(z) to n(z)

    Returns
    -------
    nz_data: qp.Ensemble
        Tomographic bin n(z) data
    """
    paths = path_funcs.get_ceci_nz_output_paths(
        project,
        selection,
        flavor,
        algo,
        classifier,
        summarizer,
    )

    data = qp.concatenate([extract_z_pdf(path_) for path_ in paths])
    return data




[docs]
def get_tomo_bins_true_nz_data(
    project: RailProject,
    selection: str,
    flavor: str,
    algo: str,
    classifier: str,
) -> qp.Ensemble:
    """Get the tomographic bin true n(z)

    Parameters
    ----------
    project: RailProject
        Object with information about the structure of the current project

    selection: str
        Data selection in question, e.g., 'gold', or 'blended'

    flavor: str
        Analysis flavor in question, e.g., 'baseline' or 'zCosmos'

    algo: str
        Algorithm we want the estimates for, e.g., 'knn', 'bpz'], etc...

    classifier: str
        Algorithm we use to make tomograpic bin

    Returns
    -------
    nz_data: qp.Ensemble
        Tomographic bin n(z) data
    """
    paths = path_funcs.get_ceci_true_nz_output_paths(
        project,
        selection,
        flavor,
        algo,
        classifier,
    )

    data = qp.concatenate([extract_z_pdf(path_) for path_ in paths])
    return data