Source code for cascade_at.dismod.api.fill_extract_helpers.reference_tables

import numpy as np
import pandas as pd
from numbers import Real
from typing import Dict, Optional, List

from cascade_at.dismod.constants import DensityEnum, IntegrandEnum, \
    RateEnum, enum_to_dataframe
from cascade_at.core.log import get_loggers
from cascade_at.inputs.locations import LocationDAG
from cascade_at.model.covariate import Covariate

# Module-level logger handle, keyed by this module's name; the table
# constructors in this module emit their info-level messages through it.
LOG = get_loggers(__name__)


def construct_integrand_table(data_cv_from_settings: Optional[Dict[str, float]] = None,
                              default_data_cv: float = 0.0) -> pd.DataFrame:
    """
    Constructs the integrand table and adds data CV in the minimum_meas_cv column.

    One row is produced per member of ``IntegrandEnum`` except the one named
    ``'incidence'``, which is filtered out.

    Parameters
    ----------
    data_cv_from_settings
        (optional dict) key, value pair that has integrands mapped to data cv.
        Integrands that cannot be looked up in this mapping fall back to
        ``default_data_cv`` instead of raising ``KeyError``.
    default_data_cv
        (float) default value for data CV to use

    Returns
    -------
    Data frame with the columns ``integrand_name`` and ``minimum_meas_cv``.
    """
    df = pd.DataFrame({
        "integrand_name": enum_to_dataframe(IntegrandEnum)["name"],
        "minimum_meas_cv": default_data_cv
    })
    df = df.loc[df.integrand_name != 'incidence'].copy()

    if data_cv_from_settings is not None:
        def _lookup_cv(integrand_name):
            # Subscript access (rather than .get) keeps the behaviour of
            # mappings that supply their own defaults through __missing__,
            # e.g. collections.defaultdict; only a genuine KeyError falls
            # back to default_data_cv.
            try:
                return data_cv_from_settings[integrand_name]
            except KeyError:
                return default_data_cv

        df["minimum_meas_cv"] = df["integrand_name"].apply(_lookup_cv)
    return df
def default_rate_table() -> pd.DataFrame:
    """
    Builds the default rate table: one row per member of ``RateEnum``.

    Returns
    -------
    Data frame with ``rate_id`` (the enum value), ``rate_name`` (the enum
    name) and the columns ``parent_smooth_id``, ``child_smooth_id`` and
    ``child_nslist_id``, all of which are left as NaN here.
    """
    rates = list(RateEnum)
    rate_ids = [member.value for member in rates]
    rate_names = [member.name for member in rates]

    columns = {
        'rate_id': rate_ids,
        'rate_name': rate_names,
    }
    # The smoothing / nslist references are not known at this point, so
    # each of these columns is filled with NaN.
    for unset_column in ('parent_smooth_id', 'child_smooth_id', 'child_nslist_id'):
        columns[unset_column] = np.nan
    return pd.DataFrame(columns)
def construct_age_time_table(variable_name: str, variable: np.ndarray,
                             data_min: Optional[float] = None,
                             data_max: Optional[float] = None) -> pd.DataFrame:
    """
    Constructs the age or time table with age_id and age or time_id and time.
    Has unique identifiers for each.

    Parameters
    ----------
    variable_name
        one of 'age' or 'time'
    variable
        array of ages or times
    data_min
        minimum observed in the data; when given and smaller than every
        entry of ``variable`` it is added to the grid. ``None`` (the default)
        leaves the lower end of the grid unchanged.
    data_max
        max observed in the data; when given and larger than every entry
        of ``variable`` it is added to the grid. ``None`` (the default)
        leaves the upper end of the grid unchanged.

    Returns
    -------
    Data frame with the columns ``{variable_name}_id`` (0..n-1) and
    ``{variable_name}`` (sorted, de-duplicated grid values).
    """
    LOG.info(f"Constructing {variable_name} table.")
    # Accept any array-like; an ndarray input is passed through without a copy.
    variable = np.asarray(variable)

    # The bounds are optional, so only compare them when they were supplied
    # (comparing None against a number raises TypeError).
    if data_min is not None and data_min < np.min(variable):
        variable = np.append(variable, data_min)
    if data_max is not None and data_max > np.max(variable):
        variable = np.append(variable, data_max)

    # Treat values that agree to 14 decimals as duplicates. np.unique returns
    # the index of the first occurrence of each rounded value, and indexing
    # with those keeps the original (unrounded) numbers. The fancy index
    # also yields a new array, so the in-place sort below never touches the
    # caller's data.
    variable = variable[np.unique(variable.round(decimals=14), return_index=True)[1]]
    variable.sort()

    # If the whole grid spans less than one unit, add a point one unit above
    # the maximum. NOTE(review): presumably to avoid a degenerate grid --
    # confirm the requirement this is meant to satisfy.
    if variable[-1] - variable[0] < 1:
        variable = np.append(variable, variable[-1] + 1)

    df = pd.DataFrame(dict(id=range(len(variable)), var=variable))
    df.rename(columns={'id': f'{variable_name}_id', 'var': variable_name}, inplace=True)
    return df
def construct_node_table(location_dag: LocationDAG) -> pd.DataFrame:
    """
    Builds the node table from the data frame returned by a location DAG's
    to_dataframe() method.

    The frame is read through its ``location_id``, ``parent_id`` and ``name``
    columns. Each row gets a ``node_id`` equal to its position, and ``parent``
    holds the ``node_id`` of the row whose ``location_id`` equals this row's
    ``parent_id`` (NaN when no such row exists, since the join is a left join).

    Parameters
    ----------
    location_dag
        location hierarchy object

    Returns
    -------
    Data frame with the columns ``node_id``, ``node_name``, ``parent`` and
    ``c_location_id``.
    """
    LOG.info("Constructing node table.")

    locations = location_dag.to_dataframe().reset_index(drop=True)
    locations["node_id"] = locations.index

    # Lookup from a location id to the node id of that same location,
    # relabelled so it can be joined onto each row's parent_id.
    parent_lookup = locations[["node_id", "location_id"]].rename(
        columns={"location_id": "parent_id", "node_id": "parent"}
    )
    with_parents = pd.merge(locations, parent_lookup, on="parent_id", how="left")

    renamed = with_parents.rename(columns={
        "name": "node_name",
        "location_id": "c_location_id"
    })
    return renamed[['node_id', 'node_name', 'parent', 'c_location_id']]
def construct_covariate_table(covariates: List[Covariate]) -> pd.DataFrame:
    """
    Builds the covariate table from a list of Covariate objects.

    Covariates are keyed by name (a later entry with a repeated name replaces
    an earlier one) and ordered with "sex" and then "one" first when present,
    followed by the rest in sorted name order. Row ``i`` gets the generated
    name ``x_i``; the original name is kept in ``c_covariate_name``.

    Parameters
    ----------
    covariates
        covariate objects; their ``name``, ``reference`` and
        ``max_difference`` attributes are read

    Returns
    -------
    Data frame with the columns ``covariate_id``, ``covariate_name``,
    ``c_covariate_name``, ``reference`` and ``max_difference``.

    Raises
    ------
    ValueError
        If any covariate's ``reference`` is not a real number.
    """
    by_name = {cov.name: cov for cov in covariates}

    # Pull the special covariates to the front, in this fixed order.
    ordered = []
    for leading_name in ("sex", "one"):
        if leading_name in by_name:
            ordered.append(by_name.pop(leading_name))
    # Everything that is left follows alphabetically.
    ordered.extend(by_name[name] for name in sorted(by_name))

    ordered_names = [cov.name for cov in ordered]
    joined_names = ', '.join(ordered_names)
    LOG.info(f"Writing covariates {joined_names}")

    null_references = [
        cov.name for cov in ordered
        if not isinstance(cov.reference, Real)
    ]
    if null_references:
        raise ValueError(f"Covariate columns without reference values {null_references}.")

    # Names in `ordered` are unique (they were dict keys), so the generated
    # name depends only on the position in the ordering.
    generated_names = [f"x_{position}" for position, _ in enumerate(ordered)]
    references = np.array([cov.reference for cov in ordered], dtype=float)
    max_differences = np.array([cov.max_difference for cov in ordered], dtype=float)

    return pd.DataFrame({
        "covariate_id": np.arange(len(ordered)),
        "covariate_name": generated_names,
        "c_covariate_name": ordered_names,
        "reference": references,
        "max_difference": max_differences
    })
def construct_density_table() -> pd.DataFrame:
    """
    Builds the default density table.

    Returns
    -------
    Data frame with a single ``density_name`` column holding the name of
    every member of ``DensityEnum``, in enum iteration order.
    """
    density_names = []
    for density in DensityEnum:
        density_names.append(density.name)
    return pd.DataFrame({'density_name': density_names})