Source code for cascade_at.dismod.api.fill_extract_helpers.data_tables

import numpy as np
import pandas as pd

from cascade_at.core.log import get_loggers
from cascade_at.dismod.api.fill_extract_helpers import utils
from cascade_at.dismod.constants import DensityEnum, IntegrandEnum, \
    INTEGRAND_TO_WEIGHT

LOG = get_loggers(__name__)

DEFAULT_DENSITY = ["uniform", 0, -np.inf, np.inf]


[docs]def prep_data_avgint(df: pd.DataFrame, node_df: pd.DataFrame, covariate_df: pd.DataFrame):
    """
    Preps both the data table and the avgint table by
    mapping locations to nodes and covariates to names.

    Putting it in the same function because it does the same stuff,
    but data and avgint need to be called separately because dismod requires
    different columns.

    Parameters
    ----------
    df
        The data frame to map
    node_df
        The node table from dismod db
    covariate_df
        The covariate table from dismod db
    """
    data = df.copy()
    data = utils.map_locations_to_nodes(df=data, node_df=node_df)
    data = utils.map_covariate_names(df=data, covariate_df=covariate_df)

    data.reset_index(inplace=True, drop=True)
    return data


[docs]def construct_data_table(df: pd.DataFrame, node_df: pd.DataFrame,
                         covariate_df: pd.DataFrame, ages: np.ndarray, times: np.ndarray):
    """
    Constructs the data table from input df.

    Parameters
    ----------
    df
        data frame of inputs that have been prepped for dismod
    node_df
        the dismod node table
    covariate_df
        the dismod covariate table
    ages
    times
    """
    LOG.info("Constructing data table.")

    data = df.copy()
    data = prep_data_avgint(
        df=data,
        node_df=node_df,
        covariate_df=covariate_df
    )
    data["data_name"] = data.index.astype(str)

    data["density_id"] = data["density"].apply(lambda x: DensityEnum[x].value)
    data["integrand_id"] = data["measure"].apply(lambda x: IntegrandEnum[x].value)
    data["weight_id"] = data["measure"].apply(lambda x: INTEGRAND_TO_WEIGHT[x].value)
    data["subgroup_id"] = 0

    columns = data.columns
    data = data[[
        'data_name', 'integrand_id', 'density_id', 'node_id', 'weight_id', 'subgroup_id',
        'hold_out', 'meas_value', 'meas_std', 'eta', 'nu',
        'age_lower', 'age_upper', 'time_lower', 'time_upper'
    ] + [x for x in columns if x.startswith('x_')]]

    data = data.loc[(data.time_lower >= times.min()) & (data.time_upper <= times.max())].copy()
    data = data.loc[(data.age_lower >= ages.min()) & (data.age_upper <= ages.max())].copy()
    return data


[docs]def construct_gbd_avgint_table(df: pd.DataFrame,
                               node_df: pd.DataFrame,
                               covariate_df: pd.DataFrame,
                               integrand_df: pd.DataFrame,
                               ages: np.ndarray,
                               times: np.ndarray) -> pd.DataFrame:
    """
    Constructs the avgint table using the output df
    from the inputs.to_avgint() method.

    Parameters
    ----------
    df
        The data frame to construct the avgint table from, that has things like
        ages, times, nodes (locations), sexes, etc.
    node_df
        dismod node data frame
    covariate_df
        dismod covariate data frame
    integrand_df
        dismod integrand data frame
    ages
        array of ages for the model
    times
        array of times for the model
    """
    LOG.info("Constructing the avgint table.")
    avgint = df.copy()
    avgint = prep_data_avgint(
        df=avgint,
        node_df=node_df,
        covariate_df=covariate_df
    )
    avgint_df = pd.DataFrame()
    for i in integrand_df.integrand_name.unique():
        if i == 'mtstandard' or i == 'relrisk':
            continue
        df = avgint.copy()
        df['measure'] = i
        avgint_df = avgint_df.append(df)

    avgint_df = avgint_df.reset_index(drop=True)

    avgint_df["integrand_id"] = avgint_df["measure"].apply(lambda x: IntegrandEnum[x].value)
    avgint_df["weight_id"] = avgint_df["measure"].apply(lambda x: INTEGRAND_TO_WEIGHT[x].value)
    avgint_df["subgroup_id"] = 0

    avgint_df = avgint_df[[
        'integrand_id', 'node_id', 'weight_id', 'subgroup_id', 'c_location_id',
        'age_group_id', 'year_id', 'sex_id',
        'age_lower', 'age_upper', 'time_lower', 'time_upper'
    ] + [x for x in avgint_df.columns if x.startswith('x_')]]
    
    gbd_id_cols = ['sex_id', 'age_group_id', 'year_id']
    avgint_df.rename(columns={x: 'c_' + x for x in gbd_id_cols}, inplace=True)

    avgint_df = avgint_df.loc[(avgint_df.time_lower >= times.min()) & (avgint_df.time_upper <= times.max())].copy()
    avgint_df = avgint_df.loc[(avgint_df.age_lower >= ages.min()) & (avgint_df.age_upper <= ages.max())].copy()

    return avgint_df