Source code for cascade_at.dismod.api.fill_extract_helpers.data_tables

import numpy as np
import pandas as pd

from cascade_at.core.log import get_loggers
from cascade_at.dismod.api.fill_extract_helpers import utils
from cascade_at.dismod.constants import DensityEnum, IntegrandEnum, \
    INTEGRAND_TO_WEIGHT

# Module-level logger, obtained from the project's logging helper.
LOG = get_loggers(__name__)

# Default density specification. It is not referenced by the functions shown
# in this module.
# NOTE(review): the elements look like [density name, mean, lower, upper];
# confirm against whatever consumes this constant.
DEFAULT_DENSITY = ["uniform", 0, -np.inf, np.inf]


def prep_data_avgint(df: pd.DataFrame, node_df: pd.DataFrame, covariate_df: pd.DataFrame):
    """
    Shared preparation step for the dismod data and avgint tables.

    Both tables need their locations mapped to nodes and their covariates
    mapped to names, so that work lives here. The caller is still
    responsible for the table-specific columns, because dismod requires
    different columns for data and for avgint.

    Parameters
    ----------
    df
        The data frame to map. It is copied first, so the caller's frame
        is not handed to the mapping helpers directly.
    node_df
        The node table from dismod db
    covariate_df
        The covariate table from dismod db

    Returns
    -------
    The mapped copy of ``df`` with its index reset (old index dropped).
    """
    with_nodes = utils.map_locations_to_nodes(df=df.copy(), node_df=node_df)
    with_covariates = utils.map_covariate_names(
        df=with_nodes, covariate_df=covariate_df
    )
    # Drop whatever index the mapping helpers left behind so the result has
    # a fresh default index.
    return with_covariates.reset_index(drop=True)
def construct_data_table(df: pd.DataFrame, node_df: pd.DataFrame, covariate_df: pd.DataFrame,
                         ages: np.ndarray, times: np.ndarray):
    """
    Builds the dismod data table from a prepped input data frame.

    The input is run through ``prep_data_avgint``, the dismod id columns
    are derived from the ``density`` and ``measure`` columns, the columns
    are put in the expected order (fixed columns first, then every column
    whose name starts with ``x_``), and rows whose time or age interval is
    not fully inside the range spanned by ``times`` / ``ages`` are dropped.

    Parameters
    ----------
    df
        data frame of inputs that have been prepped for dismod
    node_df
        the dismod node table
    covariate_df
        the dismod covariate table
    ages
        array of ages; only its min and max are used, as the allowed
        age range
    times
        array of times; only its min and max are used, as the allowed
        time range

    Returns
    -------
    A new data frame holding the rows that fall inside the age and time
    ranges, restricted to the data-table columns.
    """
    LOG.info("Constructing data table.")
    table = prep_data_avgint(
        df=df.copy(), node_df=node_df, covariate_df=covariate_df
    )

    # The row label is the string form of the prepped frame's row index.
    table["data_name"] = table.index.astype(str)
    table["density_id"] = table["density"].apply(lambda x: DensityEnum[x].value)
    table["integrand_id"] = table["measure"].apply(lambda x: IntegrandEnum[x].value)
    table["weight_id"] = table["measure"].apply(lambda x: INTEGRAND_TO_WEIGHT[x].value)
    table["subgroup_id"] = 0

    fixed_columns = [
        'data_name', 'integrand_id', 'density_id', 'node_id', 'weight_id',
        'subgroup_id', 'hold_out', 'meas_value', 'meas_std', 'eta', 'nu',
        'age_lower', 'age_upper', 'time_lower', 'time_upper'
    ]
    covariate_columns = [name for name in table.columns if name.startswith('x_')]
    table = table[fixed_columns + covariate_columns]

    # A row is kept only when its whole time interval and its whole age
    # interval lie inside the model's ranges.
    within_times = (
        (table["time_lower"] >= times.min()) & (table["time_upper"] <= times.max())
    )
    within_ages = (
        (table["age_lower"] >= ages.min()) & (table["age_upper"] <= ages.max())
    )
    return table.loc[within_times & within_ages].copy()
def construct_gbd_avgint_table(df: pd.DataFrame,
                               node_df: pd.DataFrame,
                               covariate_df: pd.DataFrame,
                               integrand_df: pd.DataFrame,
                               ages: np.ndarray,
                               times: np.ndarray) -> pd.DataFrame:
    """
    Constructs the avgint table using the output df from the
    inputs.to_avgint() method.

    The prepped rows are repeated once for every distinct integrand name in
    ``integrand_df`` except ``mtstandard`` and ``relrisk``. The dismod id
    columns are then attached, the GBD id columns (``sex_id``,
    ``age_group_id``, ``year_id``) are renamed with a ``c_`` prefix, and
    rows whose time or age interval is not fully inside the range spanned
    by ``times`` / ``ages`` are dropped.

    Parameters
    ----------
    df
        The data frame to construct the avgint table from, that has things
        like ages, times, nodes (locations), sexes, etc.
    node_df
        dismod node data frame
    covariate_df
        dismod covariate data frame
    integrand_df
        dismod integrand data frame; its ``integrand_name`` column decides
        which integrands get rows
    ages
        array of ages for the model; only its min and max are used
    times
        array of times for the model; only its min and max are used

    Returns
    -------
    The avgint table. It has no rows (but the expected columns) when no
    integrand remains after the exclusions.
    """
    LOG.info("Constructing the avgint table.")
    avgint = prep_data_avgint(
        df=df.copy(), node_df=node_df, covariate_df=covariate_df
    )

    excluded = ('mtstandard', 'relrisk')
    measures = [
        name for name in integrand_df.integrand_name.unique()
        if name not in excluded
    ]

    if measures:
        # One concat instead of DataFrame.append in a loop: append was
        # removed in pandas 2.0 and re-copied the growing frame every time.
        avgint_df = pd.concat(
            [avgint.assign(measure=name) for name in measures],
            ignore_index=True
        )
    else:
        # Nothing to predict for: keep the column layout so the rest of the
        # function yields an empty, correctly shaped table instead of
        # failing on a missing 'measure' column.
        LOG.warning("No integrands to build the avgint table for.")
        avgint_df = avgint.iloc[0:0].assign(measure=None)

    # Resolve the enum lookups once per measure rather than once per row.
    integrand_ids = {name: IntegrandEnum[name].value for name in measures}
    weight_ids = {name: INTEGRAND_TO_WEIGHT[name].value for name in measures}
    avgint_df["integrand_id"] = avgint_df["measure"].map(integrand_ids)
    avgint_df["weight_id"] = avgint_df["measure"].map(weight_ids)
    avgint_df["subgroup_id"] = 0

    covariate_columns = [x for x in avgint_df.columns if x.startswith('x_')]
    avgint_df = avgint_df[[
        'integrand_id', 'node_id', 'weight_id', 'subgroup_id',
        'c_location_id', 'age_group_id', 'year_id', 'sex_id',
        'age_lower', 'age_upper', 'time_lower', 'time_upper'
    ] + covariate_columns]

    gbd_id_cols = ['sex_id', 'age_group_id', 'year_id']
    avgint_df = avgint_df.rename(columns={x: 'c_' + x for x in gbd_id_cols})

    # A row is kept only when its whole time interval and its whole age
    # interval lie inside the model's ranges.
    within_times = (
        (avgint_df.time_lower >= times.min()) & (avgint_df.time_upper <= times.max())
    )
    within_ages = (
        (avgint_df.age_lower >= ages.min()) & (avgint_df.age_upper <= ages.max())
    )
    return avgint_df.loc[within_times & within_ages].copy()