Source code for cascade_at.dismod.api.fill_extract_helpers.grid_tables

import numpy as np
import pandas as pd
from typing import Dict

from cascade_at.core.log import get_loggers
from cascade_at.dismod.api.fill_extract_helpers import utils, reference_tables
from cascade_at.dismod.constants import DensityEnum, IntegrandEnum, \
    WeightEnum, MulCovEnum, RateEnum
from cascade_at.model.var import Var
from cascade_at.model.model import Model

# Logger for this module; used below through LOG.info(...).
LOG = get_loggers(__name__)

# Fallback prior values written into prior rows whose density is null.
# The four entries line up with the columns ["density", "mean", "lower", "upper"],
# which is the order _add_prior_smooth_entries assigns them in.
DEFAULT_DENSITY = ["uniform", 0, -np.inf, np.inf]


def construct_weight_grid_tables(weights: Dict[str, Var],
                                 age_df, time_df) -> (pd.DataFrame, pd.DataFrame):
    """
    Build the ``weight`` and ``weight_grid`` tables.

    Parameters
    ----------
    weights
        Mapping from weight name to its Var. It is indexed by the name of
        every member of ``WeightEnum``, so each of those names must be
        present. (The original documentation lists the four kinds as
        "constant", "susceptible", "with_condition", and "total".)
    age_df
        Age data frame from dismod db
    time_df
        Time data frame from dismod db

    Returns
    -------
    Tuple of the weight table and the weight grid table
    """
    LOG.info("Constructing weight and weight grid tables.")

    members = list(WeightEnum)
    weight_names = [member.name for member in members]

    # One row per weight, recording how many ages and times its grid has.
    weight = pd.DataFrame({
        'weight_id': [member.value for member in members],
        'weight_name': weight_names,
        'n_age': [len(weights[label].ages) for label in weight_names],
        'n_time': [len(weights[label].times) for label in weight_names],
    })

    # Long-format grid: every (age, time) point of every weight, tagged
    # with the id of the weight it belongs to.
    pieces = []
    for member in members:
        LOG.info(f"Writing weight {member.name}.")
        piece = (
            weights[member.name].grid[["age", "time", "mean"]]
            .rename(columns={"mean": "weight"})
            .assign(weight_id=member.value)
        )
        pieces.append(piece)
    stacked = pd.concat(pieces).reset_index(drop=True)

    weight_grid = utils.convert_age_time_to_id(
        df=stacked, age_df=age_df, time_df=time_df
    )
    weight_grid["weight_grid_id"] = weight_grid.index
    return weight, weight_grid
def _add_prior_smooth_entries(grid_name, grid, num_existing_priors, num_existing_grids,
                              age_df, time_df):
    """
    Adds prior smooth grid entries to the smooth grid table and any other tables
    it needs to be added to. Called from inside of ``construct_model_tables`` only.

    Parameters
    ----------
    grid_name
        Name used for the smooth row and as the prefix of unnamed priors.
    grid
        Object exposing ``ages``, ``times`` and a ``priors`` data frame with
        (at least) the columns read here: density, mean, lower, upper, std,
        eta, nu, name, kind, plus whatever ``utils.convert_age_time_to_id``
        needs to produce ``age_id`` / ``time_id``.
    num_existing_priors
        Offset added to the row index to produce ``prior_id``.
    num_existing_grids
        Offset added to the row index to produce ``smooth_grid_id``.
    age_df
        Age data frame for dismod
    time_df
        Time data frame for dismod

    Returns
    -------
    Tuple ``(prior_df, smooth_df, grid_df)``: the prior rows, a single-row
    smooth frame, and one smooth-grid row per (age_id, time_id) carrying the
    value / dage / dtime prior ids.
    """
    age_count, time_count = (len(grid.ages), len(grid.times))
    # NOTE(review): the frame returned by ``grid.priors`` is modified in place
    # below; confirm that the property hands back a fresh frame on every access.
    prior_df = grid.priors
    # Three kinds of prior (value, dage, dtime), each with one row per grid
    # point plus one extra row. Presumably the extra row is the mulstd prior
    # (it has no age_id and is filtered out of the grid below) -- confirm.
    assert len(prior_df) == (age_count * time_count + 1) * 3

    # Get the densities for the priors
    prior_df.loc[prior_df.density.isnull(), ["density", "mean", "lower", "upper"]] = DEFAULT_DENSITY
    prior_df["density_id"] = prior_df["density"].apply(lambda x: DensityEnum[x].value)
    prior_df["prior_id"] = prior_df.index + num_existing_priors
    # Always True at this point (null densities were filled just above) and
    # not among the returned columns; kept so the frame is unchanged.
    prior_df["assigned"] = prior_df.density.notna()
    prior_df.rename(columns={"name": "prior_name"}, inplace=True)

    # Assign names to each of the priors: named priors get their id appended,
    # unnamed ones become "<grid_name>_<prior_id>".
    null_names = prior_df.prior_name.isnull()
    prior_df.loc[~null_names, "prior_name"] = (
        prior_df.loc[~null_names, "prior_name"].astype(str) + " " +
        prior_df.loc[~null_names, "prior_id"].astype(str)
    )
    prior_df.loc[null_names, "prior_name"] = prior_df.loc[null_names, "prior_id"].apply(
        lambda pid: f"{grid_name}_{pid}"
    )

    # Convert to age and time ID for prior table
    prior_df = utils.convert_age_time_to_id(
        df=prior_df, age_df=age_df, time_df=time_df
    )

    # Create the simple smooth data frame
    smooth_df = pd.DataFrame({
        "smooth_name": [grid_name],
        "n_age": [age_count],
        "n_time": [time_count],
        "mulstd_value_prior_id": [np.nan],
        "mulstd_dage_prior_id": [np.nan],
        "mulstd_dtime_prior_id": [np.nan]
    })

    # Create the grid entries
    # TODO: Pass in the value prior ID instead from posterior to prior
    long_table = prior_df.loc[prior_df.age_id.notna()][["age_id", "time_id", "prior_id", "kind"]]
    grid_df = long_table[["age_id", "time_id"]].sort_values(["age_id", "time_id"]).drop_duplicates()
    # Pivot the long table: one <kind>_prior_id column per kind.
    for kind in ["value", "dage", "dtime"]:
        grid_values = (
            long_table.loc[long_table.kind == kind]
            .drop("kind", axis="columns")
            .rename(columns={"prior_id": f"{kind}_prior_id"})
        )
        grid_df = grid_df.merge(grid_values, on=["age_id", "time_id"])
    # The ids below are derived from the index, so renumber after sorting.
    # The previous ``.reindex()`` with no arguments left the index untouched.
    grid_df = grid_df.sort_values(["age_id", "time_id"], axis=0).reset_index(drop=True)
    grid_df["const_value"] = np.nan
    grid_df["smooth_grid_id"] = grid_df.index + num_existing_grids

    prior_df = prior_df[[
        'prior_id', 'prior_name', 'lower', 'upper',
        'mean', 'std', 'eta', 'nu', 'density_id'
    ]].sort_values(by='prior_id').reset_index(drop=True)
    return prior_df, smooth_df, grid_df
def construct_subgroup_table() -> pd.DataFrame:
    """
    Build the default, single-row subgroup table.

    The table holds one subgroup (id 0) inside one group (id 0), both named
    'world'. Real subgroup support would have to be built in here.
    """
    default_name = 'world'
    single_row = {
        'subgroup_id': [0],
        'subgroup_name': [default_name],
        'group_id': [0],
        'group_name': [default_name],
    }
    return pd.DataFrame(single_row)
def construct_model_tables(model: Model,
                           location_df: pd.DataFrame,
                           age_df: pd.DataFrame,
                           time_df: pd.DataFrame,
                           covariate_df: pd.DataFrame) -> Dict[str, pd.DataFrame]:
    """
    Main function that loops through the items from a model object, which include
    rate, random_effect, alpha, beta, and gamma and constructs the modeling tables
    in dismod db.

    Each of these are "grid" vars, so they need entries in prior, smooth, and
    smooth_grid. This function returns those tables.

    It also constructs the rate, integrand, and mulcov tables (alpha, beta, gamma),
    plus nslist and nslist_pair tables.

    Parameters
    ----------
    model
        A model object that has rate information
    location_df
        A location / node data frame (columns ``c_location_id`` and ``node_id``
        are read)
    age_df
        An age data frame for dismod
    time_df
        A time data frame for dismod
    covariate_df
        A covariate data frame for dismod (columns ``c_covariate_name`` and
        ``covariate_id`` are read)

    Returns
    -------
    A dictionary of data frames for each table name, includes:
    rate, prior, smooth, smooth_grid, mulcov, nslist, nslist_pair, and subgroup tables
    """
    prior_cols = ['value_prior_id', 'dage_prior_id', 'dtime_prior_id']

    def compress_priors(rate_name, grid, prior, prior_id):
        """Remove identical priors from the prior table and remap the prior ids.

        ``prior_id`` is the first id to hand out; surviving priors are numbered
        consecutively from it, in order of the smallest original id they replace.
        """
        # Flag, per grid column, which priors that column references, so two
        # priors are only merged when they are used in the same roles.
        for col in prior_cols:
            used_ids = grid[col].unique()
            prior.loc[prior.prior_id.isin(used_ids), col] = True
        value_cols = [
            c for c in prior.columns
            if c not in ('prior_id', 'prior_name') and c not in prior_cols
        ]
        group_cols = value_cols + prior_cols
        # NaN is replaced by a sentinel because groupby drops NaN keys.
        groups = sorted(
            prior.fillna(-999).groupby(group_cols),
            key=lambda item: item[1].prior_id.min()
        )
        # (new id, representative old id, every old id in the group)
        remap = [
            (prior_id + i, g.prior_id.min(), g.prior_id.unique())
            for i, (_, g) in enumerate(groups)
        ]
        new_id_by_old = {old: new for new, old, _ in remap}
        # Keep one representative row per group; copy so that the writes
        # below do not target a view of the caller's frame.
        prior = prior.loc[prior.prior_id.isin(list(new_id_by_old))].copy()
        prior['prior_id'] = prior['prior_id'].replace(new_id_by_old)
        prior['prior_name'] = [f'{rate_name}_{p}' for p in prior.prior_id]
        for new_id, _, old_ids in remap:
            for col in prior_cols:
                grid.loc[grid[col].isin(old_ids), col] = new_id
        prior = prior.drop(columns=prior_cols)
        return grid, prior

    def stack(frames):
        """Concatenate the collected frames; an empty frame if there are none."""
        return pd.concat(frames, sort=False) if frames else pd.DataFrame()

    # Frames are collected and concatenated once at the end. DataFrame.append
    # was removed in pandas 2.0 and is quadratic when called in a loop.
    smooth_frames, prior_frames, grid_frames = [], [], []
    mulcov_frames, nslist_pair_frames = [], []
    n_smooth = n_prior = n_grid = 0

    nslist = {}
    rate_table = reference_tables.default_rate_table()
    subgroup_table = construct_subgroup_table()

    covariate_index = dict(covariate_df[["c_covariate_name", "covariate_id"]].to_records(index=False))

    def add_grid(grid_name, grid, compress=False):
        """Create prior/smooth/smooth_grid rows for one grid; return its smooth_id."""
        nonlocal n_smooth, n_prior, n_grid
        prior, smooth, grid_rows = _add_prior_smooth_entries(
            grid_name=grid_name, grid=grid,
            num_existing_priors=n_prior,
            num_existing_grids=n_grid,
            age_df=age_df, time_df=time_df
        )
        if compress:
            grid_rows, prior = compress_priors(grid_name, grid_rows, prior, n_prior)
        smooth_id = n_smooth
        smooth['smooth_id'] = smooth_id
        grid_rows['smooth_id'] = smooth_id
        smooth_frames.append(smooth)
        prior_frames.append(prior)
        grid_frames.append(grid_rows)
        n_smooth += len(smooth)
        n_prior += len(prior)
        n_grid += len(grid_rows)
        return smooth_id

    if "rate" in model:
        LOG.info("Adding rates...")
        for rate_name, grid in model["rate"].items():
            # Loop through each of the rates and add entries into the prior,
            # and smooth tables. Also put an entry in the rate table so we
            # know the parent smooth ID. Priors are compressed for every
            # rate whose name does not contain 'omega'.
            LOG.info(f"Adding rate {rate_name}")
            smooth_id = add_grid(rate_name, grid, compress='omega' not in rate_name)
            rate_table.loc[rate_table.rate_id == RateEnum[rate_name].value, "parent_smooth_id"] = smooth_id

    if "random_effect" in model:
        LOG.info("Adding random effects...")
        for (rate_name, child_location), grid in model["random_effect"].items():
            # Loop through each of the random effects and add entries
            # into the prior and smooth tables.
            LOG.info(f"Adding random effect for rate {rate_name}")
            grid_name = f"{rate_name}_re"
            if child_location is not None:
                grid_name = grid_name + f"_{child_location}"

            smooth_id = add_grid(grid_name, grid)

            if child_location is None:
                rate_table.loc[rate_table.rate_id == RateEnum[rate_name].value, "child_smooth_id"] = smooth_id
            else:
                # If we are doing this for a child location, then we want to make entries in the
                # nslist and nslist_pair tables
                node_id = location_df[location_df.c_location_id == child_location].node_id.iloc[0]
                if rate_name not in nslist:
                    ns_id = len(nslist)
                    nslist[rate_name] = ns_id
                else:
                    ns_id = nslist[rate_name]
                # NOTE(review): the original indentation of this assignment is
                # not recoverable (inside the ``else`` above vs. after the
                # if/else). It is applied unconditionally here so that the
                # first child of a rate registers its nslist too -- confirm.
                rate_table.loc[rate_table.rate_id == RateEnum[rate_name].value, "child_nslist_id"] = ns_id
                nslist_pair_frames.append(pd.DataFrame({
                    'nslist_id': [ns_id],
                    'node_id': [node_id],
                    'smooth_id': [smooth_id]
                }))

    potential_mulcovs = ["alpha", "beta", "gamma"]
    mulcovs = [x for x in potential_mulcovs if x in model]

    for m in mulcovs:
        LOG.info(f"Looking for mulcovs {m}...")
        for (covariate, rate_or_integrand), grid in model[m].items():
            LOG.info(f"Adding covariate {covariate} on {rate_or_integrand}.")
            grid_name = f"{m}_{rate_or_integrand}_{covariate}"

            smooth_id = add_grid(grid_name, grid)

            mulcov = pd.DataFrame({
                "mulcov_type": [MulCovEnum[m].value],
                "rate_id": [np.nan],
                "integrand_id": [np.nan],
                "covariate_id": [covariate_index[covariate]],
                "group_smooth_id": [smooth_id]
            })
            # alpha multiplies a rate; beta and gamma apply to an integrand.
            if m == "alpha":
                mulcov["rate_id"] = RateEnum[rate_or_integrand].value
            elif m in ["beta", "gamma"]:
                mulcov["integrand_id"] = IntegrandEnum[rate_or_integrand].value
            else:
                raise RuntimeError(f"Unknown mulcov type {m}.")
            mulcov_frames.append(mulcov)

    mulcov_table = stack(mulcov_frames)
    mulcov_table.reset_index(inplace=True, drop=True)
    mulcov_table["mulcov_id"] = mulcov_table.index
    mulcov_table["group_id"] = 0
    mulcov_table["subgroup_smooth_id"] = np.nan

    nslist_table = pd.DataFrame.from_records(
        data=list(nslist.items()),
        columns=["nslist_name", "nslist_id"]
    )

    nslist_pair_table = stack(nslist_pair_frames)
    nslist_pair_table.reset_index(inplace=True, drop=True)
    nslist_pair_table["nslist_pair_id"] = nslist_pair_table.index

    return {
        'rate': rate_table,
        'prior': stack(prior_frames),
        'smooth': stack(smooth_frames),
        'smooth_grid': stack(grid_frames),
        'mulcov': mulcov_table,
        'nslist': nslist_table,
        'nslist_pair': nslist_pair_table,
        'subgroup': subgroup_table
    }