Source code for cascade_at.dismod.api.fill_extract_helpers.posterior_to_prior
import pandas as pd
from typing import Dict, List
import numpy as np
from scipy import stats
from cascade_at.dismod.api.fill_extract_helpers.utils import vec_to_midpoint
from cascade_at.model.utilities.grid_helpers import expand_grid
from cascade_at.dismod.constants import RateToIntegrand, IntegrandEnum, INTEGRAND_TO_WEIGHT
from cascade_at.inputs.utilities.gbd_ids import format_age_time
from cascade_at.dismod.integrand_mappings import RATE_TO_INTEGRAND, integrand_to_gbd_measures
from cascade_at.model.smooth_grid import SmoothGrid
def get_prior_avgint_grid(grids: Dict[str, Dict[str, np.ndarray]],
                          sexes: List[int],
                          locations: List[int],
                          midpoint: bool = False) -> pd.DataFrame:
    """
    Get a data frame to use for setting up posterior predictions on a grid.
    The grids are specified in the grids parameter.

    Will still need to have covariates added to it, and prep data from
    dismod.api.data_tables.prep_data_avgint to convert nodes and covariate names
    before it can be input into the avgint table in a database.

    Parameters
    ----------
    grids
        A dictionary of grids keyed by rate name (each key is translated to
        an integrand through ``RateToIntegrand``). Each value is a dictionary
        with "age" and "time" entries holding the grid points.
    sexes
        A list of sexes
    locations
        A list of locations
    midpoint
        Whether to midpoint the grid lower and upper values (recommended for rates).

    Returns
    -------
    Dataframe with columns
        "integrand_id", "location_id", "weight_id", "subgroup_id",
        "age_lower", "age_upper", "time_lower", "time_upper", "sex_id".
        One row per combination of age, time, location and sex, for every
        rate in ``grids``. If ``grids`` is empty, an empty data frame with
        these columns is returned.
    """
    columns = [
        "integrand_id", "location_id", "weight_id", "subgroup_id",
        "age_lower", "age_upper", "time_lower", "time_upper", "sex_id"
    ]
    # Collect the per-rate frames and concatenate once at the end;
    # DataFrame.append was removed in pandas 2.0 and copied the whole
    # accumulated frame on every iteration.
    frames = []
    for rate, grid in grids.items():
        if midpoint:
            time = vec_to_midpoint(grid['time'])
            age = vec_to_midpoint(grid['age'])
        else:
            time = grid['time']
            age = grid['age']
        posterior_df = expand_grid({
            'age_lower': age,
            'time_lower': time,
            'location_id': locations,
            'sex_id': sexes
        })
        # Predictions are requested at points, so upper == lower.
        posterior_df['time_upper'] = posterior_df['time_lower']
        posterior_df['age_upper'] = posterior_df['age_lower']
        posterior_df['rate'] = rate
        posterior_df['integrand'] = posterior_df['rate'].map(RateToIntegrand)
        posterior_df['integrand_id'] = posterior_df['integrand'].apply(
            lambda x: IntegrandEnum[x].value
        )
        posterior_df['weight_id'] = posterior_df["integrand"].apply(
            lambda x: INTEGRAND_TO_WEIGHT[x].value
        )
        posterior_df['subgroup_id'] = 0
        frames.append(posterior_df)
    if not frames:
        # Nothing to predict: return the expected schema rather than failing
        # on a column lookup against a column-less frame.
        return pd.DataFrame(columns=columns)
    # The index is intentionally not reset, matching the previous behavior.
    return pd.concat(frames)[columns]
def format_rate_grid_for_ihme(rates: Dict[str, "SmoothGrid"], gbd_round_id: int,
                              location_id: int, sex_id: int) -> pd.DataFrame:
    """
    Formats a grid of mean, upper, and lower for a prior rate
    for the IHME database. **Only does this for Gaussian priors.**

    For every non-empty rate grid, the "lower" and "upper" columns are
    overwritten with the 2.5% and 97.5% quantiles of the value prior at each
    (age, time) grid point. The grid is then tagged with its integrand,
    converted to GBD measures, and stamped with the given location and sex.

    Parameters
    ----------
    rates
        A dictionary of SmoothGrids, keyed by primary rates like "iota"
    gbd_round_id
        the GBD round
    location_id
        the location ID to append to this data frame
    sex_id
        the sex ID to append to this data frame

    Returns
    -------
    A data frame formatted for the IHME databases, with columns
        "location_id", "year_id", "age_group_id", "sex_id", "measure_id",
        "mean", "upper", "lower". If ``rates`` is empty, or every grid in it
        is empty, an empty data frame with these columns is returned.
    """
    columns = [
        'location_id', 'year_id', 'age_group_id', 'sex_id', 'measure_id',
        'mean', 'upper', 'lower'
    ]
    dfs = []
    for rate, smooth_grid in rates.items():
        df = smooth_grid.value.grid.copy()
        if df.empty:
            continue
        # Grid points are single ages/times, so lower and upper coincide.
        df['age_lower'] = df['age']
        df['age_upper'] = df['age']
        df['time_lower'] = df['time']
        df['time_upper'] = df['time']
        df = format_age_time(df=df, gbd_round_id=gbd_round_id)
        group_cols = ['age', 'time']
        # TODO: Once we can upgrade to pandas 1.1.0, then we can use the groupby(..., dropna=False)
        #  feature, which we need because eta and nu can be null and that's ok, but pandas drops them.
        #  In the meantime, we will group on age and time which means we're looping over each row,
        #  which in some cases will be x 30 more computation than necessary.
        #  Once we upgrade, use the group_cols below and it will skip duplicate computation.
        # group_cols = ['mean', 'std', 'lower', 'upper', 'density', 'eta', 'nu']
        for (age, time), group in df.groupby(group_cols):
            # The group key is the (age, time) pair shared by every row in
            # the group, so the prior is looked up once per grid point.
            at_row = smooth_grid.value[age, time].quantiles([0.025, 0.975])
            df.loc[group.index, 'lower'] = at_row[0]
            df.loc[group.index, 'upper'] = at_row[1]
        df['integrand'] = RATE_TO_INTEGRAND[rate].name
        df = integrand_to_gbd_measures(df=df, integrand_col='integrand')
        df['location_id'] = location_id
        df['sex_id'] = sex_id
        dfs.append(df)
    if not dfs:
        # pd.concat raises on an empty list; return the expected schema.
        return pd.DataFrame(columns=columns)
    # ignore_index gives a fresh 0..n-1 index without creating a throwaway
    # "index" column the way reset_index() does.
    return pd.concat(dfs, axis=0, sort=False, ignore_index=True)[columns]