#!/usr/bin/env python
import logging
import sys
from pathlib import Path
from typing import Union, List, Dict, Any, Optional, Tuple
import os
import numpy as np
import pandas as pd
from cascade_at.core import CascadeATError
from cascade_at.context.model_context import Context
from cascade_at.core.log import get_loggers, LEVELS
from cascade_at.dismod.api.dismod_extractor import DismodExtractor
from cascade_at.dismod.api.dismod_filler import DismodFiller
from cascade_at.dismod.api.run_dismod import run_dismod_commands
from cascade_at.executor.args.arg_utils import ArgumentList
from cascade_at.executor.args.args import DmCommands, DmOptions, ParentLocationID, SexID
from cascade_at.executor.args.args import ModelVersionID, BoolArg, LogLevel, StrArg, IntArg
from cascade_at.inputs.measurement_inputs import MeasurementInputs
from cascade_at.model.grid_alchemy import Alchemy
from cascade_at.saver.results_handler import ResultsHandler
from cascade_at.settings.settings_config import SettingsConfig
from cascade_at.model.priors import Gaussian, _Prior
from cascade_at.dismod.api.fill_extract_helpers.posterior_to_prior import format_rate_grid_for_ihme
# Module-level logger, obtained from the package's logging helper.
LOG = get_loggers(__name__)

# Command-line arguments accepted by this script. ``main`` parses
# ``sys.argv`` with this list and forwards the parsed values to ``dismod_db``.
ARG_LIST = ArgumentList([
    # Pre-defined argument classes imported from cascade_at.executor.args.args.
    ModelVersionID(),
    ParentLocationID(),
    SexID(),
    DmCommands(),
    DmOptions(),
    # Script-specific flags controlling filling and where priors come from.
    BoolArg('--fill', help='whether or not to fill the dismod database with data'),
    BoolArg('--prior-samples', help='whether or not the prior came from samples or just a mean fit'),
    IntArg('--prior-parent', help='the location ID of the parent database to grab the prior for'),
    IntArg('--prior-sex', help='the sex ID of the parent database to grab prior for'),
    IntArg('--prior-mulcov', help='the model version id where mulcov stats is passed in', required=False),
    # Script-specific flags controlling what gets written out afterwards.
    BoolArg('--save-fit', help='whether or not to save the fit'),
    BoolArg('--save-prior', help='whether or not to save the prior'),
    LogLevel(),
    StrArg('--test-dir', help='if set, will save files to the directory specified')
])
class DismodDBError(CascadeATError):
    """Error raised by the dismod_db script when it cannot proceed."""
def get_prior(path: Union[str, Path], location_id: int, sex_id: int,
              rates: List[str], samples: bool = True) -> Dict[str, Dict[str, np.ndarray]]:
    """
    Pull the prior grid draws for one location and sex out of a DisMod database.

    Parameters
    ----------
    path
        Path to the database to read from
    location_id
        The location ID to gather the prior for
    sex_id
        The sex ID to gather the prior for
    rates
        The rates to gather the prior for
    samples
        Forwarded unchanged as the ``samples`` argument of
        ``DismodExtractor.gather_draws_for_prior_grid``

    Returns
    -------
    Whatever ``DismodExtractor.gather_draws_for_prior_grid`` returns
    for the requested location, sex and rates.
    """
    extractor = DismodExtractor(path=path)
    return extractor.gather_draws_for_prior_grid(
        location_id=location_id,
        sex_id=sex_id,
        rates=rates,
        samples=samples,
    )
def get_mulcov_priors(model_version_id: int) -> Dict[Tuple[str, str, str], _Prior]:
    """
    Build Gaussian priors for covariate multipliers from the
    ``mulcov_stats.csv`` file saved in a model version's outputs directory.

    Each row of the file can contribute up to two priors: one keyed by its
    rate name and one keyed by its integrand name. A name equal to the
    string ``'none'`` is skipped.

    Parameters
    ----------
    model_version_id
        The model version ID to pull covariate multiplier statistics from

    Returns
    -------
    Dictionary keyed by (multiplier type, covariate name, rate or integrand
    name), where the multiplier type is one of 'alpha', 'beta' or 'gamma'.
    Empty if the statistics file does not exist or has no rows.
    """
    # Translation from the 'mulcov_type' column to the label used in the key.
    type_labels = {'rate_value': 'alpha', 'meas_value': 'beta', 'meas_noise': 'gamma'}

    context = Context(model_version_id=model_version_id)
    stats_file = os.path.join(context.outputs_dir, 'mulcov_stats.csv')
    if not os.path.exists(stats_file):
        return {}

    stats = pd.read_csv(stats_file)
    if stats.empty:
        return {}

    priors = {}
    for _, record in stats.iterrows():
        # Rate first, then integrand, so a later entry with the same key
        # overwrites an earlier one in the same order as before.
        for name_column in ('rate_name', 'integrand_name'):
            target_name = record[name_column]
            if target_name == 'none':
                continue
            key = (type_labels[record['mulcov_type']], record['c_covariate_name'], target_name)
            priors[key] = Gaussian(mean=record['mean'], standard_deviation=record['std'])
    return priors
def fill_database(path: Union[str, Path], settings: SettingsConfig,
                  inputs: MeasurementInputs, alchemy: Alchemy,
                  parent_location_id: int, sex_id: int, child_prior: Dict[str, Dict[str, np.ndarray]],
                  mulcov_prior: Dict[Tuple[str, str, str], _Prior],
                  options: Dict[str, Any]) -> DismodFiller:
    """
    Construct a DismodFiller for one parent location and sex and use it to
    fill the DisMod database at ``path``.

    Parameters
    ----------
    path
        Path of the database to fill
    settings
        Settings configuration handed to the filler
    inputs
        Measurement inputs handed to the filler
    alchemy
        Grid alchemy object handed to the filler
    parent_location_id
        The parent location for the database
    sex_id
        The sex for the database
    child_prior
        Rate priors that override the defaults, handed to the filler
    mulcov_prior
        Covariate multiplier priors that override the defaults, handed to the filler
    options
        Keyword arguments expanded into ``DismodFiller.fill_for_parent_child``

    Returns
    -------
    The DismodFiller, after ``fill_for_parent_child`` has been called on it.
    """
    filler = DismodFiller(
        path=path,
        settings_configuration=settings,
        measurement_inputs=inputs,
        grid_alchemy=alchemy,
        parent_location_id=parent_location_id,
        sex_id=sex_id,
        child_prior=child_prior,
        mulcov_prior=mulcov_prior,
    )
    filler.fill_for_parent_child(**options)
    return filler
def save_predictions(db_file: Union[str, Path],
                     model_version_id: int, gbd_round_id: int,
                     out_dir: Path,
                     locations: Optional[List[int]] = None,
                     sexes: Optional[List[int]] = None,
                     sample: bool = False,
                     predictions: Optional[pd.DataFrame] = None) -> None:
    """
    Format the predictions held in a DisMod database and write them out as
    draw files so they can be uploaded later on.

    Parameters
    ----------
    db_file
        Path to the DisMod database to extract from
    model_version_id
        The model version ID, passed on to the results handler
    gbd_round_id
        The GBD round ID, passed on to the prediction formatter
    out_dir
        Directory the draw files are saved in
    locations
        Optional locations passed on to the prediction formatter
    sexes
        Optional sexes passed on to the prediction formatter
    sample
        Passed on to the prediction formatter as its ``samples`` argument
    predictions
        Optional data frame passed on to the prediction formatter
    """
    LOG.info("Extracting results from DisMod SQLite Database.")
    extractor = DismodExtractor(path=db_file)
    formatted = extractor.format_predictions_for_ihme(
        locations=locations,
        sexes=sexes,
        gbd_round_id=gbd_round_id,
        samples=sample,
        predictions=predictions,
    )

    LOG.info(f"Saving the results to {out_dir}.")
    ResultsHandler().save_draw_files(
        df=formatted,
        directory=out_dir,
        add_summaries=True,
        model_version_id=model_version_id,
    )
def dismod_db(model_version_id: int, parent_location_id: int, sex_id: Optional[int] = None,
              dm_commands: Optional[List[str]] = None,
              dm_options: Optional[Dict[str, Union[int, str, float]]] = None,
              prior_samples: bool = False,
              prior_parent: Optional[int] = None, prior_sex: Optional[int] = None,
              prior_mulcov_model_version_id: Optional[int] = None,
              test_dir: Optional[str] = None, fill: bool = False,
              save_fit: bool = True, save_prior: bool = True) -> None:
    """
    Creates a dismod database using the saved inputs and the file
    structure specified in the context. Alternatively it will
    skip the filling stage and move straight to the command
    stage if ``fill`` is not set.

    Then runs an optional set of commands on the database, passed
    in the ``dm_commands`` argument.

    Also takes an optional ``dm_options`` dictionary that is passed to
    the dismod database to fill/modify the options table.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location for the database
    sex_id
        The parent sex for the database. If not given, the drill sex
        from the model settings is used.
    dm_commands
        A list of commands to pass to the run_dismod_commands function, executed
        directly on the dismod database. ``None`` is treated as no commands.
    dm_options
        A dictionary of options to pass to the dismod option table.
        ``None`` is treated as no options.
    prior_samples
        Whether the prior was derived from samples or not
    prior_mulcov_model_version_id
        The model version ID to use for pulling covariate multiplier
        statistics as priors for this fit
    prior_parent
        An optional parent location ID that specifies where to pull the prior
        information from.
    prior_sex
        An optional parent sex ID that specifies where to pull the prior information from.
    test_dir
        A test directory to create the database in rather than the database
        specified by the IHME file system context.
    fill
        Whether or not to fill the database with new inputs based on the model_version_id,
        parent_location_id, and sex_id. If not filling, this script can be used
        to just execute commands on the database instead.
    save_fit
        Whether or not to save the fit from this database as the parent fit.
    save_prior
        Whether or not to save the prior for the parent as the parent's prior.
        Only has an effect when ``fill`` is set, because the prior is taken
        from the filler's model.

    Raises
    ------
    DismodDBError
        If only one of ``prior_parent`` and ``prior_sex`` is provided.
    """
    # None sentinels replace the former mutable ``[]`` / ``{}`` defaults, and
    # also keep an explicit None from failing in the ``**options`` expansion.
    dm_commands = [] if dm_commands is None else dm_commands
    dm_options = {} if dm_options is None else dm_options

    if test_dir is not None:
        context = Context(model_version_id=model_version_id,
                          configure_application=False,
                          root_directory=test_dir)
    else:
        context = Context(model_version_id=model_version_id)

    inputs, alchemy, settings = context.read_inputs()

    if sex_id is None:
        sex_id = settings.model.drill_sex

    db_path = context.db_file(location_id=parent_location_id, sex_id=sex_id)

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if prior_parent or prior_sex:
        if not (prior_parent and prior_sex):
            raise DismodDBError("Need to pass both prior parent and sex or neither.")
        # The prior is read from the (prior_parent, prior_sex) database, but
        # for the location and sex that this database is being built for.
        prior_db = context.db_file(location_id=prior_parent, sex_id=prior_sex)
        child_prior = get_prior(
            path=prior_db,
            location_id=parent_location_id, sex_id=sex_id,
            rates=[r.rate for r in settings.rate],
            samples=prior_samples
        )
    else:
        child_prior = None

    if prior_mulcov_model_version_id is not None:
        LOG.info(f'Passing mulcov prior from model version id = {prior_mulcov_model_version_id}')
        mulcov_priors = get_mulcov_priors(prior_mulcov_model_version_id)
    else:
        mulcov_priors = None

    if fill:
        filler = fill_database(
            path=db_path, inputs=inputs, alchemy=alchemy, settings=settings,
            parent_location_id=parent_location_id, sex_id=sex_id,
            child_prior=child_prior, options=dm_options,
            mulcov_prior=mulcov_priors,
        )
        # Saving the prior needs the filler's model, so it lives inside
        # the fill branch.
        if save_prior:
            priors_to_save = format_rate_grid_for_ihme(
                rates=filler.parent_child_model['rate'],
                gbd_round_id=settings.gbd_round_id,
                location_id=parent_location_id,
                sex_id=sex_id
            )
            rh = ResultsHandler()
            rh.save_summary_files(
                df=priors_to_save, directory=context.prior_dir,
                model_version_id=model_version_id
            )

    if dm_commands:
        run_dismod_commands(dm_file=str(db_path), commands=dm_commands)

    if save_fit:
        save_predictions(
            db_file=context.db_file(location_id=parent_location_id, sex_id=sex_id),
            model_version_id=model_version_id,
            gbd_round_id=settings.gbd_round_id,
            out_dir=context.fit_dir
        )
def main():
    """Parse the command line with ARG_LIST, configure logging and run dismod_db."""
    parsed = ARG_LIST.parse_args(sys.argv[1:])
    logging.basicConfig(level=LEVELS[parsed.log_level])

    # Map the parsed command-line attributes onto dismod_db's keyword arguments.
    run_kwargs = {
        'model_version_id': parsed.model_version_id,
        'parent_location_id': parsed.parent_location_id,
        'sex_id': parsed.sex_id,
        'dm_commands': parsed.dm_commands,
        'dm_options': parsed.dm_options,
        'fill': parsed.fill,
        'prior_samples': parsed.prior_samples,
        'prior_parent': parsed.prior_parent,
        'prior_sex': parsed.prior_sex,
        'prior_mulcov_model_version_id': parsed.prior_mulcov,
        'test_dir': parsed.test_dir,
        'save_fit': parsed.save_fit,
        'save_prior': parsed.save_prior,
    }
    dismod_db(**run_kwargs)
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()