Source code for cascade_at.executor.dismod_db

#!/usr/bin/env python
import logging
import sys
from pathlib import Path
from typing import Union, List, Dict, Any, Optional, Tuple
import os
import numpy as np
import pandas as pd

from cascade_at.core import CascadeATError
from cascade_at.context.model_context import Context
from cascade_at.core.log import get_loggers, LEVELS
from cascade_at.dismod.api.dismod_extractor import DismodExtractor
from cascade_at.dismod.api.dismod_filler import DismodFiller
from cascade_at.dismod.api.run_dismod import run_dismod_commands

from cascade_at.executor.args.arg_utils import ArgumentList
from cascade_at.executor.args.args import DmCommands, DmOptions, ParentLocationID, SexID
from cascade_at.executor.args.args import ModelVersionID, BoolArg, LogLevel, StrArg, IntArg
from cascade_at.inputs.measurement_inputs import MeasurementInputs
from cascade_at.model.grid_alchemy import Alchemy
from cascade_at.saver.results_handler import ResultsHandler
from cascade_at.settings.settings_config import SettingsConfig
from cascade_at.model.priors import Gaussian, _Prior
from cascade_at.dismod.api.fill_extract_helpers.posterior_to_prior import format_rate_grid_for_ihme


# Module-level logger, obtained from the project's logging helper.
LOG = get_loggers(__name__)


# Command-line arguments accepted by this script. main() parses sys.argv[1:]
# against this list and forwards the parsed values to dismod_db().
# '--prior-mulcov' is the only argument here that explicitly passes
# required=False.
ARG_LIST = ArgumentList([
    ModelVersionID(),
    ParentLocationID(),
    SexID(),
    DmCommands(),
    DmOptions(),
    BoolArg('--fill', help='whether or not to fill the dismod database with data'),
    BoolArg('--prior-samples', help='whether or not the prior came from samples or just a mean fit'),
    IntArg('--prior-parent', help='the location ID of the parent database to grab the prior for'),
    IntArg('--prior-sex', help='the sex ID of the parent database to grab prior for'),
    IntArg('--prior-mulcov', help='the model version id where mulcov stats is passed in', required=False),
    BoolArg('--save-fit', help='whether or not to save the fit'),
    BoolArg('--save-prior', help='whether or not to save the prior'),
    LogLevel(),
    StrArg('--test-dir', help='if set, will save files to the directory specified')
])


class DismodDBError(CascadeATError):
    """Signals a failure that is specific to running the dismod_db script."""


def get_prior(path: Union[str, Path], location_id: int, sex_id: int,
              rates: List[str], samples: bool = True) -> Dict[str, Dict[str, np.ndarray]]:
    """
    Pull the prior-grid draws for one location and sex out of a database.

    Parameters
    ----------
    path
        Path to the database; handed to ``DismodExtractor``.
    location_id
        Location ID forwarded to the extractor.
    sex_id
        Sex ID forwarded to the extractor.
    rates
        Rate names forwarded to the extractor.
    samples
        Forwarded unchanged as the extractor's ``samples`` argument.

    Returns
    -------
    The result of ``DismodExtractor.gather_draws_for_prior_grid`` for the
    given arguments.
    """
    extractor = DismodExtractor(path=path)
    return extractor.gather_draws_for_prior_grid(
        location_id=location_id,
        sex_id=sex_id,
        rates=rates,
        samples=samples,
    )
def get_mulcov_priors(model_version_id: int) -> Dict[Tuple[str, str, str], _Prior]:
    """
    Build Gaussian priors from the covariate multiplier statistics saved for
    a model version.

    The statistics are read from ``mulcov_stats.csv`` in the outputs directory
    of the model version's context. Each row contributes one prior per
    non-``'none'`` target (its rate name and/or its integrand name), keyed by
    ``(multiplier type, covariate name, rate or integrand name)`` where the
    multiplier type is translated to ``'alpha'``, ``'beta'`` or ``'gamma'``.

    Parameters
    ----------
    model_version_id
        The model version ID to pull covariate multiplier statistics from

    Returns
    -------
    The dictionary of priors; empty when the statistics file does not exist
    or contains no rows.
    """
    convert_type = {'rate_value': 'alpha', 'meas_value': 'beta', 'meas_noise': 'gamma'}
    mulcov_prior = {}

    ctx = Context(model_version_id=model_version_id)
    path = os.path.join(ctx.outputs_dir, 'mulcov_stats.csv')
    if not os.path.exists(path):
        return {}

    stats = pd.read_csv(path)
    if stats.empty:
        return {}

    for _, row in stats.iterrows():
        # A row may name a rate, an integrand, or both; rate is handled first.
        for target_column in ('rate_name', 'integrand_name'):
            if not (row[target_column] != 'none'):
                continue
            prior = Gaussian(mean=row['mean'], standard_deviation=row['std'])
            key = (
                convert_type[row['mulcov_type']],
                row['c_covariate_name'],
                row[target_column],
            )
            mulcov_prior[key] = prior
    return mulcov_prior
def fill_database(path: Union[str, Path], settings: SettingsConfig,
                  inputs: MeasurementInputs, alchemy: Alchemy,
                  parent_location_id: int, sex_id: int,
                  child_prior: Dict[str, Dict[str, np.ndarray]],
                  mulcov_prior: Dict[Tuple[str, str, str], _Prior],
                  options: Dict[str, Any]) -> DismodFiller:
    """
    Populate the DisMod database at ``path`` for one parent location and sex.

    A ``DismodFiller`` is constructed from the settings, inputs, model grids
    and (optional) prior overrides, then asked to fill the database for the
    parent and its children.

    Parameters
    ----------
    path
        Location of the database to fill.
    settings
        Passed to the filler as ``settings_configuration``.
    inputs
        Passed to the filler as ``measurement_inputs``.
    alchemy
        Passed to the filler as ``grid_alchemy``.
    parent_location_id
        Parent location ID for the fill.
    sex_id
        Sex ID for the fill.
    child_prior
        Prior override passed to the filler as ``child_prior``.
    mulcov_prior
        Prior override passed to the filler as ``mulcov_prior``.
    options
        Unpacked as keyword arguments to ``fill_for_parent_child``.

    Returns
    -------
    The ``DismodFiller`` after ``fill_for_parent_child`` has been called.
    """
    filler = DismodFiller(
        path=path,
        settings_configuration=settings,
        measurement_inputs=inputs,
        grid_alchemy=alchemy,
        parent_location_id=parent_location_id,
        sex_id=sex_id,
        child_prior=child_prior,
        mulcov_prior=mulcov_prior,
    )
    filler.fill_for_parent_child(**options)
    return filler
def save_predictions(db_file: Union[str, Path], model_version_id: int,
                     gbd_round_id: int, out_dir: Path,
                     locations: Optional[List[int]] = None,
                     sexes: Optional[List[int]] = None,
                     sample: bool = False,
                     predictions: Optional[pd.DataFrame] = None) -> None:
    """
    Format the predictions held in a DisMod database and write them out as
    draw files (with summaries) so they can be uploaded later.

    Parameters
    ----------
    db_file
        Path to the DisMod database to read from.
    model_version_id
        Model version ID passed along when saving the draw files.
    gbd_round_id
        GBD round ID passed to the formatting step.
    out_dir
        Directory the draw files are saved to.
    locations
        Optional locations passed to the formatting step.
    sexes
        Optional sexes passed to the formatting step.
    sample
        Passed to the formatting step as its ``samples`` argument.
    predictions
        Optional predictions data frame passed to the formatting step.
    """
    LOG.info("Extracting results from DisMod SQLite Database.")
    extractor = DismodExtractor(path=db_file)
    formatted = extractor.format_predictions_for_ihme(
        locations=locations,
        sexes=sexes,
        gbd_round_id=gbd_round_id,
        samples=sample,
        predictions=predictions,
    )

    LOG.info(f"Saving the results to {out_dir}.")
    ResultsHandler().save_draw_files(
        df=formatted,
        directory=out_dir,
        add_summaries=True,
        model_version_id=model_version_id,
    )
def dismod_db(model_version_id: int, parent_location_id: int,
              sex_id: Optional[int] = None,
              dm_commands: Optional[List[str]] = None,
              dm_options: Optional[Dict[str, Union[int, str, float]]] = None,
              prior_samples: bool = False,
              prior_parent: Optional[int] = None,
              prior_sex: Optional[int] = None,
              prior_mulcov_model_version_id: Optional[int] = None,
              test_dir: Optional[str] = None,
              fill: bool = False,
              save_fit: bool = True,
              save_prior: bool = True) -> None:
    """
    Creates a dismod database using the saved inputs and the file
    structure specified in the context. Alternatively it will
    skip the filling stage and move straight to the command
    stage if you don't pass --fill.

    Then runs an optional set of commands on the database passed
    in the --commands argument.

    Also passes an optional argument --options as a dictionary to
    the dismod database to fill/modify the options table.

    Parameters
    ----------
    model_version_id
        The model version ID
    parent_location_id
        The parent location for the database
    sex_id
        The parent sex for the database. When ``None``, the drill sex from
        the model settings (``settings.model.drill_sex``) is used.
    dm_commands
        A list of commands to pass to the run_dismod_commands function,
        executed directly on the dismod database. ``None`` (the default)
        is treated as an empty list, i.e. no commands are run.
    dm_options
        A dictionary of options to pass to the dismod option table.
        ``None`` (the default) is treated as an empty dictionary.
    prior_samples
        Whether the prior was derived from samples or not
    prior_mulcov_model_version_id
        The model version ID to use for pulling covariate multiplier
        statistics as priors for this fit
    prior_parent
        An optional parent location ID that specifies where to pull the
        prior information from.
    prior_sex
        An optional parent sex ID that specifies where to pull the prior
        information from.
    test_dir
        A test directory to create the database in rather than the database
        specified by the IHME file system context.
    fill
        Whether or not to fill the database with new inputs based on the
        model_version_id, parent_location_id, and sex_id. If not filling,
        this script can be used to just execute commands on the database
        instead.
    save_fit
        Whether or not to save the fit from this database as the parent fit.
    save_prior
        Whether or not to save the prior for the parent as the parent's
        prior. Only takes effect when ``fill`` is true, because the prior is
        read from the filler created during the fill.

    Raises
    ------
    DismodDBError
        If only one of ``prior_parent`` and ``prior_sex`` is passed. Both are
        tested for truthiness, so a value of 0 counts as "not passed".
    """
    # Fresh containers per call: the defaults are None rather than shared
    # mutable objects.
    dm_commands = [] if dm_commands is None else dm_commands
    dm_options = {} if dm_options is None else dm_options

    if test_dir is not None:
        context = Context(model_version_id=model_version_id,
                          configure_application=False,
                          root_directory=test_dir)
    else:
        context = Context(model_version_id=model_version_id)

    inputs, alchemy, settings = context.read_inputs()

    if sex_id is None:
        sex_id = settings.model.drill_sex

    db_path = context.db_file(location_id=parent_location_id, sex_id=sex_id)

    # If we want to override the rate priors with posteriors from a previous
    # database, pass them in here.
    if prior_parent or prior_sex:
        if not (prior_parent and prior_sex):
            raise DismodDBError("Need to pass both prior parent and sex or neither.")
        prior_db = context.db_file(location_id=prior_parent, sex_id=prior_sex)
        child_prior = get_prior(
            path=prior_db,
            location_id=parent_location_id, sex_id=sex_id,
            rates=[r.rate for r in settings.rate],
            samples=prior_samples
        )
    else:
        child_prior = None

    if prior_mulcov_model_version_id is not None:
        LOG.info(f'Passing mulcov prior from model version id = {prior_mulcov_model_version_id}')
        mulcov_priors = get_mulcov_priors(prior_mulcov_model_version_id)
    else:
        mulcov_priors = None

    if fill:
        filler = fill_database(
            path=db_path, inputs=inputs, alchemy=alchemy, settings=settings,
            parent_location_id=parent_location_id, sex_id=sex_id,
            child_prior=child_prior, options=dm_options,
            mulcov_prior=mulcov_priors,
        )
        # The prior to save comes from the filler's parent-child model, so it
        # is only available when the database was filled in this call.
        if save_prior:
            priors_to_save = format_rate_grid_for_ihme(
                rates=filler.parent_child_model['rate'],
                gbd_round_id=settings.gbd_round_id,
                location_id=parent_location_id,
                sex_id=sex_id
            )
            rh = ResultsHandler()
            rh.save_summary_files(
                df=priors_to_save,
                directory=context.prior_dir,
                model_version_id=model_version_id
            )

    if dm_commands:
        run_dismod_commands(dm_file=str(db_path), commands=dm_commands)

    if save_fit:
        save_predictions(
            db_file=context.db_file(location_id=parent_location_id, sex_id=sex_id),
            model_version_id=model_version_id,
            gbd_round_id=settings.gbd_round_id,
            out_dir=context.fit_dir
        )
def main():
    """
    Command-line entry point: parse ``sys.argv`` with ``ARG_LIST``, configure
    logging at the requested level, and run ``dismod_db`` with the parsed
    values.
    """
    parsed = ARG_LIST.parse_args(sys.argv[1:])
    logging.basicConfig(level=LEVELS[parsed.log_level])

    # Map the parsed command-line values onto dismod_db's keyword arguments.
    run_kwargs = {
        'model_version_id': parsed.model_version_id,
        'parent_location_id': parsed.parent_location_id,
        'sex_id': parsed.sex_id,
        'dm_commands': parsed.dm_commands,
        'dm_options': parsed.dm_options,
        'fill': parsed.fill,
        'prior_samples': parsed.prior_samples,
        'prior_parent': parsed.prior_parent,
        'prior_sex': parsed.prior_sex,
        'prior_mulcov_model_version_id': parsed.prior_mulcov,
        'test_dir': parsed.test_dir,
        'save_fit': parsed.save_fit,
        'save_prior': parsed.save_prior,
    }
    dismod_db(**run_kwargs)


if __name__ == '__main__':
    main()