from typing import List, Optional
import pandas as pd
from cascade_at.core.db import elmo
from cascade_at.core.log import get_loggers
from cascade_at.dismod.integrand_mappings import make_integrand_map
from cascade_at.inputs.base_input import BaseInput
from cascade_at.inputs.demographics import Demographics
from cascade_at.inputs.uncertainty import stdev_from_crosswalk_version
from cascade_at.inputs.utilities import gbd_ids
from cascade_at.inputs.utilities.transformations import RELABEL_INCIDENCE_MAP
LOG = get_loggers(__name__)
[docs]class CrosswalkVersion(BaseInput):
def __init__(
    self,
    crosswalk_version_id: int,
    exclude_outliers: bool,
    demographics: Demographics,
    conn_def: str,
    gbd_round_id: int,
):
    """
    Set up a puller for the data of one crosswalk version in the epi database.

    Nothing is fetched at construction time; ``raw`` starts out as ``None``.

    Parameters
    ----------
    crosswalk_version_id
        The crosswalk version to pull from
    exclude_outliers
        whether to exclude outliers
    demographics
        The demographics object
    conn_def
        database connection definition
    gbd_round_id
        The GBD round; passed on to the base class initializer
    """
    super().__init__(gbd_round_id=gbd_round_id)

    # Keep the configuration on the instance, in signature order.
    self.crosswalk_version_id = crosswalk_version_id
    self.exclude_outliers = exclude_outliers
    self.demographics = demographics
    self.conn_def = conn_def

    # No crosswalk data is held until it is explicitly pulled.
    self.raw = None
def get_raw(self):
    """
    Fetch the raw crosswalk version from the database and keep it on ``self.raw``.
    These are the observations that will be used in the bundle.

    On platforms whose ``sys.platform`` contains ``'darwin'``, FIXME notes are
    logged at error level immediately before and after the database call.

    Returns
    -------
    This instance, so calls can be chained.
    """
    import sys

    LOG.info(f"Getting crosswalk version for {self.crosswalk_version_id}.")

    # The platform cannot change during the call, so evaluate the check once.
    running_on_darwin = 'darwin' in sys.platform

    if running_on_darwin:
        LOG.error(f"FIXME gma -- this call to elmo.get_crosswalk_version ought to contain an error_log_path argument.")
        LOG.error(f"FIXME gma -- START -- This call somehow switches logging from stdout to a socket.")

    self.raw = elmo.get_crosswalk_version(crosswalk_version_id=self.crosswalk_version_id)

    if running_on_darwin:
        LOG.error(f"FIXME gma -- END -- Now logging to a socket. LOG.handlers: {LOG.handlers}")

    return self
@staticmethod
def map_to_integrands(df: pd.DataFrame, relabel_incidence: int) -> pd.DataFrame:
    """
    Maps the data from the IHME databases to the integrands expected by DisMod AT.

    Rows with ``measure_id == 17`` (case fatality rate) are dropped, a ``measure``
    column holding the integrand name is added, and the measure names are then
    relabelled according to ``relabel_incidence``.

    The input data frame is never modified; the result is always a new data frame,
    so callers must use the return value.

    Parameters
    ----------
    df
        A data frame to map to integrands. It is read through its ``measure_id`` column.
    relabel_incidence
        A relabel incidence code.
        Can be found in :py:class:`~cascade_at.inputs.utilities.transformations.RELABEL_INCIDENCE_MAP`

    Returns
    -------
    A copy of ``df`` without case fatality rate rows and with a ``measure`` column.

    Raises
    ------
    RuntimeError
        If a ``measure_id`` in the data has no entry in the integrand map.
    ValueError
        If any row is still labelled ``incidence`` after relabelling.
    """
    cfr_measure_id = 17
    integrand_map = make_integrand_map()

    # Vectorised check; the builtin any() would walk the Series in Python.
    if (df.measure_id == cfr_measure_id).any():
        LOG.info(
            "Found case fatality rate, measure_id=17, in data. Ignoring it because it does not "
            "map to a Dismod-AT integrand and cannot be used by the model."
        )

    # Always work on an explicit copy: this keeps the caller's frame untouched
    # whether or not rows were dropped, and avoids assigning a column to a
    # slice of another frame (SettingWithCopyWarning).
    df = df[df.measure_id != cfr_measure_id].copy()

    try:
        df["measure"] = df.measure_id.apply(lambda k: integrand_map[k].name)
    except KeyError as ke:
        # Chain explicitly so the traceback names the lookup failure as the cause.
        raise RuntimeError(
            f"The bundle data uses measure {ke} which does not map "
            f"to an integrand. The map is {integrand_map}."
        ) from ke

    # Start from an identity mapping of every measure present, then let the
    # relabel map for this code override the entries it defines.
    measure_dict = {measure: measure for measure in df.measure.unique().tolist()}
    measure_dict.update(RELABEL_INCIDENCE_MAP[relabel_incidence])
    df["measure"] = df["measure"].map(measure_dict)

    # Plain 'incidence' must not survive the relabelling step.
    if (df.measure == 'incidence').any():
        LOG.error("Found incidence, measure_id=6, in data. Should be Tincidence or Sincidence.")
        raise ValueError("Measure ID cannot be 6 for incidence. Must be S or Tincidence.")

    return df