def _comorbidities(df: pd.DataFrame) -> pd.DataFrame:
    """Add comorbidity count and category columns to an admitted-care frame.

    Two columns are written onto ``df`` in place:

    * ``comorb_count`` -- per-row number of non-null values across the
      ``diag_02`` .. ``diag_20`` columns.
    * ``comorb_cat`` -- ``'Yes'`` when ``comorb_count`` is greater than
      zero, otherwise ``'No'``.

    Args:
        df: Frame containing every column ``diag_02`` .. ``diag_20``.
            ``diag_01`` is deliberately excluded (presumably it holds the
            primary diagnosis -- confirm against the data dictionary).

    Returns:
        The same ``df`` object, with the two columns added.

    Raises:
        KeyError: If any of ``diag_02`` .. ``diag_20`` is absent from ``df``.
    """
    # diag_02 .. diag_20 inclusive: range() excludes its upper bound.
    diag_cols = [f'diag_{i:02d}' for i in range(2, 21)]

    # DataFrame.count skips nulls only. NOTE(review): blank strings or
    # placeholder codes would still be counted -- confirm they are converted
    # to NaN before this step.
    df['comorb_count'] = df[diag_cols].count(axis=1)

    # Vectorised labelling instead of a per-row lambda: the same values for
    # every non-empty frame, and still a string-typed column when the frame
    # has no rows. 'Missing' is never produced here because the count is
    # always a non-negative integer.
    df['comorb_cat'] = np.where(df['comorb_count'] > 0, 'Yes', 'No')

    return df