From 5f082eef4db788ab4413a810c1d24579516ed239 Mon Sep 17 00:00:00 2001 From: mbi6245 Date: Tue, 16 Jul 2024 14:17:57 -0700 Subject: [PATCH] added latex equation to percentage_change_trans and updated bivariate transformations user guide in sphinx docs --- docs/user_guide/percentage_change.rst | 55 +++++++++++++++++++--- docs/user_guide/simple_transformations.rst | 2 +- src/distrx/transforms.py | 2 +- 3 files changed, 51 insertions(+), 8 deletions(-) diff --git a/docs/user_guide/percentage_change.rst b/docs/user_guide/percentage_change.rst index fce23ec..e6b0550 100644 --- a/docs/user_guide/percentage_change.rst +++ b/docs/user_guide/percentage_change.rst @@ -1,7 +1,50 @@ -Percentage Change -============= +========================= +Bivariate Transformations +========================= -Currently percentage change is implemented in two ways. You can either provide raw data in the form -of observations from 2 separate, equally sized samples (as you would have from an experiment) or -raw counts with the separate, not necessarily equal sample sizes (as you would have from incidence -counts at two separate times) \ No newline at end of file +There are currently 2 bivariate transformations implemented in distrx: + * percentage change + * ratio + +These transformations are implemented using the first order delta method. See INSERT CONCEPTS for +derivation if desired. Note that all functions are in terms of sample statistics (e.g. mean), not +raw counts, even though some functions do take counts as input. + +Example: Percentage Change +-------------------------- + +Suppose we have samples in 2 different years measuring the incidence of cancer cases in each year +in various state counties. The data may look something like the following, + +.. 
csv-table:: + :header: county, cases_1, sample_1, cases_2, sample_2 + :widths: 10, 10, 10, 10, 10 + :align: center + + "King", 252, 400, 258, 250 + "Snohomish", 12, 300, 90, 500 + "Pierce", 505, 1000, 219, 1000 + "Kitsap", 88, 124, 67, 204 + +and our goal is to find the percentage change in the prevalence of cancer with its appropriate SE. + +The first step is to import the required function from the distrx package. + +.. code-block:: python + + from distrx import transform_bivariate + +A transformation is chosen by passing its name as a string through the ``transform`` parameter. +In this case, the call is the following. + +.. code-block:: python + + mu_tx, sigma_tx = transform_bivariate(c_x=df["cases_1"], + n_x=df["sample_1"], + c_y=df["cases_2"], + n_y=df["sample_2"], + transform="percentage_change") + +``mu_tx`` and ``sigma_tx`` are simply the percentage change for each county and their corresponding +standard errors, respectively. ``sigma_tx`` has already been scaled by the appropriate sample size so +we **should not** scale it additionally to obtain a confidence interval. diff --git a/docs/user_guide/simple_transformations.rst b/docs/user_guide/simple_transformations.rst index 9d755ca..2430373 100644 --- a/docs/user_guide/simple_transformations.rst +++ b/docs/user_guide/simple_transformations.rst @@ -16,7 +16,7 @@ order Taylor expansion of the transformation function. Example: Log Transform ---------------------- -Assume that we have some means and standard errors (SEs) of systolic blood pressure (SBP) from +Suppose that we have some means and standard errors (SEs) of systolic blood pressure (SBP) from several different samples. The data may look something like the following, .. csv-table:: diff --git a/src/distrx/transforms.py b/src/distrx/transforms.py index ce3d9c7..ea7d6e0 100644 --- a/src/distrx/transforms.py +++ b/src/distrx/transforms.py @@ -192,7 +192,7 @@ def percentage_change_trans( .. 
math:: - \frac{p_y}{p_x} - 1, \sigma * \exp(\mu) + \frac{p_y}{p_x} - 1, \sqrt{\frac{\sigma_y^2}{n_y\mu_x^2} + \frac{\mu_y^2\sigma_x^2}{n_x\mu_x^4}} Parameters ----------