create 2 versions of transform percentage change function, add jupyter notebook with simulations
ihmeuw-msca · Jun 20, 2024 · fd2bbd4
1 parent e07bc06
commit fd2bbd4
Show file tree

Hide file tree

Showing 4 changed files with 326 additions and 5 deletions.
diff --git a/.vscode/launch.json b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+  // Use IntelliSense to learn about possible attributes.
+  // Hover to view descriptions of existing attributes.
+  // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+  "version": "0.2.0",
+  "configurations": [
+    {
+      "name": "Python Debugger: Current File",
+      "type": "debugpy",
+      "request": "launch",
+      "program": "${file}",
+      "console": "integratedTerminal"
+    }
+  ]
+}
diff --git a/simulations.ipynb b/simulations.ipynb
diff --git a/src/distrx/transforms.py b/src/distrx/transforms.py
@@ -278,6 +278,7 @@ def transform_percentage_change(
     sigma_xy = cov[0, 1]
 
     delta_hat = (mu_y - mu_x) / mu_x
+    # TODO: add option instead of doing this by default
     bias_corr = (mu_y * sigma2_x) / ((n * mu_x) ** 2)
     p_hat = delta_hat + bias_corr
 
@@ -290,7 +291,42 @@ def transform_percentage_change(
     return p_hat, np.sqrt(sigma_trans)
 
 
-def transform_percentage_change_counts(
+def transform_percentage_change_counts1(
+    c_x: int, n_x: int, c_y: int, n_y: int
+) -> tuple:
+    """Percentage change of raw counts with a first-order standard error.
+
+    Parameters
+    ----------
+    c_x : int
+        raw count in one sample (e.g. of incidence)
+    n_x : int
+        sample size
+    c_y : int
+        raw count in second sample (e.g. of incidence)
+    n_y : int
+        sample size
+
+    Returns
+    -------
+    p_hat : float
+        point estimate of the change in counts, ``c_y / c_x - 1``
+    sigma_trans : float
+        standard error of ``p_hat`` in the transform space
+    """
+    mu_x = c_x / n_x
+    mu_y = c_y / n_y
+    # variances of the counts themselves, n * p * (1 - p), not of the means
+    sigma2_x = n_x * mu_x * (1 - mu_x)
+    sigma2_y = n_y * mu_y * (1 - mu_y)
+
+    # first-order variance of the ratio c_y / c_x (no covariance term)
+    sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
+
+    return ((c_y / c_x) - 1), np.sqrt(sigma_trans)
+
+
+def transform_percentage_change_counts2(
     c_x: int, n_x: int, c_y: int, n_y: int
 ) -> float:
     """alternative percentage change transformation with only counts provided
@@ -314,11 +350,13 @@ def transform_percentage_change_counts(
     mu_x = c_x / n_x
     mu_y = c_y / n_y
     sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
-    sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_x - c_y) * mu_y**2) / (n_y - 1)
+    sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
+    # print("look", sigma2_x, sigma2_y)
 
     sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
+    # sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
 
-    return sigma_trans
+    return ((mu_y / mu_x) - 1), np.sqrt(sigma_trans)
 
 
 def _check_input(

diff --git a/tests/test_transforms.py b/tests/test_transforms.py
@@ -1,3 +1,4 @@
+# TODO: CHANGE TESTS TO INCORPORATE POINT ESTIMATE
 """Tests for transforms.py module."""
 
 import numpy as np
@@ -7,7 +8,7 @@
     delta_method,
     transform_data,
     transform_percentage_change,
-    transform_percentage_change_counts,
+    transform_percentage_change_counts2,
 )
 
 TRANSFORM_DICT = {
@@ -107,7 +108,8 @@ def test_percentage_change():
 def test_percentage_change_counts():
     x = np.random.choice([0, 1], size=1000, p=[0.1, 0.9])
     y = np.random.choice([0, 1], size=1100, p=[0.2, 0.8])
-    sigma = transform_percentage_change_counts(
+    mu, sigma = transform_percentage_change_counts2(
         (x == 1).sum(), len(x), (y == 1).sum(), len(y)
     )
+    assert -1 <= mu < np.inf  # a percentage change is bounded below by -100%