add functionality to handle differnet input types, testing, and more …

…simulation code for transform_percentage_change_counts
ihmeuw-msca · Jun 27, 2024 · 76371ce · 76371ce
1 parent c3d93ae
commit 76371ce
Show file tree

Hide file tree

Showing 3 changed files with 307 additions and 73 deletions.
diff --git a/simulations.ipynb b/simulations.ipynb
diff --git a/src/distrx/transforms.py b/src/distrx/transforms.py
@@ -15,6 +15,7 @@
 
 import numpy as np
 import numpy.typing as npt
+from mrtool import MRBRT, LinearCovModel, MRData
 
 
 class FirstOrder:
@@ -291,8 +292,15 @@ def transform_percentage_change(
     return p_hat, np.sqrt(sigma_trans)
 
 
+def handle_input_pct(c_x, n_x, c_y, n_y):
+    return np.array([c_x]), np.array([n_x]), np.array([c_y]), np.array([n_y])
+
+
 def transform_percentage_change_counts1(
-    c_x: int, n_x: int, c_y: int, n_y: int
+    c_x: np.ndarray,
+    n_x: np.ndarray,
+    c_y: np.ndarray,
+    n_y: np.ndarray,
 ) -> float:
     """alternative percentage change transformation with only counts provided
 
@@ -312,18 +320,31 @@ def transform_percentage_change_counts1(
     sigma_trans: array_like
         standard errors in the transform space
     """
+    c_x, n_x, c_y, n_y = handle_input_pct(c_x, n_x, c_y, n_y)
+
     mu_x = c_x / n_x
     mu_y = c_y / n_y
-    # sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
-    # sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
-    sigma2_x = n_x * mu_x * (1 - mu_x)
-    sigma2_y = n_y * mu_y * (1 - mu_y)
+    # typical sample var calc
+    sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
+    sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
 
-    # sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
-    sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
-    print(sigma2_x, sigma2_y)
+    # var for p assuming binomial model
+    # sigma2_x = mu_x * (1 - mu_x) / n_x
+    # sigma2_y = mu_y * (1 - mu_y) / n_y
 
-    return ((c_y / c_x) - 1), np.sqrt(sigma_trans)
+    # var for p assuming beta prior
+    # sigma2_x = (
+    #     (mu_x * c_x + 1) * (c_x - mu_x * c_x + 1) / ((c_x + 2) ** 2 * (c_x + 3))
+    # )
+    # sigma2_y = (
+    #     (mu_y * c_y + 1) * (c_y - mu_y * c_y + 1) / ((c_y + 2) ** 2 * (c_y + 3))
+    # )
+
+    sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
+    # sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
+    # print(sigma2_x, sigma2_y)
+
+    return ((mu_y / mu_x) - 1), np.sqrt(sigma_trans)
 
 
 def transform_percentage_change_counts2(
@@ -349,14 +370,30 @@ def transform_percentage_change_counts2(
     """
     mu_x = c_x / n_x
     mu_y = c_y / n_y
-    sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
-    sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
-    # print("look", sigma2_x, sigma2_y)
+    rat = mu_y / (mu_x + mu_y)
+    Rl = np.log(rat / (1 - rat))
+    # variance of p assuming binomial model, somewhat reasonable coverage w/o scaling CI
+    sigma2_x = mu_x * (1 - mu_x) / n_x
+    sigma2_y = mu_y * (1 - mu_y) / n_y
 
-    sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
+    # standard sample var calculation, extreme overcoverage
+    # sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
+    # sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
+
+    # assumed beta prior sample var calculation, extreme overcoverage
+    # sigma2_x = (
+    #     (mu_x * c_x + 1) * (c_x - mu_x * c_x + 1) / ((c_x + 2) ** 2 * (c_x + 3))
+    # )
+    # sigma2_y = (
+    #     (mu_y * c_y + 1) * (c_y - mu_y * c_y + 1) / ((c_y + 2) ** 2 * (c_y + 3))
+    # )
+
+    # sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
     # sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
+    sigma2_Rl = (sigma2_x / (mu_x**2)) + (sigma2_y / (mu_y**2))
+    sigma_tx = sigma2_Rl * np.exp(2 * Rl)
 
-    return ((mu_y / mu_x) - 1), np.sqrt(sigma_trans)
+    return ((mu_y / mu_x) - 1), np.sqrt(sigma_tx)
 
 
 def _check_input(

diff --git a/tests/test_transforms.py b/tests/test_transforms.py
@@ -1,13 +1,14 @@
 """Tests for transforms.py module."""
 
 import numpy as np
+import pandas as pd
 import pytest
 
 from distrx.transforms import (
     delta_method,
     transform_data,
     transform_percentage_change,
-    transform_percentage_change_counts2,
+    transform_percentage_change_counts1,
 )
 
 TRANSFORM_DICT = {
@@ -107,8 +108,31 @@ def test_percentage_change():
 def test_percentage_change_counts():
     x = np.random.choice([0, 1], size=1000, p=[0.1, 0.9])
     y = np.random.choice([0, 1], size=1100, p=[0.2, 0.8])
-    mu, sigma = transform_percentage_change_counts2(
+    mu, sigma = transform_percentage_change_counts1(
         (x == 1).sum(), len(x), (y == 1).sum(), len(y)
     )
-    assert -1 <= mu and mu < np.infty
+    assert -1 <= mu and mu < np.inf
     assert 0 < sigma and sigma < 1
+
+
+def test_percentage_change_input():
+    # scalar input
+    c_x, n_x = 100, 1000
+    c_y, n_y = 200, 1050
+    # with pytest.raises(TypeError):
+    transform_percentage_change_counts1(c_x, n_x, c_y, n_y)
+
+    # base list input
+    c_x = [100, 200]
+    n_x = [1000, 1000]
+    c_y = [300, 400]
+    n_y = [1050, 1050]
+    # with pytest.raises(TypeError):
+    transform_percentage_change_counts1(c_x, n_x, c_y, n_y)
+
+    # dataframe input
+    df = pd.DataFrame({"c_x": c_x, "n_x": n_x, "c_y": c_y, "n_y": n_y})
+    # with pytest.raises(TypeError):
+    transform_percentage_change_counts1(
+        df["c_x"], df["n_x"], df["c_y"], df["n_y"]
+    )