Skip to content

Commit

Permalink
add functionality to handle differnet input types, testing, and more …
Browse files Browse the repository at this point in the history
…simulation code for transform_percentage_change_counts
  • Loading branch information
mbi6245 committed Jun 27, 2024
1 parent c3d93ae commit 76371ce
Show file tree
Hide file tree
Showing 3 changed files with 307 additions and 73 deletions.
285 changes: 229 additions & 56 deletions simulations.ipynb

Large diffs are not rendered by default.

65 changes: 51 additions & 14 deletions src/distrx/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import numpy as np
import numpy.typing as npt
from mrtool import MRBRT, LinearCovModel, MRData


class FirstOrder:
Expand Down Expand Up @@ -291,8 +292,15 @@ def transform_percentage_change(
return p_hat, np.sqrt(sigma_trans)


def handle_input_pct(c_x, n_x, c_y, n_y):
return np.array([c_x]), np.array([n_x]), np.array([c_y]), np.array([n_y])


def transform_percentage_change_counts1(
c_x: int, n_x: int, c_y: int, n_y: int
c_x: np.ndarray,
n_x: np.ndarray,
c_y: np.ndarray,
n_y: np.ndarray,
) -> float:
"""alternative percentage change transformation with only counts provided
Expand All @@ -312,18 +320,31 @@ def transform_percentage_change_counts1(
sigma_trans: array_like
standard errors in the transform space
"""
c_x, n_x, c_y, n_y = handle_input_pct(c_x, n_x, c_y, n_y)

mu_x = c_x / n_x
mu_y = c_y / n_y
# sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
# sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
sigma2_x = n_x * mu_x * (1 - mu_x)
sigma2_y = n_y * mu_y * (1 - mu_y)
# typical sample var calc
sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)

# sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
print(sigma2_x, sigma2_y)
# var for p assuming binomial model
# sigma2_x = mu_x * (1 - mu_x) / n_x
# sigma2_y = mu_y * (1 - mu_y) / n_y

return ((c_y / c_x) - 1), np.sqrt(sigma_trans)
# var for p assuming beta prior
# sigma2_x = (
# (mu_x * c_x + 1) * (c_x - mu_x * c_x + 1) / ((c_x + 2) ** 2 * (c_x + 3))
# )
# sigma2_y = (
# (mu_y * c_y + 1) * (c_y - mu_y * c_y + 1) / ((c_y + 2) ** 2 * (c_y + 3))
# )

sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
# sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
# print(sigma2_x, sigma2_y)

return ((mu_y / mu_x) - 1), np.sqrt(sigma_trans)


def transform_percentage_change_counts2(
Expand All @@ -349,14 +370,30 @@ def transform_percentage_change_counts2(
"""
mu_x = c_x / n_x
mu_y = c_y / n_y
sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)
# print("look", sigma2_x, sigma2_y)
rat = mu_y / (mu_x + mu_y)
Rl = np.log(rat / (1 - rat))
# variance of p assuming binomial model, somewhat reasonable coverage w/o scaling CI
sigma2_x = mu_x * (1 - mu_x) / n_x
sigma2_y = mu_y * (1 - mu_y) / n_y

sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
# standard sample var calculation, extreme overcoverage
# sigma2_x = (c_x * (1 - mu_x) ** 2 + (n_x - c_x) * mu_x**2) / (n_x - 1)
# sigma2_y = (c_y * (1 - mu_y) ** 2 + (n_y - c_y) * mu_y**2) / (n_y - 1)

# assumed beta prior sample var calculation, extreme overcoverage
# sigma2_x = (
# (mu_x * c_x + 1) * (c_x - mu_x * c_x + 1) / ((c_x + 2) ** 2 * (c_x + 3))
# )
# sigma2_y = (
# (mu_y * c_y + 1) * (c_y - mu_y * c_y + 1) / ((c_y + 2) ** 2 * (c_y + 3))
# )

# sigma_trans = (sigma2_y / mu_x**2) + (mu_y**2 * sigma2_x / (mu_x**4))
# sigma_trans = (sigma2_y / c_x**2) + (c_y**2 * sigma2_x / (c_x**4))
sigma2_Rl = (sigma2_x / (mu_x**2)) + (sigma2_y / (mu_y**2))
sigma_tx = sigma2_Rl * np.exp(2 * Rl)

return ((mu_y / mu_x) - 1), np.sqrt(sigma_trans)
return ((mu_y / mu_x) - 1), np.sqrt(sigma_tx)


def _check_input(
Expand Down
30 changes: 27 additions & 3 deletions tests/test_transforms.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
"""Tests for transforms.py module."""

import numpy as np
import pandas as pd
import pytest

from distrx.transforms import (
delta_method,
transform_data,
transform_percentage_change,
transform_percentage_change_counts2,
transform_percentage_change_counts1,
)

TRANSFORM_DICT = {
Expand Down Expand Up @@ -107,8 +108,31 @@ def test_percentage_change():
def test_percentage_change_counts():
x = np.random.choice([0, 1], size=1000, p=[0.1, 0.9])
y = np.random.choice([0, 1], size=1100, p=[0.2, 0.8])
mu, sigma = transform_percentage_change_counts2(
mu, sigma = transform_percentage_change_counts1(
(x == 1).sum(), len(x), (y == 1).sum(), len(y)
)
assert -1 <= mu and mu < np.infty
assert -1 <= mu and mu < np.inf
assert 0 < sigma and sigma < 1


def test_percentage_change_input():
# scalar input
c_x, n_x = 100, 1000
c_y, n_y = 200, 1050
# with pytest.raises(TypeError):
transform_percentage_change_counts1(c_x, n_x, c_y, n_y)

# base list input
c_x = [100, 200]
n_x = [1000, 1000]
c_y = [300, 400]
n_y = [1050, 1050]
# with pytest.raises(TypeError):
transform_percentage_change_counts1(c_x, n_x, c_y, n_y)

# dataframe input
df = pd.DataFrame({"c_x": c_x, "n_x": n_x, "c_y": c_y, "n_y": n_y})
# with pytest.raises(TypeError):
transform_percentage_change_counts1(
df["c_x"], df["n_x"], df["c_y"], df["n_y"]
)

0 comments on commit 76371ce

Please sign in to comment.