diff --git a/docs/user_guide/percentage_change.rst b/docs/user_guide/percentage_change.rst index e6b0550..d57fd53 100644 --- a/docs/user_guide/percentage_change.rst +++ b/docs/user_guide/percentage_change.rst @@ -32,7 +32,7 @@ The first step is to import the required function from the distrx package. .. code-block:: python - from distrx import transform_univariate + from distrx import transform_bivariate Different transformation functions can be chosen through specifying a string parameter of which transform you would like to apply to your data. In this case, it is the following. @@ -47,4 +47,5 @@ transform you would like to apply to your data. In this case, it is the followin ``mu_tx`` and ``sigma_tx`` are simply the percentage change for each county and their corresponding standard errors, respectively. ``sigma_tx`` has already been scaled the appropriate sample size so -we **should not** scale it additionally to obtain a confidence interval. +we **should not** scale it additionally with some function of the sample size to obtain a +confidence interval. diff --git a/src/distrx/transforms.py b/src/distrx/transforms.py index ea7d6e0..1a61082 100644 --- a/src/distrx/transforms.py +++ b/src/distrx/transforms.py @@ -62,7 +62,7 @@ def log_trans( .. math:: - \log(mu), \frac{\sigma}{\mu} \cdot \frac{1}{\sqrt{n}} + \\log(\\mu), \\frac{\\sigma}{\\mu} \\cdot \\frac{1}{\\sqrt{n}} Parameters ---------- @@ -86,7 +86,7 @@ def logit_trans( .. math:: - \log(\frac{\mu}{1 - \mu}), \frac{\sigma}{\mu \cdot (1 - \mu)} \cdot \frac{1}{\sqrt{n}} + \\log(\\frac{\\mu}{1 - \\mu}), \\frac{\\sigma}{\\mu \\cdot (1 - \\mu)} \\cdot \\frac{1}{\\sqrt{n}} Parameters ---------- @@ -110,7 +110,7 @@ def exp_trans( .. math:: - \exp(\mu), \sigma \cdot \exp(\mu) \cdot \frac{1}{\sqrt{n}} + \\exp(\\mu), \\sigma \\cdot \\exp(\\mu) \\cdot \\frac{1}{\\sqrt{n}} Parameters ---------- @@ -134,7 +134,7 @@ def expit_trans( .. 
math:: - \frac{1}{1 + \exp(-\mu)}, \sigma \cdot \frac{\exp(\mu)}{(1 + \exp(\mu))^2} \cdot \frac{1}{\sqrt{n}} + \\frac{1}{1 + \\exp(-\\mu)}, \\sigma \\cdot \\frac{\\exp(\\mu)}{(1 + \\exp(\\mu))^2} \\cdot \\frac{1}{\\sqrt{n}} Parameters ---------- @@ -192,7 +192,7 @@ def percentage_change_trans( .. math:: - \frac{p_y}{p_x} - 1, \sqrt{\frac{\sigma_y^2}{n_y\mu_x^2} + \frac{\mu_y^2\sigma_x^2}{n_x\mu_x^4}} + \\frac{p_y}{p_x} - 1, \\sqrt{\\frac{\\sigma_y^2}{n_y\\mu_x^2} + \\frac{\\mu_y^2\\sigma_x^2}{n_x\\mu_x^4}} Parameters ---------- diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 97ad940..ddc7d60 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -4,14 +4,9 @@ import pandas as pd import pytest -from distrx.transforms import ( - delta_method, - transform_data, - transform_percentage_change, - transform_percentage_change_experiment, -) - -TRANSFORM_DICT = { +from distrx.transforms import transform_bivariate, transform_univariate + +UNIVARIATE_TRANSFORM_DICT = { "log": [np.log, lambda x: 1.0 / x], "logit": [lambda x: np.log(x / (1.0 - x)), lambda x: 1.0 / (x * (1.0 - x))], "exp": [np.exp, np.exp], @@ -20,96 +15,98 @@ lambda x: np.exp(-x) / (1.0 + np.exp(-x)) ** 2, ], } -TRANSFORM_LIST = list(TRANSFORM_DICT.keys()) -FUNCTION_LIST = [transform_data, delta_method] +UNIVARIATE_TRANSFORM_LIST = list(UNIVARIATE_TRANSFORM_DICT.keys()) + +# BIVARIATE_TRANSFORM_DICT = { +# "percentage change": [np.] 
+# } +BIVARIATE_TRANSFORM_LIST = ["percentage_change"] +# FUNCTION_LIST = [transform_data, delta_method] VALS = [0.1] * 2 +N = len(VALS) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) def test_method_name_valid(transform): """Raise ValueError for invalue `method`.""" with pytest.raises(ValueError): - transform_data(VALS, VALS, transform, method="dummy") + transform_univariate(VALS, VALS, N, transform, method="dummy") -@pytest.mark.parametrize("function", FUNCTION_LIST) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) -def test_input_len_match(function, transform): +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) +def test_input_len_match(transform): """Raise ValueError if lengths of input vectors don't match.""" with pytest.raises(ValueError): - function(VALS, VALS * 2, transform) + transform_univariate(VALS, VALS * 2, N, transform) -@pytest.mark.parametrize("function", FUNCTION_LIST) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) -def test_sigma_negative(function, transform): +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) +def test_sigma_negative(transform): """Raise ValueError if `sigma` contains negative values.""" vals = VALS + [-0.1] with pytest.raises(ValueError): - function(vals, vals, transform) + transform_univariate(vals, vals, N, transform) -@pytest.mark.parametrize("function", FUNCTION_LIST) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) -def test_sigma_zero(function, transform): +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) +def test_sigma_zero(transform): """Display warning if `sigma` contains zeros.""" vals = VALS + [0.0] with pytest.warns(UserWarning): - function(vals, vals, transform) + transform_univariate(vals, vals, N, transform) -@pytest.mark.parametrize("function", FUNCTION_LIST) -def test_transform_name_valid(function): +def test_transform_name_valid(): """Raise ValueError for invalid 
`transform`.""" with pytest.raises(ValueError): - function(VALS, VALS, "dummy") + transform_univariate(VALS, VALS, N, "dummy") + transform_bivariate(VALS, VALS, VALS, VALS, "dummy") -@pytest.mark.parametrize("function", FUNCTION_LIST) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) -def test_output_type(function, transform): +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) +def test_output_type(transform): """Output should be numpy arrays.""" - mu, sigma = function(VALS, VALS, transform) + mu, sigma = transform_univariate(VALS, VALS, N, transform) assert isinstance(mu, np.ndarray) assert isinstance(sigma, np.ndarray) -@pytest.mark.parametrize("function", FUNCTION_LIST) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) -def test_outout_len_match(function, transform): +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) +def test_outout_len_match(transform): """Length of output vectors should match.""" - mu, sigma = function(VALS, VALS, transform) + mu, sigma = transform_univariate(VALS, VALS, N, transform) assert len(mu) == len(sigma) -@pytest.mark.parametrize("transform", TRANSFORM_LIST) +@pytest.mark.parametrize("transform", UNIVARIATE_TRANSFORM_LIST) def test_delta_result(transform): """Check expected results.""" mu = np.random.uniform(0.1, 1.0, size=10) sigma = np.random.uniform(0.1, 1.0, size=10) - mu_ref = TRANSFORM_DICT[transform][0](mu) - sigma_ref = sigma * TRANSFORM_DICT[transform][1](mu) - mu_trans, sigma_trans = delta_method(mu, sigma, transform) + mu_ref = UNIVARIATE_TRANSFORM_DICT[transform][0](mu) + sigma_ref = sigma * UNIVARIATE_TRANSFORM_DICT[transform][1](mu) + mu_trans, sigma_trans = transform_univariate(mu, sigma, N, transform) assert np.allclose(mu_trans, mu_ref) assert np.allclose(sigma_trans, sigma_ref) -def test_percentage_change(): - x = np.random.normal(1, 0.1, 1000) - y = np.random.normal(1.1, 0.1, 1000) - z = np.random.normal(1, 0.1, 1001) - p, sigma = transform_percentage_change_experiment(x, y) 
- assert 0 < p and p < 1 - assert 0 < sigma and sigma < 1 - with pytest.raises(ValueError): - transform_percentage_change_experiment(x, z) +# TODO: DEPRECATE +# def test_percentage_change(): +# x = np.random.normal(1, 0.1, 1000) +# y = np.random.normal(1.1, 0.1, 1000) +# z = np.random.normal(1, 0.1, 1001) +# p, sigma = transform_percentage_change_experiment(x, y) +# assert 0 < p and p < 1 +# assert 0 < sigma and sigma < 1 +# with pytest.raises(ValueError): +# transform_percentage_change_experiment(x, z) def test_percentage_change_counts(): x = np.random.choice([0, 1], size=1000, p=[0.1, 0.9]) y = np.random.choice([0, 1], size=1100, p=[0.2, 0.8]) - mu, sigma = transform_percentage_change( - (x == 1).sum(), len(x), (y == 1).sum(), len(y) + mu, sigma = transform_bivariate( + (x == 1).sum(), len(x), (y == 1).sum(), len(y), "percentage_change" ) assert -1 <= mu and mu < np.inf assert 0 < sigma and sigma < 1 @@ -120,7 +117,7 @@ def test_percentage_change_input(): c_x, n_x = 100, 1000 c_y, n_y = 200, 1050 # with pytest.raises(TypeError): - transform_percentage_change(c_x, n_x, c_y, n_y) + transform_bivariate(c_x, n_x, c_y, n_y, "percentage_change") # base list input c_x = [100, 200] @@ -128,9 +125,11 @@ def test_percentage_change_input(): c_y = [300, 400] n_y = [1050, 1050] # with pytest.raises(TypeError): - transform_percentage_change(c_x, n_x, c_y, n_y) + transform_bivariate(c_x, n_x, c_y, n_y, "percentage_change") # dataframe input df = pd.DataFrame({"c_x": c_x, "n_x": n_x, "c_y": c_y, "n_y": n_y}) # with pytest.raises(TypeError): - transform_percentage_change(df["c_x"], df["n_x"], df["c_y"], df["n_y"]) + transform_bivariate( + df["c_x"], df["n_x"], df["c_y"], df["n_y"], "percentage_change" + )