hoxo-m · mierzejk · Aug 22, 2023 · Aug 23, 2023 · Aug 24, 2023 · Aug 24, 2023
diff --git a/README.md b/README.md
@@ -305,7 +305,26 @@ plt.show()
 
 ![](README_files/figure-gfm/compare-2d-5.png)<!-- -->
 
-## 5. References
+## 5. `DensityRatio.alpha_normalize` function
+This is a supplementary function, that by applying the following linear transformation changes the lower `0` bound (infimum) of its `values` argument in the range of `[0, 1]` to `[alpha, 1]` :
+```math
+x' = (1 - alpha) * x + alpha =  x + alpha * (1 - x)
+```
+The function returns an array whose values are elements of the `[alpha, alpha^-1]` bounded set; the upper `alpha^-1` bound (supremum) is due to properties of the alpha-relative density ratio estimator. As long as the outcome is comprised of positive values only, when the `densratio_obj.alpha_normalize` function is applied to the `densratio_obj.compute_density_ratio` result, it renders the density ratio estimation eligible to be further processed on the logarithmic scale. 
+
+### 5.1. Implementation specifics
+To preserve vital estimator properties, there are 2 invariants always met by the function results:
+1. The number of unique values must not changed.
+2. The order of output values remains the same as in the input argument (as determined by [`numpy.argsort`](https://numpy.org/doc/stable/reference/generated/numpy.argsort.html)).
+
+Due to floating‑point arithmetic, if two consecutive input numbers that are not equal are to be transformed to the same outcome, in order to satisfy the constraints in the list above and yield different values as well, the [`numpy.nextafter`](https://numpy.org/doc/stable/reference/generated/numpy.nextafter.html) is employed in the direction of `0`.  
+By virtue of this implementation approach, in extreme cases there is a possibility of output values that:
+- are less than `alpha`, or
+- are nonpositive.
+
+Should it happen, a relevant warning is issued.
+
+## 6. References
 
 \[1\] Hido, S., Tsuboi, Y., Kashima, H., Sugiyama, M., & Kanamori, T.
 **Statistical outlier detection using direct density ratio estimation.**
@@ -322,7 +341,7 @@ in Machine Learning.** Cambridge University Press 2012.
 Detection in Time-Series Data by Relative Density-Ratio Estimation**
 Neural Networks, 2013.
 
-## 6. Related Work
+## 7. Related Work
 
 -   densratio for R <https://github.com/hoxo-m/densratio>
 -   pykliep <https://github.com/srome/pykliep>
diff --git a/README.rmd b/README.rmd
@@ -251,7 +251,26 @@ plt.title("Estimated Alpha-Relative Density Ratio")
 plt.show()
 ```
 
-## 5. References
+## 5. `DensityRatio.alpha_normalize` function
+This is a supplementary function, that by applying the following linear transformation changes the lower `0` bound (infimum) of its `values` argument in the range of `[0, 1]` to `[alpha, 1]` :
+```math
+x' = (1 - alpha) * x + alpha =  x + alpha * (1 - x)
+```
+The function returns an array whose values are elements of the `[alpha, alpha^-1]` bounded set; the upper `alpha^-1` bound (supremum) is due to properties of the alpha-relative density ratio estimator. As long as the outcome is comprised of positive values only, when the `densratio_obj.alpha_normalize` function is applied to the `densratio_obj.compute_density_ratio` result, it renders the density ratio estimation eligible to be further processed on the logarithmic scale.
+
+### 5.1. Implementation specifics
+To preserve vital estimator properties, there are 2 invariants always met by the function results:
+1. The number of unique values must not changed.
+2. The order of output values remains the same as in the input argument (as determined by [`numpy.argsort`](https://numpy.org/doc/stable/reference/generated/numpy.argsort.html)).
+
+Due to floating‑point arithmetic, if two consecutive input numbers that are not equal are to be transformed to the same outcome, in order to satisfy the constraints in the list above and yield different values as well, the [`numpy.nextafter`](https://numpy.org/doc/stable/reference/generated/numpy.nextafter.html) is employed in the direction of `0`.
+By virtue of this implementation approach, in extreme cases there is a possibility of output values that:
+- are less than `alpha`, or
+- are nonpositive.
+
+Should it happen, a relevant warning is issued.
+
+## 6. References
 
 [1] Hido, S., Tsuboi, Y., Kashima, H., Sugiyama, M., & Kanamori, T.
 **Statistical outlier detection using direct density ratio estimation.**
@@ -268,7 +287,7 @@ Cambridge University Press 2012.
 **Change-Point Detection in Time-Series Data by Relative Density-Ratio Estimation**
 Neural Networks, 2013.
 
-## 6. Related Work
+## 7. Related Work
 
 - densratio for R https://github.com/hoxo-m/densratio
 - pykliep https://github.com/srome/pykliep
diff --git a/densratio/RuLSIF.py b/densratio/RuLSIF.py
@@ -16,6 +16,7 @@
 from warnings import warn
 from .density_ratio import DensityRatio, KernelInfo
 from .helpers import guvectorize_compute, np_float, to_ndarray
+from .helpers import alpha_normalize as static_alpha_normalize
 
 
 def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
@@ -87,6 +88,21 @@ def alpha_density_ratio(coordinates):
 
         return alpha_density_ratio
 
+    def alpha_normalize(values: array) -> array:
+        """
+        Normalizes values less than 1 so the minimum value to replace 0 is symmetrical to alpha^-1
+        with respect to the natural logarithm.
+
+        Arguments:
+            values (numpy.array): A vector to normalize.
+
+        Returns:
+            Normalized numpy.array object that preserves the order and the number of unique input argument values.
+        """
+
+        return static_alpha_normalize(values, alpha)
+
+
     # Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
     def alpha_PE_divergence(x, y):
         # This is Y, in Reference 1.
@@ -112,23 +128,24 @@ def alpha_KL_divergence(x, y):
         x = to_ndarray(x)
 
         # Obtain alpha-relative density ratio at these points.
-        g_x = alpha_density_ratio(x)
+        g_x = alpha_normalize(alpha_density_ratio(x))
 
         # Compute the alpha-relative KL-divergence.
         n = x.shape[0]
-        divergence = log(g_x).sum(axis=0) / n
+        divergence = log(g_x).sum(axis=0) / n if (g_x > 0.).all() else '[not calculated]'
         return divergence
 
     alpha_PE = alpha_PE_divergence(x, y)
     alpha_KL = alpha_KL_divergence(x, y)
 
     if verbose:
         print("Approximate alpha-relative PE-divergence = {:03.2f}".format(alpha_PE))
-        print("Approximate alpha-relative KL-divergence = {:03.2f}".format(alpha_KL))
+        alpha_KL_format = '{}' if isinstance(alpha_KL, str) else '{:03.2f}'
+        print("Approximate alpha-relative KL-divergence =", alpha_KL_format.format(alpha_KL))
 
     kernel_info = KernelInfo(kernel_type="Gaussian", kernel_num=kernel_num, sigma=sigma, centers=centers)
     result = DensityRatio(method="RuLSIF", alpha=alpha, theta=theta, lambda_=lambda_, alpha_PE=alpha_PE, alpha_KL=alpha_KL,
-                          kernel_info=kernel_info, compute_density_ratio=alpha_density_ratio)
+                          kernel_info=kernel_info, compute_density_ratio=alpha_density_ratio, alpha_normalize=alpha_normalize)
 
     if verbose:
         print("RuLSIF completed.")

diff --git a/densratio/__init__.py b/densratio/__init__.py
@@ -1,7 +1,9 @@
 from warnings import filterwarnings
 from .core import densratio
+from .helpers import alpha_normalize
 from .RuLSIF import set_compute_kernel_target
 
 
 filterwarnings('default', message='\'numba\'', category=ImportWarning, module='densratio')
-__all__ = ['densratio', 'set_compute_kernel_target']
+filterwarnings('always', message='Normalized vector contains', category=RuntimeWarning, module=r'densratio\.helpers')
+__all__ = ['alpha_normalize', 'densratio', 'set_compute_kernel_target']
diff --git a/densratio/density_ratio.py b/densratio/density_ratio.py
@@ -4,7 +4,7 @@
 
 class DensityRatio:
     """Density Ratio."""
-    def __init__(self, method, alpha, theta, lambda_, alpha_PE, alpha_KL, kernel_info, compute_density_ratio):
+    def __init__(self, method, alpha, theta, lambda_, alpha_PE, alpha_KL, kernel_info, compute_density_ratio, alpha_normalize):
         self.method = method
         self.alpha = alpha
         self.theta = theta
@@ -13,6 +13,7 @@ def __init__(self, method, alpha, theta, lambda_, alpha_PE, alpha_KL, kernel_inf
         self.alpha_KL = alpha_KL
         self.kernel_info = kernel_info
         self.compute_density_ratio = compute_density_ratio
+        self.alpha_normalize = alpha_normalize
 
     def __str__(self):
         return """

diff --git a/densratio/helpers.py b/densratio/helpers.py
@@ -1,4 +1,7 @@
+import numpy as np
+
 from numpy import array, matrix, ndarray, result_type
+from warnings import warn
 
 
 np_float = result_type(float)
@@ -33,3 +36,54 @@ def to_ndarray(x):
         raise ValueError("Cannot transform to numpy.matrix.")
     else:
         return to_ndarray(array(x))
+
+
+def alpha_normalize(values: ndarray, alpha: float) -> ndarray:
+    """
+    Normalizes values less than 1 so the minimum value to replace 0 is symmetrical to alpha^-1
+    with respect to the natural logarithm.
+
+    Arguments:
+        values (numpy.array): A vector to normalize.
+        alpha (float): The nonnegative normalization term.
+
+    Returns:
+        Normalized numpy.array object that preserves the order and the number of unique input argument values.
+    """
+    values = np.asarray(values)
+    if values.ndim != 1:
+        raise ValueError('\'values\' must a 1d vector.')
+
+    if alpha <= 0.:
+        if alpha < 0.:
+            warn('\'alpha\' is negative, normalization aborted.', RuntimeWarning)
+
+        return values
+
+    a = 1. - alpha
+    inserted = last_value = 1.
+    outcome = np.empty(values.shape, dtype=values.dtype)
+
+    values_argsort = np.argsort(values)
+    for i in np.flip(values_argsort):
+        value = values[i]
+        if value >= 1.:
+            outcome[i] = value
+            continue
+
+        if value < last_value:
+            new_value = inserted - a * (last_value - value)
+            inserted = np.nextafter(inserted, 0.) if new_value == inserted else new_value
+            last_value = value
+        else:
+            assert value == last_value
+
+        outcome[i] = inserted
+
+    if inserted <= 0.:
+        warn(f'Normalized vector contains at least one nonpositive [min={inserted}] value.', RuntimeWarning)
+    elif inserted < alpha:
+        warn(f'Normalized vector contains at least one value [min={inserted}] less than alpha [{alpha}].',
+             RuntimeWarning)
+
+    return outcome
diff --git a/tests/test_helpers.py b/tests/test_helpers.py
@@ -1,10 +1,13 @@
 import unittest
 
+import numpy as np
 from numpy import array, matrix
 from numpy.testing import assert_array_equal
 from pandas import DataFrame
 from .context import helpers
 
+
+# noinspection PyMethodMayBeStatic
 class BasicTestSuite(unittest.TestCase):
     """Tests for Helper Functions."""
 
@@ -46,5 +49,51 @@ def test_to_ndarray_pandas_DataFrame(self):
         x = helpers.to_ndarray(x)
         assert_array_equal(x, array([[1,2],[3,4]]))
 
+    def test_alpha_normalize_0d_input(self):
+        with self.assertRaises(ValueError):
+            helpers.alpha_normalize(np.array(None), 1E-3)
+
+    def test_alpha_normalize_2d_input(self):
+        with self.assertRaises(ValueError):
+            helpers.alpha_normalize(np.arange(4).reshape(2, -1), 1E-3)
+
+    def test_alpha_normalize_zero_alpha(self):
+        values = np.random.rand(5)
+        assert_array_equal(values, helpers.alpha_normalize(values, 0))
+
+    def test_alpha_normalize_negative_alpha(self):
+        values = np.random.rand(5)
+        with self.assertWarns(RuntimeWarning):
+            assert_array_equal(values, helpers.alpha_normalize(values, -1E-3))
+
+    def test_alpha_normalize_lower_changed(self):
+        alpha = .05
+        lower, upper = np.arange(0, 1, alpha), np.arange(1, 21)
+        normalized_values = helpers.alpha_normalize(np.concatenate((lower, upper)), alpha)
+        self.assertRaises(AssertionError, assert_array_equal, lower, normalized_values[:lower.size])
+        assert_array_equal(upper, normalized_values[-upper.size:])
+
+    def test_alpha_normalize_equal_unique_count(self):
+        alpha = .05
+        values = np.concatenate((np.arange(0, 1, alpha), np.arange(1, 21)))
+        normalized_values = helpers.alpha_normalize(values, alpha)
+        self.assertEqual(np.unique(values).size, np.unique(normalized_values).size)
+
+    def test_alpha_normalize_same_argsort(self):
+        alpha = .05
+        values = np.concatenate((np.arange(0, 1, alpha), np.arange(1, 21)))
+        np.random.shuffle(values)
+        normalized_values = helpers.alpha_normalize(values, alpha)
+        self.assertTrue((np.argsort(values) == np.argsort(normalized_values)).all())
+
+    def test_alpha_normalize_alpha_warning(self):
+        values = np.array([0, np.nextafter(0, 1), 1])
+        self.assertWarns(RuntimeWarning, helpers.alpha_normalize, values, .5)
+
+    def test_alpha_normalize_nonpositive_warning(self):
+        values = np.array([0, np.nextafter(0, 1), 1])
+        self.assertWarns(RuntimeWarning, helpers.alpha_normalize, values, values[1])
+
+
 if __name__ == '__main__':
     unittest.main()