Merge pull request #4 from ameya98/master

Implementation of RuLSIF
hoxo-m · Apr 5, 2019 · 7b7f512 · 7b7f512
2 parents dd9106c + 7134332
commit 7b7f512
Show file tree

Hide file tree

Showing 10 changed files with 480 additions and 157 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -18,8 +18,9 @@ before_install:
 
 # command to install dependencies
 install:
-  - conda install --yes python=$TRAVIS_PYTHON_VERSION numpy scipy nose pandas
   - conda create --yes -n=travis python=$TRAVIS_PYTHON_VERSION
+  - source activate travis
+  - conda install --yes numpy scipy nose pandas matplotlib
   - pip install coveralls
   - python setup.py install
 

diff --git a/densratio/RuLSIF.py b/densratio/RuLSIF.py
@@ -0,0 +1,191 @@
+"""
+Relative Unconstrained Least-Squares Fitting (RuLSIF): A Python Implementation
+References:
+    'Change-point detection in time-series data by relative density-ratio estimation'
+        Song Liu, Makoto Yamada, Nigel Collier and Masashi Sugiyama,
+        Neural Networks 43 (2013) 72-83.
+
+    'A Least-squares Approach to Direct Importance Estimation'
+        Takafumi Kanamori, Shohei Hido, and Masashi Sugiyama,
+        Journal of Machine Learning Research 10 (2009) 1391-1445.
+"""
+
+from numpy import inf, array, exp, matrix, diag, multiply, ones, asarray, log
+from numpy.random import randint
+from numpy.linalg import norm, solve
+from .density_ratio import DensityRatio, KernelInfo
+from .helpers import to_numpy_matrix
+
+
+def RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num=100, verbose=True):
+    """
+    Estimation of the alpha-Relative Density Ratio p(x)/p_alpha(y) by RuLSIF
+    (Relative Unconstrained Least-Square Importance Fitting)
+
+    p_alpha(y) = alpha * p(y) + (1 - alpha) * q(y)
+
+    Arguments:
+        x (numpy.matrix): Sample from p(x).
+        y (numpy.matrix): Sample from q(y).
+        alpha (float): Mixture parameter.
+        sigma_range (list<float>): Search range of Gaussian kernel bandwidth.
+        lambda_range (list<float>): Search range of regularization parameter.
+        kernel_num (int): Number of kernels. (Default 100)
+        verbose (bool): Indicator to print messages (Default True)
+
+    Returns:
+        densratio.DensityRatio object which has `compute_density_ratio()`.
+    """
+
+    # Number of samples.
+    nx = x.shape[0]
+    ny = y.shape[0]
+
+    # Number of kernel functions.
+    kernel_num = min(kernel_num, nx)
+
+    # Randomly take a subset of x, to identify centers for the kernels.
+    centers = x[randint(nx, size=kernel_num)]
+
+    if verbose:
+        print("RuLSIF starting...")
+
+    if sigma_range.size == 1 and lambda_range.size == 1:
+        sigma = sigma_range[0]
+        lambda_ = lambda_range[0]
+    else:
+        if verbose:
+            print("Searching for the optimal sigma and lambda...")
+
+        # Grid-search cross-validation for optimal kernel and regularization parameters.
+        opt_params = search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose)
+        sigma = opt_params["sigma"]
+        lambda_ = opt_params["lambda"]
+
+        if verbose:
+            print("Found optimal sigma = {:.3f}, lambda = {:.3f}.".format(sigma, lambda_))
+
+    if verbose:
+        print("Optimizing alpha...")
+
+    phi_x = compute_kernel_Gaussian(x, centers, sigma)
+    phi_y = compute_kernel_Gaussian(y, centers, sigma)
+    H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
+    h = phi_x.mean(axis=0).T
+    theta = asarray(solve(H + diag(array(lambda_).repeat(kernel_num)), h)).ravel()
+
+    # No negative coefficients.
+    theta[theta < 0] = 0
+
+    # Compute the alpha-relative density ratio, at the given coordinates.
+    def alpha_density_ratio(coordinates):
+        coordinates = to_numpy_matrix(coordinates)
+        phi_x = compute_kernel_Gaussian(coordinates, centers, sigma)
+        alpha_density_ratio = asarray(phi_x.dot(matrix(theta).T)).ravel()
+        return alpha_density_ratio
+
+    # Compute the approximate alpha-relative PE-divergence, given samples x and y from the respective distributions.
+    def PE_divergence(x, y):
+        # This is Y, in Reference 1.
+        x = to_numpy_matrix(x)
+        phi_x = compute_kernel_Gaussian(x, centers, sigma)
+
+        # Obtain alpha-relative density ratio at these points.
+        g_x = alpha_density_ratio(x)
+
+        # This is Y', in Reference 1.
+        y = to_numpy_matrix(y)
+        phi_y = compute_kernel_Gaussian(y, centers, sigma)
+
+        # Obtain alpha-relative density ratio at these points.
+        g_y = alpha_density_ratio(y)
+
+        # Compute the alpha-relative PE-divergence as given in Reference 1.
+        n = x.shape[0]
+        divergence = (-alpha*(g_x.T.dot(g_x))/2 - (1-alpha)*(g_y.T.dot(g_y))/2 + g_x.sum(axis=0)) / n
+        return divergence
+
+    # Compute the approximate alpha-relative KL-divergence, given samples x and y from the respective distributions.
+    def KL_divergence(x, y):
+        # This is Y, in Reference 1.
+        x = to_numpy_matrix(x)
+        phi_x = compute_kernel_Gaussian(x, centers, sigma)
+
+        # Obtain alpha-relative density ratio at these points.
+        g_x = alpha_density_ratio(x)
+
+        # Compute the alpha-relative KL-divergence.
+        n = x.shape[0]
+        divergence = log(g_x).sum(axis=0) / n
+        return divergence
+
+    if verbose:
+        print("Approximate alpha-relative PE-divergence = {:03.2f}".format(PE_divergence(x, y)))
+        print("Approximate alpha-relative KL-divergence = {:03.2f}".format(KL_divergence(x, y)))
+
+    kernel_info = KernelInfo(kernel_type="Gaussian RBF", kernel_num=kernel_num, sigma=sigma, centers=centers)
+    result = DensityRatio(method="RuLSIF", alpha=alpha, lambda_=lambda_, kernel_info=kernel_info, compute_density_ratio=alpha_density_ratio)
+
+    if verbose:
+        print("RuLSIF completed.")
+
+    return result
+
+
+# Gridsearch for the optimal parameters sigma and lambda by leave-one-out cross-validation. See Reference 2.
+def search_sigma_and_lambda(x, y, alpha, centers, sigma_range, lambda_range, verbose):
+    nx = x.shape[0]
+    ny = y.shape[0]
+    n_min = min(nx, ny)
+    kernel_num = centers.shape[0]
+
+    score_new = inf
+    sigma_new = 0
+    lambda_new = 0
+
+    for sigma in sigma_range:
+
+        phi_x = compute_kernel_Gaussian(x, centers, sigma)
+        phi_y = compute_kernel_Gaussian(y, centers, sigma)
+        H = alpha * (phi_x.T.dot(phi_x) / nx) + (1 - alpha) * (phi_y.T.dot(phi_y) / ny)
+        h = phi_x.mean(axis=0).T
+        phi_x = phi_x[:n_min].T
+        phi_y = phi_y[:n_min].T
+
+        for lambda_ in lambda_range:
+            B = H + diag(array(lambda_ * (ny - 1) / ny).repeat(kernel_num))
+            B_inv_X = solve(B, phi_y)
+            X_B_inv_X = multiply(phi_y, B_inv_X)
+            denom = (ny * ones(n_min) - ones(kernel_num).dot(X_B_inv_X)).A1
+            B0 = solve(B, h.dot(matrix(ones(n_min)))) + B_inv_X.dot(diag(h.T.dot(B_inv_X).A1 / denom))
+            B1 = solve(B, phi_x) + B_inv_X.dot(diag(ones(kernel_num).dot(multiply(phi_x, B_inv_X)).A1))
+            B2 = (ny - 1) * (nx * B0 - B1) / (ny * (nx - 1))
+            B2[B2 < 0] = 0
+            r_y = multiply(phi_y, B2).sum(axis=0).T
+            r_x = multiply(phi_x, B2).sum(axis=0).T
+
+            # Squared loss of RuLSIF, without regularization term.
+            # Directly related to the negative of the PE-divergence.
+            score = (r_y.T.dot(r_y).A1 / 2 - r_x.sum(axis=0)) / n_min
+
+            if verbose:
+                print("sigma = %.5f, lambda = %.5f, score = %.5f" % (sigma, lambda_, score))
+
+            if score < score_new:
+                score_new = score
+                sigma_new = sigma
+                lambda_new = lambda_
+
+    return {"sigma": sigma_new, "lambda": lambda_new}
+
+
+# Returns a 2D numpy matrix of kernel evaluated at the gridpoints with coordinates from x_list and y_list.
+def compute_kernel_Gaussian(x_list, y_list, sigma):
+    result = [[kernel_Gaussian(x, y, sigma) for y in y_list] for x in x_list]
+    result = matrix(result)
+    return result
+
+
+# Returns the Gaussian kernel evaluated at (x, y) with parameter sigma.
+def kernel_Gaussian(x, y, sigma):
+    return exp(- norm(x - y) / (2 * sigma * sigma))
diff --git a/densratio/core.py b/densratio/core.py
@@ -8,18 +8,17 @@
 """
 
 from numpy import linspace
-from .uLSIF import uLSIF
+from .RuLSIF import RuLSIF
 from .helpers import to_numpy_matrix
 
-def densratio(x, y, sigma_range = "auto", lambda_range = "auto",
-        kernel_num = 100, verbose = True):
-    """Estimate Density Ratio p(x)/q(y)
 
-    Args:
-        x: sample from p(x).
-        y: sample from p(y).
+def densratio(x, y, alpha=0, sigma_range="auto", lambda_range="auto", kernel_num=100, verbose=True):
+    """ Estimate alpha-mixture Density Ratio p(X)/(alpha*p(X) + (1 - alpha)*q(Y))
 
-    Kwargs:
+    Arguments:
+        x: sample from p(X).
+        y: sample from q(Y).
+        alpha: Default 0 - corresponds to ordinary density ratio.
         sigma_range: search range of Gaussian kernel bandwidth.
             Default "auto" means 10^-3, 10^-2, ..., 10^9.
         lambda_range: search range of regularization parameter for uLSIF.
@@ -31,15 +30,15 @@ def densratio(x, y, sigma_range = "auto", lambda_range = "auto",
         densratio.DensityRatio object which has `compute_density_ratio()`.
 
     Raises:
-        ValueError: dimension of x != dimension of y
+        ValueError: if dimension of x != dimension of y
 
     Usage::
       >>> from scipy.stats import norm
       >>> from densratio import densratio
 
-      >>> x = norm.rvs(size = 200, loc = 1, scale = 1./8)
-      >>> y = norm.rvs(size = 200, loc = 1, scale = 1./2)
-      >>> result = densratio(x, y)
+      >>> x = norm.rvs(size=200, loc=1, scale=1./8)
+      >>> y = norm.rvs(size=200, loc=1, scale=1./2)
+      >>> result = densratio(x, y, alpha=0.7)
       >>> print(result)
 
       >>> density_ratio = result.compute_density_ratio(y)
@@ -53,11 +52,9 @@ def densratio(x, y, sigma_range = "auto", lambda_range = "auto",
         raise ValueError("x and y must be same dimensions.")
 
     if not sigma_range or sigma_range == "auto":
-        sigma_range = 10 ** linspace(-3, 1, 9)
+        sigma_range = 10 ** linspace(-3, 9, 13)
     if not lambda_range or lambda_range == "auto":
-        lambda_range = 10 ** linspace(-3, 1, 9)
+        lambda_range = 10 ** linspace(-3, 9, 13)
 
-    result = uLSIF(x = x, y = y,
-                   sigma_range = sigma_range, lambda_range = lambda_range,
-                   kernel_num = kernel_num, verbose = verbose)
-    return(result)
+    result = RuLSIF(x, y, alpha, sigma_range, lambda_range, kernel_num, verbose)
+    return result
diff --git a/densratio/density_ratio.py b/densratio/density_ratio.py
@@ -3,6 +3,7 @@
 from pprint import pformat
 from re import sub
 
+
 class DensityRatio:
     """Density Ratio."""
     def __init__(self, method, alpha, lambda_, kernel_info, compute_density_ratio):
@@ -26,8 +27,8 @@ def __str__(self):
 
 The Function to Estimate Density Ratio:
   compute_density_ratio(x)
-"""[1:-1] % dict(method = self.method, kernel_info = self.kernel_info,
-                alpha = my_format(self.alpha), lambda_ = self.lambda_)
+"""[1:-1] % dict(method=self.method, kernel_info=self.kernel_info, alpha=my_format(self.alpha), lambda_=self.lambda_)
+
 
 class KernelInfo:
     """Kernel Information."""
@@ -43,8 +44,8 @@ def __str__(self):
   Number of kernels: %(kernel_num)s
   Bandwidth(sigma): %(sigma)s
   Centers: %(centers)s
-"""[1:-1] % dict(kernel_type = self.kernel_type, kernel_num = self.kernel_num,
-                sigma = self.sigma, centers = my_format(self.centers))
+"""[1:-1] % dict(kernel_type=self.kernel_type, kernel_num=self.kernel_num, sigma=self.sigma, centers=my_format(self.centers))
+
 
 def my_format(str):
     return sub(r"\s+" , " ", (pformat(str).split("\n")[0] + ".."))
diff --git a/densratio/helpers.py b/densratio/helpers.py
@@ -2,9 +2,11 @@
 
 from numpy import array, matrix, ndarray
 
+
 def is_numeric(x):
     return isinstance(x, int) or isinstance(x, float)
 
+
 def to_numpy_matrix(x):
     if isinstance(x, matrix):
         return x