Commit c4d0f70

Merge pull request #13 from jameschapman19/_cca_tutorial_visualisation

partial correlation and simple sns pair plot

raamana committed Dec 13, 2021
2 parents 6591dac + 520a3f0
Showing 7 changed files with 424 additions and 14 deletions.
2 changes: 2 additions & 0 deletions confounds/__init__.py
@@ -11,6 +11,8 @@
if version_info.major >= 3:
    from confounds.base import Residualize, Augment, DummyDeconfounding, \
        ConfoundsException
    from confounds.visualize import pairs
    from confounds.metrics import partial_correlation, prediction_partial_correlation
else:
    raise NotImplementedError('confounds library requires Python 3 or higher!')
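
With these two lines, the new helpers become importable straight from the package root; a minimal sketch of the resulting public API (assuming the package is installed):

from confounds import pairs, partial_correlation, prediction_partial_correlation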

114 changes: 114 additions & 0 deletions confounds/metrics.py
@@ -6,3 +6,117 @@
degree of harmonization achieved (e.g. reduction in variance of means/medians)
"""

import numpy as np
from scipy import stats

from confounds import Residualize


def partial_correlation(X, C=None):
    """
    Calculates the pairwise partial correlations between all of the variables
    in X, with respect to some confounding variables C.

    Can be used as a measure of the strength of the relationship between two
    variables of interest while controlling for some other variables.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape (n_samples, n_features)
        The training input samples.

    C : {array-like, sparse matrix}, shape (n_samples, n_confounds)
        Array of confounding variables.

    Returns
    -------
    partial_correlations : ndarray, shape (n_features, n_features)
        The pairwise partial correlations between the variables in X.

    Examples
    --------
    Observing the pairwise correlations of different MRI brain voxels in X in
    neuroimaging studies, while controlling for the effects of scanner type
    and participant age in C.
    """
    # residualize X against the confounds, then correlate the residuals
    resx = Residualize()
    resx.fit(X, y=C)
    deconfound_X = resx.transform(X, y=C)
    return np.corrcoef(deconfound_X, rowvar=False)
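
A minimal usage sketch for partial_correlation() on synthetic data (the variable names here are illustrative, not part of the library):

import numpy as np
from confounds.metrics import partial_correlation

rng = np.random.RandomState(42)
C = rng.normal(size=(200, 1))                 # one confounding variable
X = C + rng.normal(scale=0.5, size=(200, 3))  # three features, all driven by C
pcorr = partial_correlation(X, C=C)           # (3, 3) matrix of partial correlations
# off-diagonal entries shrink toward zero once the shared confound is removed
print(np.round(pcorr, 2))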


def partial_correlation_t_test(partial_correlations, n, g):
    """
    Calculates the t-statistic and p-value for pairwise partial correlations.

    The statistical significance of a partial correlation r can be obtained
    parametrically using a Student's t distribution, with
    t = r * sqrt(df / (1 - r**2)) on df = n - g - 2 degrees of freedom.
    This is equivalent to fitting and testing the significance of a linear
    regression model with model predictions p and confounds C as covariates.

    References
    ----------
    Dinga R, Schmaal L, Penninx BW, Veltman DJ, Marquand AF. Controlling for
    effects of confounding variables on machine learning predictions.
    BioRxiv. 2020 Jan 1.

    Parameters
    ----------
    partial_correlations : {array-like, sparse matrix}, shape (n_features, n_features)
        Partial correlations of the variables with respect to some confounds.

    n : int
        Number of samples.

    g : int
        Number of confounding variables.

    Returns
    -------
    t_statistic : ndarray
        The t-statistic associated with each pairwise partial correlation.

    p_value : ndarray
        The one-sided p-value associated with each pairwise partial correlation.
    """
    # clip perfect correlations (e.g. the diagonal) in place, to avoid division by zero below
    partial_correlations[partial_correlations == 1] = 1 - 1e-7
    # degrees of freedom: number of samples minus number of confounds minus 2
    df = n - 2 - g
    # t-statistic for each partial correlation, and its one-sided p-value
    t_statistic = partial_correlations * np.sqrt(df / (1 - partial_correlations ** 2))
    p_value = stats.t.sf(np.abs(t_statistic), df=df)
    return t_statistic, p_value
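
Continuing the sketch above, the significance of each pairwise partial correlation could then be assessed as follows (note the function clips the diagonal of its input in place):

t_stat, p_val = partial_correlation_t_test(pcorr, n=200, g=1)
print(p_val.shape)  # (3, 3); small off-diagonal p-values suggest association beyond C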


def prediction_partial_correlation(predictions, targets, confounds):
    """
    Returns the partial correlation between predictions and targets after
    residualizing the effect of confounds.

    This is a measure of the predictive power of the model while controlling
    for the effect of confounds; its statistical significance can be assessed
    with partial_correlation_t_test().

    References
    ----------
    Dinga R, Schmaal L, Penninx BW, Veltman DJ, Marquand AF. Controlling for
    effects of confounding variables on machine learning predictions.
    BioRxiv. 2020 Jan 1.

    Parameters
    ----------
    predictions : {array-like, sparse matrix}, shape (n_samples, n_targets)
        Predictions made by the model.

    targets : {array-like, sparse matrix}, shape (n_samples, n_targets)
        The target values the model was trained to predict.

    confounds : {array-like, sparse matrix}, shape (n_samples, n_confounds)
        Array of confounding variables.

    Returns
    -------
    corr_p : ndarray, shape (n_targets,)
        The partial correlation of each prediction with its associated target,
        with respect to the confounds.
    """
    if predictions.ndim == 1:
        predictions = predictions[:, np.newaxis]
    if targets.ndim == 1:
        targets = targets[:, np.newaxis]
    assert (predictions.shape == targets.shape), f"Dimensions of predictions and targets do not match. " \
                                                 f"predictions shape {predictions.shape}, " \
                                                 f"targets shape {targets.shape}"
    p = predictions.shape[1]
    corr_p = partial_correlation(np.hstack((predictions, targets)), C=confounds)
    # extract only the partial correlations between predictions and their associated targets
    return np.diag(corr_p[:p, p:])
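
A hedged end-to-end sketch of scoring a model with this metric (toy data; all names are illustrative):

import numpy as np
from sklearn.linear_model import LinearRegression
from confounds.metrics import prediction_partial_correlation

rng = np.random.RandomState(0)
conf = rng.normal(size=(150, 2))                                 # two confounds
X = rng.normal(size=(150, 5)) + conf.sum(axis=1, keepdims=True)  # features contaminated by conf
y = X[:, 0] + rng.normal(scale=0.1, size=150)

model = LinearRegression().fit(X, y)
# partial correlation between predictions and targets, controlling for conf
corr_p = prediction_partial_correlation(model.predict(X), y, conf)
print(corr_p)  # length-1 array, one entry per target column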
48 changes: 40 additions & 8 deletions confounds/tests/test_confounds.py
@@ -3,15 +3,18 @@

"""Tests for `confounds` package."""

import os

import numpy as np
from numpy.testing import assert_almost_equal, assert_array_less
from sklearn.datasets import make_classification, make_sparse_uncorrelated
from sklearn.linear_model import LinearRegression
from sklearn.utils.estimator_checks import check_estimator

from confounds import prediction_partial_correlation
from confounds.base import Augment, DummyDeconfounding, Residualize
from confounds.combat import ComBat
from confounds.metrics import partial_correlation_t_test, partial_correlation


def test_estimator_API():
@@ -86,14 +89,14 @@ def test_combat():
n_features = 2

# One effect of interest that we want to keep
X = rs.normal(size=(n_subj_per_batch + n_subj_per_batch,))

# Global mean and noise parameters
grand_mean = np.array([0.3, 0.2])
eps = rs.normal(scale=1e-5, size=(n_subj_per_batch * 2, n_features))

# Batch parameters
batch = [1] * n_subj_per_batch + [2] * n_subj_per_batch
shift_1 = np.array([0.05, -0.1])
shift_2 = np.array([-0.3, 0.15])
eps_1 = rs.normal(scale=1e-1, size=(n_subj_per_batch, n_features))
@@ -109,8 +112,8 @@ def test_combat():
Y += grand_mean

# Add dependence with the effect of interest
Y[:, 0] += 0.2 * X
Y[:, 1] += -0.16 * X

# Add global noise
Y += eps
@@ -147,7 +150,7 @@ def test_combat():
# Test that batches no longer have different variances
p_scale_after = np.array([bartlett(y[:n_subj_per_batch],
y[n_subj_per_batch:])[1]
for y in Y_trans.T]
)
assert np.all(p_scale_after > 0.05)

@@ -186,3 +189,32 @@ def test_combat_bladder():
batch=batch,
effects_interest=effects_interest)
assert np.allclose(Y_combat_effects, bladder_test['Y_combat_effects'])


def test_partial_correlation():
    """Check that partial correlations are lower than the raw correlations."""
    n_samples = 100
    train_all, train_y = make_classification(n_samples=n_samples, n_features=5)
    train_X, train_confounds = splitter_X_confounds(train_all, 2)
    # partial correlation with an all-zero confound equals the plain correlation from np.corrcoef
    assert_almost_equal(np.corrcoef(train_X, rowvar=False),
                        partial_correlation(train_X, C=np.zeros((train_X.shape[0], 1))))
    # a linear regression fit on all variables should achieve a partial correlation
    # (after removing the confounds) below its r**2 score on the full data
    lr = LinearRegression().fit(train_all, train_y)
    pred = lr.predict(train_all)
    corr_p = prediction_partial_correlation(pred, train_y, train_confounds)
    assert_array_less(corr_p, lr.score(train_all, train_y))


def test_partial_correlation_t_test():
    C = np.random.normal(size=(100, 1))
    C_dummy = np.zeros_like(C)
    X = C + np.random.normal(0, 0.1, size=(100, 10))
    corr_p = partial_correlation(X, C=C_dummy)
    t_statistic, statistical_significance = partial_correlation_t_test(corr_p, X.shape[0], C_dummy.shape[1])
    corr_pd = partial_correlation(X, C=C)
    t_statistic, statistical_significance_d = partial_correlation_t_test(corr_pd, X.shape[0], C.shape[1])
    # p-values should grow (significance should weaken) once the true confound C is controlled for
    idx = np.triu_indices_from(statistical_significance, 1)
    assert_array_less(statistical_significance[idx], statistical_significance_d[idx])
8 changes: 4 additions & 4 deletions confounds/visualize.py
@@ -3,6 +3,7 @@
Module for the visualization for the confounds and their effects.
"""
import seaborn as sns


def samplets(features,
@@ -23,8 +24,7 @@ def samplets(features,


def pairs(confounds,
          color_by=None):
"""
Plots each confound against the rest, one pair at a time, colored
by another confound (which is excluded from the plots).
@@ -36,8 +36,8 @@
Disease vs. Age, vs. Gender on the third row.
"""

pp = sns.pairplot(confounds, hue=color_by)
return pp
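
A minimal sketch of calling the now-implemented pairs() (assumes the confounds are held in a pandas DataFrame; the column names are illustrative):

import numpy as np
import pandas as pd
from confounds.visualize import pairs

rng = np.random.RandomState(0)
df = pd.DataFrame({'age': rng.uniform(20, 80, size=100),
                   'tiv': rng.normal(1400, 100, size=100),
                   'site': rng.choice(['site_A', 'site_B'], size=100)})
pp = pairs(df, color_by='site')  # returns a seaborn PairGrid
pp.savefig('confound_pairs.png')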


def before_after():