import sys
import time
import numpy as np
from scipy import stats
if sys.platform.startswith('linux'):
from . import correlation_openmp
elif sys.platform.startswith('win32'):
pass
elif sys.platform.startswith('darwin'):
pass
def _reference_correlation(semantics, activations, *, verbose=False):
    """
    Pearson correlations between every semantics column and every
    activations column, computed one pair at a time.

    Each pair of columns is handed to ``scipy.stats.pearsonr``; only the
    correlation coefficient is kept, the p-value is discarded.

    Parameters
    ----------
    semantics : np.array (n_vec_dims, n_outcomes)
        One column per outcome of the semantic (gold standard) space.
    activations : np.array (n_vec_dims, n_events)
        One column per event.
    verbose : bool
        If True, print the wall-clock time the computation took.

    Returns
    -------
    np.array (n_outcomes, n_events)
        Entry ``[jj, ii]`` is the correlation between outcome ``jj`` of the
        semantics and event ``ii`` of the activations. The first column
        therefore reads:

        0. correlation between first event and first outcome,
        1. correlation between first event and second outcome,
        ...

    Raises
    ------
    AssertionError
        If the first axes of ``semantics`` and ``activations`` differ in
        length.
    """
    same_vector_dims = semantics.shape[0] == activations.shape[0]
    assert same_vector_dims, ("number of vector dimensions in semantics and activations"
                              " need to be the same")

    result_shape = (semantics.shape[1], activations.shape[1])
    correlations = np.zeros(result_shape)

    start_time = time.time()
    # Events form the outer loop, outcomes the inner one, so the result is
    # filled column by column.
    for event_idx in range(result_shape[1]):
        event_vector = activations[:, event_idx]
        for outcome_idx in range(result_shape[0]):
            coefficient, _p_value = stats.pearsonr(semantics[:, outcome_idx], event_vector)
            correlations[outcome_idx, event_idx] = coefficient

    if verbose:
        print(f"time needed for correlations: {time.time() - start_time}")
    return correlations
def _column_means_and_stds(matrix, n_columns):
    """
    Return the mean and the sample standard deviation (``ddof=1``) of the
    first ``n_columns`` columns of ``matrix`` as two float64 arrays.
    """
    means = np.zeros((n_columns,))
    stds = np.zeros((n_columns,))
    # Work on one column at a time, exactly like the original loops did.
    for col in range(n_columns):
        column = matrix[:, col]
        means[col] = np.mean(column)
        stds[col] = np.std(column, ddof=1)
    return means, stds


def correlation(semantics, activations, *, verbose=False, allow_nan=False):
    """
    Calculate the correlations between the semantics and the activations
    using the OpenMP implementation in ``correlation_openmp``.

    The per-column means and sample standard deviations (``ddof=1``) of both
    inputs are computed here and passed, together with the inputs, to
    ``correlation_openmp.correlation``.

    Parameters
    ----------
    semantics : np.array (n_vec_dims, n_outcomes)
        One column per outcome of the semantic (gold standard) space.
    activations : np.array (n_vec_dims, n_events)
        One column per event; must be two-dimensional.
    verbose : bool
        If True, print the wall-clock time needed for the means and
        standard deviations and for the correlations.
    allow_nan : bool
        If False (default), raise a ``ValueError`` when any column of
        ``semantics`` or ``activations`` has a standard deviation that is
        zero or nan. If True, that check is skipped and the values are
        passed on as they are.

    Returns
    -------
    np.array (n_outcomes, n_events)
        The result of ``correlation_openmp.correlation``, returned
        unchanged. The first column contains all correlations between the
        first event and all possible outcomes in the semantics:

        0. correlation between first event and first outcome in the
           semantic (gold standard) space,
        1. correlation between first event and second outcome,
        ...

    Raises
    ------
    NotImplementedError
        If the platform is not linux.
    AssertionError
        If the first axes of ``semantics`` and ``activations`` differ in
        length.
    ValueError
        If ``allow_nan`` is False and a standard deviation is zero or nan.
    """
    if not sys.platform.startswith('linux'):
        raise NotImplementedError("OpenMP is linux only at the moment.")

    assert semantics.shape[0] == activations.shape[0], (
        "number of vector dimensions in semantics and activations"
        " need to be the same")

    n_outcomes = semantics.shape[1]
    # The two-element unpacking doubles as a check that activations is 2-D.
    _, n_events = activations.shape

    if verbose:
        start_time = time.time()
    semantics_means, semantics_stds = _column_means_and_stds(semantics, n_outcomes)
    activations_means, activations_stds = _column_means_and_stds(activations, n_events)
    if verbose:
        print(f"time needed for stds and means: {time.time() - start_time}")

    if not allow_nan:
        # A zero or nan standard deviation leaves the correlation undefined,
        # so refuse such input unless the caller opted in via allow_nan.
        if np.any(semantics_stds == 0) or np.any(np.isnan(semantics_stds)):
            raise ValueError('Standard deviations of semantics are not different to zero or nan.')
        if np.any(activations_stds == 0) or np.any(np.isnan(activations_stds)):
            raise ValueError('Standard deviations of activations are not different to zero or nan.')

    if verbose:
        start_time = time.time()
    correlations = correlation_openmp.correlation(semantics, activations, semantics_means,
                                                  semantics_stds, activations_means, activations_stds)
    if verbose:
        print(f"time needed for correlations: {time.time() - start_time}")
    return correlations