# Source code for pyndl.correlation

import sys
import time

import numpy as np

from scipy import stats

# The compiled ``correlation_openmp`` extension is only imported on Linux.
# ``correlation`` in this module raises NotImplementedError on every other
# platform before it touches the extension, so nothing is imported for
# Windows or macOS.
if sys.platform.startswith('linux'):
    from . import correlation_openmp
elif sys.platform.startswith('win32'):
    pass  # no extension module is imported on Windows
elif sys.platform.startswith('darwin'):
    pass  # no extension module is imported on macOS


def _reference_correlation(semantics, activations, *, verbose=False):
    """
    Pearson correlations between every semantics column and every
    activations column, computed one pair at a time with
    ``scipy.stats.pearsonr``.

    Parameters
    ----------
    semantics : array indexed as ``[vector dimension, outcome]``
    activations : array indexed as ``[vector dimension, event]``
    verbose : bool
        if True, print the wall-clock time spent in the correlation loop

    Returns
    -------
    np.array (n_outcomes, n_events)

    Column ``i`` holds the correlations of event ``i`` with all outcomes of
    the semantic (gold standard) space, i.e. for the first column:

    0. correlation between first event and first outcome
    1. correlation between first event and second outcome
    ...

    Raises
    ------
    AssertionError
        if semantics and activations differ in their first dimension

    """
    assert semantics.shape[0] == activations.shape[0], ("number of vector dimensions in semantics and activations"
                                                        " need to be the same")
    result = np.zeros((semantics.shape[1], activations.shape[1]))
    n_outcomes, n_events = result.shape

    started = time.time()
    for event_idx in range(n_events):
        # the event column is the same for every outcome, so slice it once
        event_vector = activations[:, event_idx]
        for outcome_idx in range(n_outcomes):
            # pearsonr also returns a p-value, which is not needed here
            coefficient, _p_value = stats.pearsonr(semantics[:, outcome_idx], event_vector)
            result[outcome_idx, event_idx] = coefficient
    if verbose:
        print(f"time needed for correlations:  {time.time() - started}")

    return result


def correlation(semantics, activations, *, verbose=False, allow_nan=False):
    """
    calculates the correlations between the semantics and the activations.

    The means and sample standard deviations (``ddof=1``) of every column are
    computed here and handed, together with the input matrices, to the
    compiled ``correlation_openmp`` extension, which is only available on
    Linux.

    Parameters
    ----------
    semantics : array indexed as ``[vector dimension, outcome]``
    activations : array indexed as ``[vector dimension, event]``
    verbose : bool
        if True, print the wall-clock time of the two computation stages
    allow_nan : bool
        if False (default), refuse inputs where any column has a standard
        deviation that is zero or nan

    Returns
    -------
    np.array (n_outcomes, n_events)

    The first column contains all correlations between the first event and
    all possible outcomes in the semantics.

    The first column reads like:

    0. correlation between first event and first outcome in the semantic
       (gold standard) space.
    1. correlation between first event and second outcome ...
    ...

    Raises
    ------
    NotImplementedError
        on every platform other than Linux
    AssertionError
        if semantics and activations differ in their first dimension
    ValueError
        if ``allow_nan`` is False and a standard deviation is zero or nan

    """
    if not sys.platform.startswith('linux'):
        raise NotImplementedError("OpenMP is linux only at the moment.")

    assert semantics.shape[0] == activations.shape[0], ("number of vector dimensions in semantics and activations"
                                                        " need to be the same")
    n_outcomes = semantics.shape[1]
    # Two-value unpacking keeps the implicit check that activations is 2-d;
    # the number of vector dimensions itself is not needed below.
    _, n_events = activations.shape

    # float64 buffers that are filled column by column and passed on to the
    # extension module
    semantics_means = np.zeros((n_outcomes,))
    semantics_stds = np.zeros((n_outcomes,))
    activations_means = np.zeros((n_events,))
    activations_stds = np.zeros((n_events,))

    if verbose:
        start_time = time.time()
    for jj in range(n_outcomes):
        semantics_means[jj] = np.mean(semantics[:, jj])
        semantics_stds[jj] = np.std(semantics[:, jj], ddof=1)
    for ii in range(n_events):
        activations_means[ii] = np.mean(activations[:, ii])
        activations_stds[ii] = np.std(activations[:, ii], ddof=1)
    if verbose:
        print(f"time needed for stds and means: {time.time() - start_time}")

    if not allow_nan:
        # A zero or nan standard deviation makes the correlation undefined,
        # so fail early instead of returning nan entries.
        if np.any(semantics_stds == 0) or np.any(np.isnan(semantics_stds)):
            raise ValueError('Standard deviations of semantics are not different to zero or nan.')
        if np.any(activations_stds == 0) or np.any(np.isnan(activations_stds)):
            raise ValueError('Standard deviations of activations are not different to zero or nan.')

    if verbose:
        start_time = time.time()
    correlations = correlation_openmp.correlation(semantics, activations,
                                                  semantics_means,
                                                  semantics_stds,
                                                  activations_means,
                                                  activations_stds)
    if verbose:
        print(f"time needed for correlations: {time.time() - start_time}")

    return correlations