"""Provide JIDT estimators."""
from pkg_resources import resource_filename
import numpy as np
from abc import abstractmethod
from idtxl.estimator import Estimator
from . import idtxl_exceptions as ex
from . import idtxl_utils as utils
try:
import jpype as jp
except ImportError as err:
    ex.package_missing(err, 'JPype is not available on this system. Install it'
                       ' from https://pypi.python.org/pypi/JPype1 to use '
                       'JAVA/JIDT-powered CMI estimation.')
class JidtEstimator(Estimator):
"""Abstract class for implementation of JIDT estimators.
Abstract class for implementation of JIDT estimators, child classes
implement estimators for mutual information (MI), conditional mutual
information (CMI), active information storage (AIS), transfer entropy (TE)
using the Kraskov-Grassberger-Stoegbauer estimator for continuous data,
plug-in estimators for discrete data, and Gaussian estimators for
continuous Gaussian data.
References:
- Lizier, Joseph T. (2014). JIDT: an information-theoretic toolkit for
studying the dynamics of complex systems. Front Robot AI, 1(11).
- Kraskov, A., Stoegbauer, H., & Grassberger, P. (2004). Estimating mutual
information. Phys Rev E, 69(6), 066138.
- Lizier, Joseph T., Mikhail Prokopenko, and Albert Y. Zomaya. (2012).
Local measures of information storage in complex distributed computation.
Inform Sci, 208, 39-54.
- Schreiber, T. (2000). Measuring information transfer. Phys Rev Lett,
85(2), 461.
Set common estimation parameters for JIDT estimators. For usage of these
estimators see documentation for the child classes.
Args:
settings : dict [optional]
set estimator parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
"""
    def __init__(self, settings=None):
        """Set default estimator settings."""
        settings = self._check_settings(settings)
        settings.setdefault('local_values', False)
        settings.setdefault('debug', False)
        self.settings = settings.copy()
def _start_jvm(self):
"""Start JAVA virtual machine if it is not running."""
jar_location = resource_filename(__name__, 'infodynamics.jar')
if not jp.isJVMStarted():
jp.startJVM(jp.getDefaultJVMPath(), '-ea', ('-Djava.class.path=' +
jar_location))
def _set_te_defaults(self, settings):
"""Set defaults for transfer entropy estimation."""
try:
history_target = settings['history_target']
except KeyError:
raise RuntimeError('No target history was provided for TE '
'estimation.')
settings.setdefault('history_source', history_target)
settings.setdefault('tau_target', 1)
settings.setdefault('tau_source', 1)
settings.setdefault('source_target_delay', 1)
assert type(settings['tau_target']) is int, (
'Target tau has to be an integer.')
assert type(settings['tau_source']) is int, (
'Source tau has to be an integer.')
assert type(settings['history_target']) is int, (
'Target history has to be an integer.')
assert type(settings['history_source']) is int, (
'Source history has to be an integer.')
assert type(settings['source_target_delay']) is int, (
'Source-target delay has to be an integer.')
assert settings['tau_target'] >= 1, 'Target tau must be >= 1'
assert settings['tau_source'] >= 1, 'Source tau must be >= 1'
assert settings['history_target'] >= 0, 'Target history must be >= 0'
assert settings['history_source'] >= 1, 'Source history must be >= 1'
assert settings['source_target_delay'] >= 0, (
'Source-target delay must be >= 0')
return settings
    def is_parallel(self):
        return False
class JidtKraskov(JidtEstimator):
"""Abstract class for implementation of JIDT Kraskov-estimators.
Abstract class for implementation of JIDT Kraskov-estimators, child classes
implement estimators for mutual information (MI), conditional mutual
    information (CMI), active information storage (AIS), transfer entropy (TE)
using the Kraskov-Grassberger-Stoegbauer estimator for continuous data.
See parent class for references.
Set common estimation parameters for JIDT Kraskov-estimators. For usage of
these estimators see documentation for the child classes.
Args:
CalcClass : JAVA class
JAVA class returned by jpype.JPackage
settings : dict [optional]
set estimator parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- kraskov_k : int [optional] - no. nearest neighbours for KNN
search (default=4)
- normalise : bool [optional] - z-standardise data (default=False)
- theiler_t : int [optional] - no. next temporal neighbours ignored
in KNN and range searches (default=0)
- noise_level : float [optional] - random noise added to the data
(default=1e-8)
- num_threads : int | str [optional] - number of threads used for
estimation (default='USE_ALL', note that this uses *all*
available threads on the current machine)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1). For TE and AIS the algorithm is applied
              by this method (for MI/CMI it has already been applied in the
              child class). Note that the default of 1 used here differs
              from the default ALG_NUM argument of the JIDT AIS KSG
              estimator.
"""
    def __init__(self, CalcClass, settings=None):
        # Set default estimator settings.
        settings = self._check_settings(settings)
        settings.setdefault('kraskov_k', str(4))
        settings.setdefault('normalise', 'false')
        settings.setdefault('theiler_t', str(0))
        settings.setdefault('noise_level', 1e-8)
        settings.setdefault('num_threads', 'USE_ALL')
        settings.setdefault('algorithm_num', 1)
assert type(settings['algorithm_num']) is int, (
'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2.')
super().__init__(settings)
# Set properties of JIDT's estimator object.
self.calc = CalcClass()
self.calc.setProperty('ALG_NUM', str(self.settings['algorithm_num']))
self.calc.setProperty('NORMALISE',
str(self.settings['normalise']).lower())
self.calc.setProperty('k', str(self.settings['kraskov_k']))
self.calc.setProperty('DYN_CORR_EXCL', str(self.settings['theiler_t']))
self.calc.setProperty('NOISE_LEVEL_TO_ADD',
str(self.settings['noise_level']))
self.calc.setProperty('NUM_THREADS', str(self.settings['num_threads']))
self.calc.setDebug(self.settings['debug'])
    def is_analytic_null_estimator(self):
        return False
class JidtDiscrete(JidtEstimator):
"""Abstract class for implementation of discrete JIDT-estimators.
Abstract class for implementation of plug-in JIDT-estimators for discrete
data. Child classes implement estimators for mutual information (MI),
    conditional mutual information (CMI), active information storage (AIS), and
transfer entropy (TE). See parent class for references.
Set common estimation parameters for discrete JIDT-estimators. For usage of
these estimators see documentation for the child classes.
Args:
settings : dict [optional]
set estimator parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- discretise_method : str [optional] - if and how to discretise
incoming continuous data, can be 'max_ent' for maximum entropy
binning, 'equal' for equal size bins, and 'none' if no binning is
required (default='none')
Note:
        Discrete JIDT estimators require the data's alphabet size for
        instantiation. Hence, as opposed to the Kraskov and Gaussian
        estimators, the JAVA class itself is added to the object instance,
        while for Kraskov/Gaussian estimators an instance of that class is
        added (because for the latter, objects can be instantiated
        independently of data properties).
"""
def __init__(self, settings):
settings.setdefault('discretise_method', 'none')
super().__init__(settings)
def _discretise_vars(self, var1, var2, conditional=None):
# Discretise variables if requested. Otherwise assert data are discrete
# and provided alphabet sizes are correct.
if self.settings['discretise_method'] == 'equal':
var1 = utils.discretise(var1, self.settings['alph1'])
var2 = utils.discretise(var2, self.settings['alph2'])
if conditional is not None:
conditional = utils.discretise(conditional,
self.settings['alphc'])
elif self.settings['discretise_method'] == 'max_ent':
var1 = utils.discretise_max_ent(var1, self.settings['alph1'])
var2 = utils.discretise_max_ent(var2, self.settings['alph2'])
            if conditional is not None:
conditional = utils.discretise_max_ent(conditional,
self.settings['alphc'])
elif self.settings['discretise_method'] == 'none':
assert issubclass(var1.dtype.type, np.integer), (
'Var1 is not an integer numpy array. '
'Discretise data to use this estimator.')
assert issubclass(var2.dtype.type, np.integer), (
'Var2 is not an integer numpy array. '
'Discretise data to use this estimator.')
assert np.min(var1) >= 0, 'Minimum of var1 is smaller than 0.'
assert np.min(var2) >= 0, 'Minimum of var2 is smaller than 0.'
assert np.max(var1) < self.settings['alph1'], (
'Maximum of var1 is larger than the alphabet size.')
assert np.max(var2) < self.settings['alph2'], (
'Maximum of var2 is larger than the alphabet size.')
if conditional is not None:
assert np.min(conditional) >= 0, (
'Minimum of conditional is smaller than 0.')
assert issubclass(conditional.dtype.type, np.integer), (
'Conditional is not an integer numpy array. '
'Discretise data to use this estimator.')
assert np.max(conditional) < self.settings['alphc'], (
'Maximum of conditional is larger than the alphabet size.')
else:
            raise ValueError('Unknown discretisation method.')
if conditional is not None:
return var1, var2, conditional
else:
return var1, var2
    def is_analytic_null_estimator(self):
        return True
    @abstractmethod
def get_analytic_distribution(self, **data):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
data : numpy arrays
realisations of random variables required for the calculation
(varies between estimators, e.g. 2 variables for MI, 3 for
CMI). Formatted as per the estimate method for this estimator.
Returns:
Java object
JIDT calculator that was used here
"""
pass
    def estimate_surrogates_analytic(self, n_perm=200, **data):
        """Return estimate of the analytical surrogate distribution.
        This method must be implemented because this class's
        is_analytic_null_estimator() method returns True.
        Args:
            n_perm : int [optional]
                number of permutations (default=200)
data : numpy arrays
realisations of random variables required for the calculation
(varies between estimators, e.g. 2 variables for MI, 3 for
CMI). Formatted as per the estimate method for this estimator.
Returns:
float | numpy array
n_perm surrogates of the average MI/CMI/TE over all samples
under the null hypothesis of no relationship between var1 and
var2 (in the context of conditional)
"""
return common_estimate_surrogates_analytic(self, n_perm, **data)
class JidtGaussian(JidtEstimator):
"""Abstract class for implementation of JIDT Gaussian-estimators.
Abstract class for implementation of JIDT Gaussian-estimators, child
classes implement estimators for mutual information (MI), conditional
    mutual information (CMI), active information storage (AIS), transfer
entropy (TE) using JIDT's Gaussian estimator for continuous data. See
parent class for references.
    Set common estimation parameters for JIDT Gaussian-estimators. For usage of
these estimators see documentation for the child classes.
Results are returned in nats.
Args:
CalcClass : JAVA class
JAVA class returned by jpype.JPackage
settings : dict [optional]
set estimator parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
"""
def __init__(self, CalcClass, settings):
super().__init__(settings)
self.calc = CalcClass()
self.calc.setDebug(self.settings['debug'])
    def is_analytic_null_estimator(self):
        return True
    def get_analytic_distribution(self, **data):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
data : numpy arrays
realisations of random variables required for the calculation
(varies between estimators, e.g. 2 variables for MI, 3 for
CMI). Formatted as per the estimate method for this estimator.
Returns:
Java object
JIDT calculator that was used here
"""
# Make one estimate to prepare the calculator:
self.estimate(**data)
return self.calc.computeSignificance()
    def estimate_surrogates_analytic(self, n_perm=200, **data):
        """Estimate the surrogate distribution analytically.
        This method must be implemented because this class's
        is_analytic_null_estimator() method returns True.
        Args:
            n_perm : int
                number of permutations (default=200)
data : numpy arrays
realisations of random variables required for the calculation
(varies between estimators, e.g. 2 variables for MI, 3 for
CMI). Formatted as per estimate_parallel for this estimator.
Returns:
float | numpy array
n_perm surrogates of the average MI/CMI/TE over all samples
under the null hypothesis of no relationship between var1 and
var2 (in the context of conditional)
"""
return common_estimate_surrogates_analytic(self, n_perm, **data)
class JidtKraskovCMI(JidtKraskov):
    """Calculate conditional mutual information with JIDT's Kraskov estimator.
    Calculate the conditional mutual information (CMI) between three
    variables. Call JIDT via jpype and use the Kraskov estimator (algorithm 1
    by default). If no conditional is given (is None), the function returns
    the mutual information between var1 and var2. See parent class for
    references.
Results are returned in nats.
Args:
settings : dict [optional]
set estimator parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- kraskov_k : int [optional] - no. nearest neighbours for KNN
search (default=4)
- normalise : bool [optional] - z-standardise data (default=False)
- theiler_t : int [optional] - no. next temporal neighbours ignored
in KNN and range searches (default=0)
- noise_level : float [optional] - random noise added to the data
(default=1e-8)
- num_threads : int | str [optional] - number of threads used for
estimation (default='USE_ALL', note that this uses *all*
available threads on the current machine)
- algorithm_num : int [optional] - which Kraskov algorithm (1 or 2)
to use (default=1)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the CMI estimator to save
computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
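    Example:
        A minimal usage sketch with illustrative data (assumes JIDT/JPype
        are installed; the settings values are arbitrary choices, not
        recommendations):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtKraskovCMI
        >>> n = 1000
        >>> cond = np.random.randn(n)
        >>> var1 = cond + np.random.randn(n)
        >>> var2 = cond + np.random.randn(n)
        >>> est = JidtKraskovCMI({'kraskov_k': 4})
        >>> cmi = est.estimate(var1, var2, cond)  # CMI in nats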
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
settings.setdefault('algorithm_num', 1)
assert type(settings['algorithm_num']) is int, (
'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2.')
if (settings['algorithm_num'] == 1):
CalcClass = (
jp.JPackage('infodynamics.measures.continuous.kraskov').
ConditionalMutualInfoCalculatorMultiVariateKraskov1)
else:
CalcClass = (
jp.JPackage('infodynamics.measures.continuous.kraskov').
ConditionalMutualInfoCalculatorMultiVariateKraskov2)
super().__init__(CalcClass, settings)
    def estimate(self, var1, var2, conditional=None):
"""Estimate conditional mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
var2 : numpy array
realisations of the second variable (similar to var1)
conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1), if
no conditional is provided, return MI between var1 and var2
Returns:
float | numpy array
average CMI over all samples or local CMI for individual
samples if 'local_values'=True
"""
# Return MI if no conditional was provided.
if conditional is None:
est_mi = JidtKraskovMI(self.settings)
return est_mi.estimate(var1, var2)
else:
            assert conditional.size != 0, 'Conditional array is empty.'
# Check if variable realisations are passed as 1D or 2D arrays and have
# equal no. observations.
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
cond = self._ensure_two_dim_input(conditional)
assert(var1.shape[0] == var2.shape[0]), (
'Unequal number of observations (var1: {0}, var2: {1}).'.format(
var1.shape[0], var2.shape[0]))
assert(var1.shape[0] == cond.shape[0]), (
'Unequal number of observations (var1: {0}, cond: {1}).'.format(
var1.shape[0], cond.shape[0]))
# Check if number of points is sufficient for estimation.
self._check_number_of_points(var1.shape[0])
self.calc.initialise(var1.shape[1], var2.shape[1], cond.shape[1])
self.calc.setObservations2D(var1, var2, cond)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtDiscreteCMI(JidtDiscrete):
"""Calculate CMI with JIDT's implementation for discrete variables.
Calculate the conditional mutual information between two variables given
the third. Call JIDT via jpype and use the discrete estimator. See parent
class for references.
Results are returned in bits.
Args:
settings : dict [optional]
sets estimation parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- discretise_method : str [optional] - if and how to discretise
incoming continuous data, can be 'max_ent' for maximum entropy
binning, 'equal' for equal size bins, and 'none' if no binning is
required (default='none')
- n_discrete_bins : int [optional] - number of discrete bins/
levels or the base of each dimension of the discrete variables
(default=2). If set, this parameter overwrites/sets alph1, alph2
and alphc
- alph1 : int [optional] - number of discrete bins/levels for var1
(default=2, or the value set for n_discrete_bins)
- alph2 : int [optional] - number of discrete bins/levels for var2
(default=2, or the value set for n_discrete_bins)
- alphc : int [optional] - number of discrete bins/levels for
conditional (default=2, or the value set for n_discrete_bins)
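    Example:
        A minimal sketch for binary data (illustrative values only):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtDiscreteCMI
        >>> var1 = np.random.randint(0, 2, size=1000)
        >>> var2 = np.random.randint(0, 2, size=1000)
        >>> cond = np.random.randint(0, 2, size=1000)
        >>> est = JidtDiscreteCMI({'n_discrete_bins': 2})
        >>> cmi = est.estimate(var1, var2, cond)  # CMI in bits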
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Set default alphabet sizes. Try to overwrite alphabet sizes with
# number of bins for discretisation if provided, otherwise assume
# binary variables.
try:
n_discrete_bins = int(settings['n_discrete_bins'])
settings['alph1'] = n_discrete_bins
settings['alph2'] = n_discrete_bins
settings['alphc'] = n_discrete_bins
except KeyError:
pass # Do nothing and use the default for alph_* set below
settings.setdefault('alph1', int(2))
settings.setdefault('alph2', int(2))
settings.setdefault('alphc', int(2))
super().__init__(settings)
# Start JAVA virtual machine and create JAVA object. Add JAVA object to
# instance
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.discrete').
ConditionalMutualInformationCalculatorDiscrete)
self.calc = CalcClass()
self.calc.setDebug(self.settings['debug'])
# Keep a reference to an MI calculator if we need to use it (memory
# used here is minimal, and better than recreating it each time)
self.mi_calc = JidtDiscreteMI(self.settings)
    def estimate(self, var1, var2, conditional=None, return_calc=False):
"""Estimate conditional mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
var2 : numpy array
realisations of the second variable (similar to var1)
conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1), if
no conditional is provided, return MI between var1 and var2
return_calc : boolean
return the calculator used here as well as the numeric
calculated value(s)
Returns:
float | numpy array
average CMI over all samples or local CMI for individual
samples if 'local_values'=True
Java object
JIDT calculator that was used here. Only returned if
return_calc was set.
Raises:
ex.JidtOutOfMemoryError
Raised when JIDT object cannot be instantiated due to mem error
"""
# Calculate an MI if no conditional was provided
if (conditional is None) or (self.settings['alphc'] == 0):
# Return value will be just the estimate if return_calc is False,
# or estimate plus the JIDT MI calculator if return_calc is True:
return self.mi_calc.estimate(var1, var2, return_calc)
else:
            assert conditional.size != 0, 'Conditional array is empty.'
# Check and remember the no. dimensions for each variable before
# collapsing them into univariate arrays later.
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
conditional = self._ensure_two_dim_input(conditional)
var1_dim = var1.shape[1]
var2_dim = var2.shape[1]
cond_dim = conditional.shape[1]
# Discretise if requested.
var1, var2, conditional = self._discretise_vars(var1, var2,
conditional)
        # Then collapse any multivariates into univariate arrays:
var1 = utils.combine_discrete_dimensions(var1, self.settings['alph1'])
var2 = utils.combine_discrete_dimensions(var2, self.settings['alph2'])
conditional = utils.combine_discrete_dimensions(conditional,
self.settings['alphc'])
# We have a non-trivial conditional, so make a proper conditional MI
# calculation
alph1_base = int(np.power(self.settings['alph1'], var1_dim))
alph2_base = int(np.power(self.settings['alph2'], var2_dim))
cond_base = int(np.power(self.settings['alphc'], cond_dim))
try:
self.calc.initialise(alph1_base, alph2_base, cond_base)
except:
# Handles both jp.JException (JPype v0.7) and jp.JavaException
# (JPype < v0.7). Only possible exception that can be raised here
# (if all bases >= 2) is a Java OutOfMemoryException:
assert(alph1_base >= 2)
assert(alph2_base >= 2)
assert(cond_base >= 2)
raise ex.JidtOutOfMemoryError(
'Cannot instantiate JIDT CMI discrete estimator with '
'alph1_base = {}, alph2_base = {}, cond_base = {}. Try '
                're-running with an increased Java heap size.'.format(
alph1_base, alph2_base, cond_base))
# Unfortunately no faster way to pass numpy arrays in than this list
# conversion
self.calc.addObservations(jp.JArray(jp.JInt, 1)(var1.tolist()),
jp.JArray(jp.JInt, 1)(var2.tolist()),
jp.JArray(jp.JInt, 1)(conditional.tolist()))
if self.settings['local_values']:
result = np.array(self.calc.computeLocalFromPreviousObservations(
jp.JArray(jp.JInt, 1)(var1.tolist()),
jp.JArray(jp.JInt, 1)(var2.tolist()),
jp.JArray(jp.JInt, 1)(conditional.tolist())))
else:
result = float(self.calc.computeAverageLocalOfObservations())
if return_calc:
return (result, self.calc)
else:
return result
    def get_analytic_distribution(self, var1, var2, conditional=None):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
var2 : numpy array
realisations of the second variable (similar to var1)
conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1), if
no conditional is provided, return MI between var1 and var2
Returns:
Java object
JIDT calculator that was used here
"""
# Make one estimate to prepare the calculator:
(est, jidt_calc) = self.estimate(var1, var2, conditional, True)
return jidt_calc.computeSignificance()
class JidtDiscreteMI(JidtDiscrete):
"""Calculate MI with JIDT's discrete-variable implementation.
Calculate the mutual information (MI) between two variables. Call JIDT via
jpype and use the discrete estimator. See parent class for references.
Results are returned in bits.
Args:
settings : dict [optional]
sets estimation parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- discretise_method : str [optional] - if and how to discretise
incoming continuous data, can be 'max_ent' for maximum entropy
binning, 'equal' for equal size bins, and 'none' if no binning is
required (default='none')
- n_discrete_bins : int [optional] - number of discrete bins/
levels or the base of each dimension of the discrete variables
(default=2). If set, this parameter overwrites/sets alph1 and
alph2
- alph1 : int [optional] - number of discrete bins/levels for var1
(default=2, or the value set for n_discrete_bins)
- alph2 : int [optional] - number of discrete bins/levels for var2
(default=2, or the value set for n_discrete_bins)
- lag_mi : int [optional] - time difference in samples to calculate
the lagged MI between processes (default=0)
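    Example:
        A minimal sketch, including the analytic surrogate distribution
        (illustrative data; relies on the module-level
        common_estimate_surrogates_analytic() helper referenced above):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtDiscreteMI
        >>> var1 = np.random.randint(0, 2, size=1000)
        >>> var2 = np.random.randint(0, 2, size=1000)
        >>> est = JidtDiscreteMI({'n_discrete_bins': 2})
        >>> mi = est.estimate(var1, var2)  # MI in bits
        >>> surr = est.estimate_surrogates_analytic(
        ...     n_perm=200, var1=var1, var2=var2)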
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Set default alphabet sizes. Try to overwrite alphabet sizes with
# number of bins for discretisation if provided, otherwise assume
# binary variables.
super().__init__(settings)
self.settings.setdefault('lag_mi', int(0))
try:
n_discrete_bins = int(self.settings['n_discrete_bins'])
self.settings['alph1'] = n_discrete_bins
self.settings['alph2'] = n_discrete_bins
except KeyError:
pass # Do nothing and use the default for alph_* set below
self.settings.setdefault('alph1', int(2))
self.settings.setdefault('alph2', int(2))
# Start JAVA virtual machine and create JAVA object. Add JAVA object to
# instance.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.discrete').
MutualInformationCalculatorDiscrete)
self.calc = CalcClass()
self.calc.setDebug(self.settings['debug'])
    def estimate(self, var1, var2, return_calc=False):
"""Estimate mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
var2 : numpy array
realisations of the second variable (similar to var1)
return_calc : boolean
return the calculator used here as well as the numeric
calculated value(s)
Returns:
float | numpy array
average MI over all samples or local MI for individual
samples if 'local_values'=True
Java object
JIDT calculator that was used here. Only returned if
return_calc was set.
Raises:
ex.JidtOutOfMemoryError
Raised when JIDT object cannot be instantiated due to mem error
"""
# Check and remember the no. dimensions for each variable before
# collapsing them into univariate arrays later.
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
var1_dim = var1.shape[1]
var2_dim = var2.shape[1]
# Discretise variables if requested.
var1, var2 = self._discretise_vars(var1, var2)
        # Then collapse any multivariates into univariate arrays:
var1 = utils.combine_discrete_dimensions(var1, self.settings['alph1'])
var2 = utils.combine_discrete_dimensions(var2, self.settings['alph2'])
# Initialise estimator
base_for_var1 = int(np.power(self.settings['alph1'], var1_dim))
base_for_var2 = int(np.power(self.settings['alph2'], var2_dim))
try:
self.calc.initialise(base_for_var1, base_for_var2,
self.settings['lag_mi'])
except:
# Handles both jp.JException (JPype v0.7) and jp.JavaException
# (JPype < v0.7). Only possible exception that can be raised here
# (if base_for_var* >= 2) is a Java OutOfMemoryException:
assert(base_for_var1 >= 2)
assert(base_for_var2 >= 2)
raise ex.JidtOutOfMemoryError(
'Cannot instantiate JIDT MI discrete estimator with bases = {}'
                ' and {}. Try re-running with an increased Java heap '
                'size.'.format(
base_for_var1, base_for_var2))
# Unfortunately no faster way to pass numpy arrays in than this list
# conversion
self.calc.addObservations(jp.JArray(jp.JInt, 1)(var1.tolist()),
jp.JArray(jp.JInt, 1)(var2.tolist()))
if self.settings['local_values']:
result = np.array(self.calc.computeLocalFromPreviousObservations(
jp.JArray(jp.JInt, 1)(var1.tolist()),
jp.JArray(jp.JInt, 1)(var2.tolist())))
else:
result = float(self.calc.computeAverageLocalOfObservations())
if return_calc:
return (result, self.calc)
else:
return result
    def get_analytic_distribution(self, var1, var2):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
var2 : numpy array
realisations of the second variable (similar to var1)
Returns:
Java object
JIDT calculator that was used here
"""
# Make one estimate to prepare the calculator:
(est, jidt_calc) = self.estimate(var1, var2, True)
return jidt_calc.computeSignificance()
class JidtKraskovMI(JidtKraskov):
"""Calculate mutual information with JIDT's Kraskov implementation.
Calculate the mutual information between two variables. Call JIDT via jpype
    and use the Kraskov estimator (algorithm 1 by default). See parent class
    for references.
Results are returned in nats.
Args:
settings : dict [optional]
sets estimation parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- kraskov_k : int [optional] - no. nearest neighbours for KNN
search (default=4)
- normalise : bool [optional] - z-standardise data (default=False)
- theiler_t : int [optional] - no. next temporal neighbours ignored
in KNN and range searches (default=0)
- noise_level : float [optional] - random noise added to the data
(default=1e-8)
- num_threads : int | str [optional] - number of threads used for
estimation (default='USE_ALL', note that this uses *all*
available threads on the current machine)
- algorithm_num : int [optional] - which Kraskov algorithm (1 or 2)
to use (default=1)
- lag_mi : int [optional] - time difference in samples to calculate
the lagged MI between processes (default=0)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the MI estimator to save
computation time. The Theiler window ignores trial boundaries. The
        MI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
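    Example:
        A minimal sketch estimating a lagged MI (illustrative data and
        settings):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtKraskovMI
        >>> n = 1000
        >>> var1 = np.random.randn(n)
        >>> var2 = np.zeros(n)
        >>> var2[1:] = var1[:-1] + 0.5 * np.random.randn(n - 1)
        >>> est = JidtKraskovMI({'kraskov_k': 4, 'lag_mi': 1})
        >>> mi = est.estimate(var1, var2)  # MI in nats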
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
settings.setdefault('algorithm_num', 1)
assert type(settings['algorithm_num']) is int, (
'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2.')
if (settings['algorithm_num'] == 1):
CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
MutualInfoCalculatorMultiVariateKraskov1)
else:
CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
MutualInfoCalculatorMultiVariateKraskov2)
super().__init__(CalcClass, settings)
# Get lag and shift second variable to account for a lag if requested
self.settings.setdefault('lag_mi', 0)
    def estimate(self, var1, var2):
"""Estimate mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
var2 : numpy array
realisations of the second variable (similar to var1)
Returns:
float | numpy array
average MI over all samples or local MI for individual
samples if 'local_values'=True
"""
# Check if variable realisations are passed as 1D or 2D arrays
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
# Shift variables to calculate a lagged MI.
if self.settings['lag_mi'] > 0:
var1 = var1[:-self.settings['lag_mi'], :]
var2 = var2[self.settings['lag_mi']:, :]
# Check if number of points is sufficient for estimation.
self._check_number_of_points(var1.shape[0])
self.calc.initialise(var1.shape[1], var2.shape[1])
self.calc.setObservations2D(var1, var2)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtKraskovAIS(JidtKraskov):
"""Calculate active information storage with JIDT's Kraskov implementation.
    Calculate active information storage (AIS) for some process using JIDT's
    implementation of the Kraskov estimator (type 1 by default). AIS is
    defined as the mutual information between the process's past state and
    its current value. The past state needs to be defined in the settings
    dictionary, where a past state is defined as a uniform embedding with
    parameters history and tau. The history describes the number of samples
    taken from the process's past, tau describes the embedding delay, i.e.,
    the spacing between every two samples from the process's past.
See parent class for references. Results are returned in nats.
Args:
settings : dict
sets estimation parameters:
- history : int - number of samples in the processes' past used as
embedding
            - tau : int [optional] - the process's embedding delay (default=1)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- kraskov_k : int [optional] - no. nearest neighbours for KNN
search (default=4)
- normalise : bool [optional] - z-standardise data (default=False)
- theiler_t : int [optional] - no. next temporal neighbours ignored
in KNN and range searches (default=0)
- noise_level : float [optional] - random noise added to the data
(default=1e-8)
- num_threads : int | str [optional] - number of threads used for
estimation (default='USE_ALL', note that this uses *all*
available threads on the current machine)
- algorithm_num : int [optional] - which Kraskov algorithm (1 or 2)
to use (default=1)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the AIS estimator to save
computation time. The Theiler window ignores trial boundaries. The
        AIS estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
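    Example:
        A minimal sketch for an autoregressive process (illustrative data;
        history and tau are assumptions, not recommendations):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtKraskovAIS
        >>> n = 1000
        >>> process = np.zeros(n)
        >>> for t in range(1, n):
        ...     process[t] = 0.7 * process[t - 1] + np.random.randn()
        >>> est = JidtKraskovAIS({'history': 2, 'tau': 1})
        >>> ais = est.estimate(process)  # AIS in nats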
"""
def __init__(self, settings):
settings = self._check_settings(settings)
# Check for history for AIS estimation.
try:
settings['history']
except KeyError:
raise RuntimeError('No history was provided for AIS estimation.')
settings.setdefault('tau', 1)
assert type(settings['history']) is int, (
'History has to be an integer.')
assert type(settings['tau']) is int, ('Tau has to be an integer.')
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
ActiveInfoStorageCalculatorKraskov)
super().__init__(CalcClass, settings)
    def estimate(self, process):
"""Estimate active information storage.
Args:
process : numpy array
                realisations of the process, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
Returns:
float | numpy array
average AIS over all samples or local AIS for individual
samples if 'local_values'=True
"""
process = self._ensure_one_dim_input(process)
# Check if number of points is sufficient for estimation.
self._check_number_of_points(process.shape[0])
self.calc.initialise(self.settings['history'], self.settings['tau'])
self.calc.setObservations(process)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtDiscreteAIS(JidtDiscrete):
"""Calculate AIS with JIDT's discrete-variable implementation.
Calculate the active information storage (AIS) for one process. Call JIDT
via jpype and use the discrete estimator. See parent class for references.
Results are returned in bits.
Args:
settings : dict
set estimator parameters:
- history : int - number of samples in the target's past used as
embedding (>= 0)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- discretise_method : str [optional] - if and how to discretise
incoming continuous data, can be 'max_ent' for maximum entropy
binning, 'equal' for equal size bins, and 'none' if no binning is
required (default='none')
- n_discrete_bins : int [optional] - number of discrete bins/
levels or the base of each dimension of the discrete variables
(default=2). If set, this parameter overwrites/sets alph. (>= 2)
            - alph : int [optional] - number of discrete bins/levels for the
              process (default=2, or the value set for n_discrete_bins). (>= 2)
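    Example:
        A minimal sketch for a binary process (illustrative data):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtDiscreteAIS
        >>> process = np.random.randint(0, 2, size=1000)
        >>> est = JidtDiscreteAIS({'history': 2, 'alph': 2})
        >>> ais = est.estimate(process)  # AIS in bits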
"""
def __init__(self, settings):
settings = self._check_settings(settings)
try:
settings['history']
except KeyError:
raise RuntimeError('No history was provided for AIS estimation.')
assert type(settings['history']) is int, (
'History has to be an integer.')
assert settings['history'] >= 0, 'History must be >= 0'
# Get alphabet sizes and check if discretisation is requested
try:
n_discrete_bins = int(settings['n_discrete_bins'])
settings['alph'] = n_discrete_bins
except KeyError:
pass # Do nothing and use the default for alph set below
settings.setdefault('alph', int(2))
assert settings['alph'] >= 2, 'Number of bins must be >= 2'
super().__init__(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.discrete').
ActiveInformationCalculatorDiscrete)
self.calc = CalcClass()
self.calc.setDebug(self.settings['debug'])
    def estimate(self, process, return_calc=False):
"""Estimate active information storage.
Args:
process : numpy array
realisations as either a 2D numpy array where array dimensions
represent [realisations x variable dimension] or a 1D array
representing [realisations], array type can be float (requires
discretisation) or int
return_calc : boolean
return the calculator used here as well as the numeric
calculated value(s)
Returns:
float | numpy array
average AIS over all samples or local AIS for individual
samples if 'local_values'=True
Java object
JIDT calculator that was used here. Only returned if
return_calc was set.
Raises:
ex.JidtOutOfMemoryError
Raised when JIDT object cannot be instantiated due to mem error
"""
process = self._ensure_one_dim_input(process)
# Now discretise if required
if self.settings['discretise_method'] == 'none':
assert issubclass(process.dtype.type, np.integer), (
'Process is not an integer numpy array. '
'Discretise data to use this estimator.')
assert min(process) >= 0, 'Minimum of process is smaller than 0.'
assert max(process) < self.settings['alph'], (
'Maximum of process is larger than the alphabet size.')
if self.settings['alph'] < np.unique(process).shape[0]:
                raise RuntimeError("The process' alphabet size is smaller "
                                   'than the number of unique elements in '
                                   'the process.')
elif self.settings['discretise_method'] == 'equal':
process = utils.discretise(process, self.settings['alph'])
elif self.settings['discretise_method'] == 'max_ent':
process = utils.discretise_max_ent(process, self.settings['alph'])
else:
pass # don't discretise at all, assume data to be discrete
# And finally make the AIS calculation:
try:
self.calc.initialise(
self.settings['alph'], self.settings['history'])
except:
# Handles both jp.JException (JPype v0.7) and jp.JavaException
# (JPype < v0.7). Only possible exception that can be raised here
# (if self.settings['alph'] >= 2) is a Java OutOfMemoryException:
assert(self.settings['alph'] >= 2)
raise ex.JidtOutOfMemoryError(
'Cannot instantiate JIDT AIS discrete estimator with alph = {}'
                ' and history = {}. Try re-running with an increased Java '
                'heap size.'.format(
self.settings['alph'], self.settings['history']))
# Unfortunately no faster way to pass numpy arrays in than this list
# conversion
self.calc.addObservations(jp.JArray(jp.JInt, 1)(process.tolist()))
if self.settings['local_values']:
result = np.array(self.calc.computeLocalFromPreviousObservations(
jp.JArray(jp.JInt, 1)(process.tolist())))
else:
result = float(self.calc.computeAverageLocalOfObservations())
if return_calc:
return (result, self.calc)
else:
return result
    def get_analytic_distribution(self, process):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
process : numpy array
realisations as either a 2D numpy array where array dimensions
represent [realisations x variable dimension] or a 1D array
representing [realisations], array type can be float (requires
discretisation) or int
Returns:
Java object
JIDT calculator that was used here
"""
# Make one estimate to prepare the calculator:
(est, jidt_calc) = self.estimate(process, True)
return jidt_calc.computeSignificance()
class JidtGaussianAIS(JidtGaussian):
"""Calculate active information storage with JIDT's Gaussian implementation.
    Calculate active information storage (AIS) for some process using JIDT's
    implementation of the Gaussian estimator. AIS is defined as the mutual
    information between the process's past state and its current value. The
    past state needs to be defined in the settings dictionary, where a past
    state is defined as a uniform embedding with parameters history and tau.
    The history describes the number of samples taken from the process's
    past, tau describes the embedding delay, i.e., the spacing between every
    two samples from the process's past.
    See parent class for references. Results are returned in nats.
Args:
settings : dict
sets estimation parameters:
- history : int - number of samples in the processes' past used as
embedding
            - tau : int [optional] - the process's embedding delay (default=1)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the AIS estimator to save
computation time. The Theiler window ignores trial boundaries. The
        AIS estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
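    Example:
        A minimal sketch for an autoregressive process (illustrative data;
        history and tau are assumptions):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtGaussianAIS
        >>> n = 1000
        >>> process = np.zeros(n)
        >>> for t in range(1, n):
        ...     process[t] = 0.7 * process[t - 1] + np.random.randn()
        >>> est = JidtGaussianAIS({'history': 2, 'tau': 1})
        >>> ais = est.estimate(process)  # AIS in nats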
"""
def __init__(self, settings):
settings = self._check_settings(settings)
# Check for history for AIS estimation.
try:
settings['history']
except KeyError:
raise RuntimeError('No history was provided for AIS estimation.')
settings.setdefault('tau', 1)
assert type(settings['history']) is int, (
'History has to be an integer.')
assert type(settings['tau']) is int, ('Tau has to be an integer.')
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
ActiveInfoStorageCalculatorGaussian)
super().__init__(CalcClass, settings)
    def estimate(self, process):
"""Estimate active information storage.
Args:
process : numpy array
                realisations of the process, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
Returns:
float | numpy array
average AIS over all samples or local AIS for individual
samples if 'local_values'=True
"""
process = self._ensure_one_dim_input(process)
self.calc.initialise(self.settings['history'], self.settings['tau'])
self.calc.setObservations(process)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtGaussianMI(JidtGaussian):
"""Calculate mutual information with JIDT's Gaussian implementation.
Calculate the mutual information between two variables. Call JIDT via jpype
and use the Gaussian estimator. See parent class for references.
Results are returned in nats.
Args:
settings : dict [optional]
sets estimation parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- lag_mi : int [optional] - time difference in samples to calculate
the lagged MI between processes (default=0)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the MI estimator to save
computation time. The Theiler window ignores trial boundaries. The
        MI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
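    Example:
        A minimal sketch for two correlated Gaussian variables (illustrative
        data):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtGaussianMI
        >>> n = 1000
        >>> var1 = np.random.randn(n)
        >>> var2 = var1 + np.random.randn(n)
        >>> est = JidtGaussianMI()
        >>> mi = est.estimate(var1, var2)  # MI in nats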
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
MutualInfoCalculatorMultiVariateGaussian)
super().__init__(CalcClass, settings)
# Add lag between input variables. Setting the lag in JIDT didn't work,
# shift variables when calling the estimate method instead.
self.settings.setdefault('lag_mi', int(0))
# self.calc.setProperty('PROP_TIME_DIFF', str(self.settings['lag_mi']))
    def estimate(self, var1, var2):
"""Estimate mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
var2 : numpy array
realisations of the second variable (similar to var1)
Returns:
float | numpy array
average MI over all samples or local MI for individual
samples if 'local_values'=True
"""
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
# Shift variables to calculate a lagged MI.
if self.settings['lag_mi'] > 0:
var1 = var1[:-self.settings['lag_mi'], :]
var2 = var2[self.settings['lag_mi']:, :]
self.calc.initialise(var1.shape[1], var2.shape[1])
self.calc.setObservations2D(var1, var2)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtGaussianCMI(JidtGaussian):
    """Calculate conditional mutual information with JIDT's Gaussian estimator.
Computes the differential conditional mutual information of two
multivariate sets of observations, conditioned on another, assuming that
the probability distribution function for these observations is a
multivariate Gaussian distribution.
Call JIDT via jpype and use
ConditionalMutualInfoCalculatorMultiVariateGaussian estimator.
If no conditional is given (is None), the function returns the mutual
information between var1 and var2.
See parent class for references. Results are returned in nats.
Args:
settings : dict [optional]
sets estimation parameters:
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the CMI estimator to save
computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
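    Example:
        A minimal sketch (illustrative data):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtGaussianCMI
        >>> n = 1000
        >>> cond = np.random.randn(n)
        >>> var1 = cond + np.random.randn(n)
        >>> var2 = cond + np.random.randn(n)
        >>> est = JidtGaussianCMI()
        >>> cmi = est.estimate(var1, var2, cond)  # CMI in nats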
"""
def __init__(self, settings=None):
settings = self._check_settings(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
ConditionalMutualInfoCalculatorMultiVariateGaussian)
super().__init__(CalcClass, settings)
self.est_mi = None
    def estimate(self, var1, var2, conditional=None):
"""Estimate conditional mutual information.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
var2 : numpy array
realisations of the second variable (similar to var1)
conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1), if
no conditional is provided, return MI between var1 and var2
Returns:
float | numpy array
average CMI over all samples or local CMI for individual
samples if 'local_values'=True
"""
# Return MI if no conditioning variable was provided.
if conditional is None:
if (self.est_mi is None):
self.est_mi = JidtGaussianMI(self.settings)
return self.est_mi.estimate(var1, var2)
else:
            assert conditional.size != 0, 'Conditional array is empty.'
var1 = self._ensure_two_dim_input(var1)
var2 = self._ensure_two_dim_input(var2)
cond = self._ensure_two_dim_input(conditional)
assert(var1.shape[0] == var2.shape[0]), (
'Unequal number of observations (var1: {0}, var2: {1}).'.format(
var1.shape[0], var2.shape[0]))
assert(var1.shape[0] == cond.shape[0]), (
'Unequal number of observations (var1: {0}, cond: {1}).'.format(
var1.shape[0], cond.shape[0]))
self.calc.initialise(var1.shape[1], var2.shape[1], cond.shape[1])
self.calc.setObservations2D(var1, var2, cond)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
    def get_analytic_distribution(self, var1, var2, conditional=None):
"""Return a JIDT AnalyticNullDistribution object.
Required so that our estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() method, where data is formatted
as per the estimate method for this estimator.
Args:
var1 : numpy array
realisations of first variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
var2 : numpy array
realisations of the second variable (similar to var1)
conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1), if
no conditional is provided, return MI between var1 and var2
Returns:
Java object
JIDT calculator that was used here
"""
# Make one estimate to prepare the calculator:
self.estimate(var1, var2, conditional)
if (conditional is None):
return self.est_mi.calc.computeSignificance()
else:
return self.calc.computeSignificance()
class JidtKraskovTE(JidtKraskov):
"""Calculate transfer entropy with JIDT's Kraskov implementation.
    Calculate transfer entropy between a source and a target variable using
    JIDT's implementation of the Kraskov estimator (type 1 by default).
    Transfer entropy is defined as the conditional mutual information between
    the source's past state and the target's current value, conditional on
    the target's past. Past states need to be defined in the settings
    dictionary, where a past state is defined as a uniform embedding with
    parameters history and tau. The history describes the number of samples
    taken from a variable's past, tau describes the embedding delay, i.e.,
    the spacing between every two samples from a variable's past.
See parent class for references. Results are returned in nats.
Args:
settings : dict
sets estimation parameters:
- history_target : int - number of samples in the target's past
used as embedding
- history_source : int [optional] - number of samples in the
source's past used as embedding (default=same as the target
history)
- tau_source : int [optional] - source's embedding delay
(default=1)
- tau_target : int [optional] - target's embedding delay
(default=1)
- source_target_delay : int [optional] - information transfer delay
between source and target (default=1)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
- algorithm_num : int [optional] - which Kraskov algorithm (1 or 2)
to use (default=1)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the CMI estimator to save
computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
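    Example:
        A minimal sketch for a unidirectionally coupled pair (illustrative
        data; the embedding parameters are assumptions, not recommendations):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtKraskovTE
        >>> n = 1000
        >>> source = np.random.randn(n)
        >>> target = np.zeros(n)
        >>> target[1:] = 0.5 * source[:-1] + 0.5 * np.random.randn(n - 1)
        >>> est = JidtKraskovTE({'history_target': 1})
        >>> te = est.estimate(source, target)  # TE in nats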
"""
def __init__(self, settings):
settings = self._check_settings(settings)
# Start JAVA virtual machine.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
TransferEntropyCalculatorKraskov)
# Get embedding and delay parameters.
settings = self._set_te_defaults(settings)
super().__init__(CalcClass, settings)
    def estimate(self, source, target):
"""Estimate transfer entropy from a source to a target variable.
Args:
source : numpy array
realisations of source variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
            target : numpy array
                realisations of target variable (similar to source)
Returns:
float | numpy array
average TE over all samples or local TE for individual
samples if 'local_values'=True
"""
source = self._ensure_one_dim_input(source)
target = self._ensure_one_dim_input(target)
# Check if number of points is sufficient for estimation.
self._check_number_of_points(source.shape[0] -
self.settings['source_target_delay'])
self.calc.initialise(self.settings['history_target'],
self.settings['tau_target'],
self.settings['history_source'],
self.settings['tau_source'],
self.settings['source_target_delay'])
self.calc.setObservations(source, target)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
class JidtDiscreteTE(JidtDiscrete):
"""Calculate TE with JIDT's implementation for discrete variables.
Calculate the transfer entropy between two time series processes.
Call JIDT via jpype and use the discrete estimator. Transfer entropy is
defined as the conditional mutual information between the source's past
state and the target's current value, conditional on the target's past.
See parent class for references.
Results are returned in bits.
Args:
settings : dict
sets estimation parameters:
- history_target : int - number of samples in the target's past
used as embedding. (>= 0)
- history_source : int [optional] - number of samples in the
source's past used as embedding (default=same as the target
history). (>= 1)
- tau_source : int [optional] - source's embedding delay
(default=1). (>= 1)
- tau_target : int [optional] - target's embedding delay
(default=1). (>= 1)
- source_target_delay : int [optional] - information transfer delay
between source and target (default=1) (>= 0)
- discretise_method : str [optional] - if and how to discretise
incoming continuous data, can be 'max_ent' for maximum entropy
binning, 'equal' for equal size bins, and 'none' if no binning is
required (default='none')
- n_discrete_bins : int [optional] - number of discrete bins/
levels or the base of each dimension of the discrete variables
(default=2). If set, this parameter overwrites/sets alph1 and
alph2. (>= 2)
- alph1 : int [optional] - number of discrete bins/levels for
source (default=2, or the value set for n_discrete_bins). (>= 2)
- alph2 : int [optional] - number of discrete bins/levels for
target (default=2, or the value set for n_discrete_bins). (>= 2)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
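    Example:
        A minimal sketch for binary processes (illustrative data):

        >>> import numpy as np
        >>> from idtxl.estimators_jidt import JidtDiscreteTE
        >>> n = 1000
        >>> source = np.random.randint(0, 2, size=n)
        >>> target = np.zeros(n, dtype=int)
        >>> target[1:] = source[:-1]  # target copies source at delay 1
        >>> est = JidtDiscreteTE({'history_target': 1})
        >>> te = est.estimate(source, target)  # TE in bits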
"""
def __init__(self, settings):
settings = self._check_settings(settings)
# Get embedding and delay parameters.
settings = self._set_te_defaults(settings)
# Get alphabet sizes and check if discretisation is requested. Try to
# overwrite alphabet sizes with number of bins.
try:
n_discrete_bins = int(settings['n_discrete_bins'])
settings['alph1'] = n_discrete_bins
settings['alph2'] = n_discrete_bins
except KeyError:
# do nothing and set alphabet sizes to default below
pass
settings.setdefault('alph1', 2)
settings.setdefault('alph2', 2)
assert type(settings['alph1']) is int, (
'Num discrete levels for source has to be an integer.')
assert type(settings['alph2']) is int, (
'Num discrete levels for target has to be an integer.')
assert settings['alph1'] >= 2, (
'Num discrete levels for source must be >= 2')
assert settings['alph2'] >= 2, (
'Num discrete levels for target must be >= 2')
super().__init__(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.discrete').
TransferEntropyCalculatorDiscrete)
self.calc = CalcClass()
self.calc.setDebug(self.settings['debug'])
def estimate(self, source, target, return_calc=False):
"""Estimate transfer entropy from a source to a target variable.
Args:
source : numpy array
realisations of source variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
target : numpy array
realisations of target variable (similar to source)
return_calc : bool [optional]
return the JIDT calculator object in addition to the numeric
estimate(s) (default=False)
Returns:
float | numpy array
average TE over all samples or local TE for individual
samples if 'local_values'=True
Java object
JIDT calculator that was used here. Only returned if
return_calc was set.
Raises:
ex.JidtOutOfMemoryError
Raised when JIDT object cannot be instantiated due to mem error
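Example:
    A sketch of retrieving the underlying JIDT calculator alongside
    the estimate (est, source, and target as in the class example
    above):

    >>> te, calc = est.estimate(source, target, return_calc=True)
    >>> null_dist = calc.computeSignificance()  # analytic null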
"""
source = self._ensure_one_dim_input(source)
target = self._ensure_one_dim_input(target)
# Discretise variables if requested.
source, target = self._discretise_vars(source, target)
# And finally make the TE calculation:
max_base = max(self.settings['alph1'], self.settings['alph2'])
try:
self.calc.initialise(max_base,
self.settings['history_target'],
self.settings['tau_target'],
self.settings['history_source'],
self.settings['tau_source'],
self.settings['source_target_delay'])
except Exception:
    # Catch the generic Exception to handle both jp.JException
    # (JPype v0.7) and jp.JavaException (JPype < v0.7). The only
    # exception that can be raised here (given max_base >= 2) is a
    # Java OutOfMemoryException:
    assert max_base >= 2
raise ex.JidtOutOfMemoryError(
'Cannot instantiate JIDT TE discrete estimator with max_base ='
' {} and history_target = {} and history_source = {}. Try '
're-running with an increased Java heap size.'.format(
max_base,
self.settings['history_target'],
self.settings['history_source']))
# Unfortunately no faster way to pass numpy arrays in than this list
# conversion
self.calc.addObservations(
jp.JArray(jp.JInt, 1)(source.tolist()),
jp.JArray(jp.JInt, 1)(target.tolist()))
if self.settings['local_values']:
result = np.array(self.calc.computeLocalFromPreviousObservations(
jp.JArray(jp.JInt, 1)(source.tolist()),
jp.JArray(jp.JInt, 1)(target.tolist())))
else:
result = float(self.calc.computeAverageLocalOfObservations())
if return_calc:
return (result, self.calc)
else:
return result
def get_analytic_distribution(self, source, target):
"""Return a JIDT AnalyticNullDistribution object.
Required so that the estimate_surrogates_analytic method can use the
common_estimate_surrogates_analytic() function; data is formatted as
for this estimator's estimate method.
Args:
source : numpy array
realisations of source variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations], array type can be
float (requires discretisation) or int
target : numpy array
realisations of target variable (similar to source)
Returns:
Java object
JIDT AnalyticNullDistribution object computed from the calculator
used for estimation
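Example:
    A sketch of sampling from the analytic null distribution
    returned by this method (est, source, and target are
    illustrative):

    >>> dist = est.get_analytic_distribution(source, target)
    >>> surrogate = dist.computeEstimateForGivenPValue(0.05)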
"""
# Make one estimate to prepare the calculator:
(est, jidt_calc) = self.estimate(source, target, True)
return jidt_calc.computeSignificance()
class JidtGaussianTE(JidtGaussian):
"""Calculate transfer entropy with JIDT's Gaussian implementation.
Calculate transfer entropy between a source and a target variable using
JIDT's implementation of the Gaussian estimator. Transfer entropy is
defined as the conditional mutual information between the source's past
state and the target's current value, conditional on the target's past.
Past states need to be defined in the settings dictionary, where a past
state is defined as a uniform embedding with parameters history and tau.
The history describes the number of samples taken from a variable's past,
tau describes the embedding delay, i.e., the spacing between every two
samples from the processes' past.
See parent class for references. Results are returned in nats.
Args:
settings : dict
sets estimation parameters:
- history_target : int - number of samples in the target's past
used as embedding
- history_source : int [optional] - number of samples in the
source's past used as embedding (default=same as the target
history)
- tau_source : int [optional] - source's embedding delay
(default=1)
- tau_target : int [optional] - target's embedding delay
(default=1)
- source_target_delay : int [optional] - information transfer delay
between source and target (default=1)
- debug : bool [optional] - return debug information when calling
JIDT (default=False)
- local_values : bool [optional] - return local TE instead of
average TE (default=False)
Note:
Some technical details: JIDT normalises over realisations, IDTxl
normalises over raw data once, outside the CMI estimator to save
computation time. The Theiler window ignores trial boundaries. The
CMI estimator adds noise to the data by default. To make analysis
runs replicable, set noise_level to 0.
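Example:
    A minimal usage sketch (a linearly coupled pair, for which the
    Gaussian model is appropriate; parameters are illustrative):

    >>> import numpy as np
    >>> settings = {'history_target': 1, 'source_target_delay': 1}
    >>> est = JidtGaussianTE(settings)
    >>> source = np.random.randn(1000)
    >>> target = 0.5 * np.roll(source, 1) + np.random.randn(1000)
    >>> te = est.estimate(source, target)  # average TE in nats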
"""
def __init__(self, settings):
settings = self._check_settings(settings)
# Start JAVA virtual machine and create JAVA object.
self._start_jvm()
CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
TransferEntropyCalculatorGaussian)
# Get embedding and delay parameters.
settings = self._set_te_defaults(settings)
super().__init__(CalcClass, settings)
def estimate(self, source, target):
"""Estimate transfer entropy from a source to a target variable.
Args:
source : numpy array
realisations of source variable, either a 2D numpy array where
array dimensions represent [realisations x variable dimension]
or a 1D array representing [realisations]
target : numpy array
realisations of target variable (similar to source)
Returns:
float | numpy array
average TE over all samples or local TE for individual
samples if 'local_values'=True
"""
source = self._ensure_one_dim_input(source)
target = self._ensure_one_dim_input(target)
self.calc.initialise(self.settings['history_target'],
self.settings['tau_target'],
self.settings['history_source'],
self.settings['tau_source'],
self.settings['source_target_delay'])
self.calc.setObservations(source, target)
if self.settings['local_values']:
return np.array(self.calc.computeLocalOfPreviousObservations())
else:
return float(self.calc.computeAverageLocalOfObservations())
def common_estimate_surrogates_analytic(estimator, n_perm=200, **data):
"""Estimate the surrogate distribution analytically for JidtEstimator.
Estimate the surrogate distribution analytically for a JidtEstimator
which is_analytic_null_estimator(), by sampling estimates at random
p-values in the analytic distribution.
Args:
estimator : a JidtEstimator object, which returns True to a call to
its is_analytic_null_estimator() method
n_perm : int
number of permutations (default=200)
data : numpy arrays
realisations of random variables required for the calculation
(varies between estimators, e.g. 2 variables for MI, 3 for CMI)
Returns:
float | numpy array
n_perm surrogates of the average MI/CMI/TE over all samples
under the null hypothesis of no relationship between the source and
target variables (conditional on the conditioning variable, where
applicable)
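Example:
    A sketch of drawing analytic surrogates for an estimator that
    supports analytic null distributions (est, source, and target
    are illustrative; the keyword names must match the estimator's
    estimate() signature):

    >>> surrogates = common_estimate_surrogates_analytic(
    ...     est, n_perm=100, source=source, target=target)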
"""
# Compute the statistical significance of the estimate to get an
# AnalyticMeasurementDistribution object:
analytic_distribution = estimator.get_analytic_distribution(**data)
# Then compute surrogates at n_perm random p-values
surrogate_estimates = np.empty(n_perm)
for perm in range(n_perm):
surrogate_estimates[perm] = \
analytic_distribution.computeEstimateForGivenPValue(
np.random.random())
return surrogate_estimates