Source code for idtxl.estimators_jidt

"""Provide JIDT estimators."""
from pkg_resources import resource_filename
import numpy as np
from abc import abstractmethod
from idtxl.estimator import Estimator
from . import idtxl_exceptions as ex
from . import idtxl_utils as utils
try:
    import jpype as jp
except ImportError as err:
    ex.package_missing(err, 'Jpype is not available on this system. Install it'
                            ' from https://pypi.python.org/pypi/JPype1 to use '
                            'JAVA/JIDT-powered CMI estimation.')


class JidtEstimator(Estimator):
    """Abstract class for implementation of JIDT estimators.

    Abstract class for implementation of JIDT estimators, child classes
    implement estimators for mutual information (MI), conditional mutual
    information (CMI), active information storage (AIS), and transfer entropy
    (TE) using the Kraskov-Grassberger-Stoegbauer estimator for continuous
    data, plug-in estimators for discrete data, and Gaussian estimators for
    continuous Gaussian data.

    References:

    - Lizier, Joseph T. (2014). JIDT: an information-theoretic toolkit for
      studying the dynamics of complex systems. Front Robot AI, 1(11).
    - Kraskov, A., Stoegbauer, H., & Grassberger, P. (2004). Estimating
      mutual information. Phys Rev E, 69(6), 066138.
    - Lizier, Joseph T., Mikhail Prokopenko, and Albert Y. Zomaya. (2012).
      Local measures of information storage in complex distributed
      computation. Inform Sci, 208, 39-54.
    - Schreiber, T. (2000). Measuring information transfer. Phys Rev Lett,
      85(2), 461.

    Set common estimation parameters for JIDT estimators. For usage of these
    estimators see documentation for the child classes.

    Args:
        settings : dict [optional]
            set estimator parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
    """

    def __init__(self, settings=None):
        """Set default estimator settings."""
        settings.setdefault('local_values', False)
        settings.setdefault('debug', False)
        self.settings = settings.copy()

    def _start_jvm(self):
        """Start JAVA virtual machine if it is not running."""
        jar_location = resource_filename(__name__, 'infodynamics.jar')
        if not jp.isJVMStarted():
            jp.startJVM(jp.getDefaultJVMPath(), '-ea',
                        '-Djava.class.path=' + jar_location)

    def _set_te_defaults(self, settings):
        """Set defaults for transfer entropy estimation."""
        try:
            history_target = settings['history_target']
        except KeyError:
            raise RuntimeError('No target history was provided for TE '
                               'estimation.')
        settings.setdefault('history_source', history_target)
        settings.setdefault('tau_target', 1)
        settings.setdefault('tau_source', 1)
        settings.setdefault('source_target_delay', 1)

        assert type(settings['tau_target']) is int, (
            'Target tau has to be an integer.')
        assert type(settings['tau_source']) is int, (
            'Source tau has to be an integer.')
        assert type(settings['history_target']) is int, (
            'Target history has to be an integer.')
        assert type(settings['history_source']) is int, (
            'Source history has to be an integer.')
        assert type(settings['source_target_delay']) is int, (
            'Source-target delay has to be an integer.')
        assert settings['tau_target'] >= 1, 'Target tau must be >= 1'
        assert settings['tau_source'] >= 1, 'Source tau must be >= 1'
        assert settings['history_target'] >= 0, 'Target history must be >= 0'
        assert settings['history_source'] >= 1, 'Source history must be >= 1'
        assert settings['source_target_delay'] >= 0, (
            'Source-target delay must be >= 0')
        return settings

    def is_parallel(self):
        return False
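

# Illustrative sketch (not part of the original module): how _set_te_defaults
# resolves a minimal settings dict for the TE estimators defined below. Only
# 'history_target' is mandatory; 'history_source' falls back to the target
# history, and the embedding delays and the source-target delay default to 1.
def _example_te_defaults():
    est = JidtKraskovTE(settings={'history_target': 3})
    # est.settings now also contains history_source=3, tau_target=1,
    # tau_source=1 and source_target_delay=1.
    return est.settings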


class JidtKraskov(JidtEstimator):
    """Abstract class for implementation of JIDT Kraskov-estimators.

    Abstract class for implementation of JIDT Kraskov-estimators, child
    classes implement estimators for mutual information (MI), conditional
    mutual information (CMI), active information storage (AIS), and transfer
    entropy (TE) using the Kraskov-Grassberger-Stoegbauer estimator for
    continuous data. See parent class for references.

    Set common estimation parameters for JIDT Kraskov-estimators. For usage
    of these estimators see documentation for the child classes.

    Args:
        CalcClass : JAVA class
            JAVA class returned by jpype.JPackage
        settings : dict [optional]
            set estimator parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - kraskov_k : int [optional] - no. nearest neighbours for KNN
              search (default=4)
            - normalise : bool [optional] - z-standardise data (default=False)
            - theiler_t : int [optional] - no. next temporal neighbours
              ignored in KNN and range searches (default=0)
            - noise_level : float [optional] - random noise added to the data
              (default=1e-8)
            - num_threads : int | str [optional] - number of threads used for
              estimation (default='USE_ALL', note that this uses *all*
              available threads on the current machine)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1). Only applied at this method for TE and
              AIS (it is already applied for MI/CMI). Note that the default
              algorithm of 1 here differs from the default ALG_NUM argument
              of the JIDT AIS KSG estimator.
    """

    def __init__(self, CalcClass, settings=None):
        # Set default estimator settings.
        settings.setdefault('kraskov_k', str(4))
        settings.setdefault('normalise', 'false')
        settings.setdefault('theiler_t', str(0))
        settings.setdefault('noise_level', 1e-8)
        settings.setdefault('num_threads', 'USE_ALL')
        settings.setdefault('algorithm_num', 1)
        assert type(settings['algorithm_num']) is int, (
            'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2')
        super().__init__(settings)

        # Set properties of JIDT's estimator object.
        self.calc = CalcClass()
        self.calc.setProperty('ALG_NUM', str(self.settings['algorithm_num']))
        self.calc.setProperty('NORMALISE',
                              str(self.settings['normalise']).lower())
        self.calc.setProperty('k', str(self.settings['kraskov_k']))
        self.calc.setProperty('DYN_CORR_EXCL',
                              str(self.settings['theiler_t']))
        self.calc.setProperty('NOISE_LEVEL_TO_ADD',
                              str(self.settings['noise_level']))
        self.calc.setProperty('NUM_THREADS',
                              str(self.settings['num_threads']))
        self.calc.setDebug(self.settings['debug'])

    def is_analytic_null_estimator(self):
        return False


class JidtDiscrete(JidtEstimator):
    """Abstract class for implementation of discrete JIDT-estimators.

    Abstract class for implementation of plug-in JIDT-estimators for discrete
    data. Child classes implement estimators for mutual information (MI),
    conditional mutual information (CMI), active information storage (AIS),
    and transfer entropy (TE). See parent class for references.

    Set common estimation parameters for discrete JIDT-estimators. For usage
    of these estimators see documentation for the child classes.

    Args:
        settings : dict [optional]
            set estimator parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - discretise_method : str [optional] - if and how to discretise
              incoming continuous data, can be 'max_ent' for maximum entropy
              binning, 'equal' for equal size bins, and 'none' if no binning
              is required (default='none')

    Note:
        Discrete JIDT estimators require the data's alphabet size for
        instantiation. Hence, as opposed to the Kraskov and Gaussian
        estimators, the JAVA class is added to the object instance, while for
        Kraskov/Gaussian estimators an instance of that class is added
        (because for the latter, objects can be instantiated independent of
        data properties).
    """

    def __init__(self, settings):
        settings.setdefault('discretise_method', 'none')
        super().__init__(settings)

    def _discretise_vars(self, var1, var2, conditional=None):
        # Discretise variables if requested. Otherwise assert that data are
        # discrete and that the provided alphabet sizes are correct.
        if self.settings['discretise_method'] == 'equal':
            var1 = utils.discretise(var1, self.settings['alph1'])
            var2 = utils.discretise(var2, self.settings['alph2'])
            if conditional is not None:
                conditional = utils.discretise(conditional,
                                               self.settings['alphc'])
        elif self.settings['discretise_method'] == 'max_ent':
            var1 = utils.discretise_max_ent(var1, self.settings['alph1'])
            var2 = utils.discretise_max_ent(var2, self.settings['alph2'])
            if conditional is not None:
                conditional = utils.discretise_max_ent(
                    conditional, self.settings['alphc'])
        elif self.settings['discretise_method'] == 'none':
            assert issubclass(var1.dtype.type, np.integer), (
                'Var1 is not an integer numpy array. '
                'Discretise data to use this estimator.')
            assert issubclass(var2.dtype.type, np.integer), (
                'Var2 is not an integer numpy array. '
                'Discretise data to use this estimator.')
            assert np.min(var1) >= 0, 'Minimum of var1 is smaller than 0.'
            assert np.min(var2) >= 0, 'Minimum of var2 is smaller than 0.'
            assert np.max(var1) < self.settings['alph1'], (
                'Maximum of var1 is larger than the alphabet size.')
            assert np.max(var2) < self.settings['alph2'], (
                'Maximum of var2 is larger than the alphabet size.')
            if conditional is not None:
                assert np.min(conditional) >= 0, (
                    'Minimum of conditional is smaller than 0.')
                assert issubclass(conditional.dtype.type, np.integer), (
                    'Conditional is not an integer numpy array. '
                    'Discretise data to use this estimator.')
                assert np.max(conditional) < self.settings['alphc'], (
                    'Maximum of conditional is larger than the alphabet '
                    'size.')
        else:
            raise ValueError('Unknown discretisation method.')
        if conditional is not None:
            return var1, var2, conditional
        else:
            return var1, var2

    def is_analytic_null_estimator(self):
        return True

    @abstractmethod
    def get_analytic_distribution(self, **data):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            data : numpy arrays
                realisations of random variables required for the calculation
                (varies between estimators, e.g. 2 variables for MI, 3 for
                CMI). Formatted as per the estimate method for this
                estimator.

        Returns:
            Java object
                JIDT calculator that was used here
        """
        pass

    def estimate_surrogates_analytic(self, n_perm=200, **data):
        """Return estimate of the analytical surrogate distribution.

        This method must be implemented because this class'
        is_analytic_null_estimator() method returns true.

        Args:
            n_perm : int [optional]
                number of permutations (default=200)
            data : numpy arrays
                realisations of random variables required for the calculation
                (varies between estimators, e.g. 2 variables for MI, 3 for
                CMI). Formatted as per the estimate method for this
                estimator.

        Returns:
            float | numpy array
                n_perm surrogates of the average MI/CMI/TE over all samples
                under the null hypothesis of no relationship between var1 and
                var2 (in the context of conditional)
        """
        return common_estimate_surrogates_analytic(self, n_perm, **data)


class JidtGaussian(JidtEstimator):
    """Abstract class for implementation of JIDT Gaussian-estimators.

    Abstract class for implementation of JIDT Gaussian-estimators, child
    classes implement estimators for mutual information (MI), conditional
    mutual information (CMI), active information storage (AIS), and transfer
    entropy (TE) using JIDT's Gaussian estimator for continuous data. See
    parent class for references.

    Set common estimation parameters for JIDT Gaussian-estimators. For usage
    of these estimators see documentation for the child classes. Results are
    returned in nats.

    Args:
        CalcClass : JAVA class
            JAVA class returned by jpype.JPackage
        settings : dict [optional]
            set estimator parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
    """

    def __init__(self, CalcClass, settings):
        super().__init__(settings)
        self.calc = CalcClass()
        self.calc.setDebug(self.settings['debug'])

    def is_analytic_null_estimator(self):
        return True

    def get_analytic_distribution(self, **data):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            data : numpy arrays
                realisations of random variables required for the calculation
                (varies between estimators, e.g. 2 variables for MI, 3 for
                CMI). Formatted as per the estimate method for this
                estimator.

        Returns:
            Java object
                JIDT calculator that was used here
        """
        # Make one estimate to prepare the calculator:
        self.estimate(**data)
        return self.calc.computeSignificance()

    def estimate_surrogates_analytic(self, n_perm=200, **data):
        """Estimate the surrogate distribution analytically.

        This method must be implemented because this class'
        is_analytic_null_estimator() method returns true.

        Args:
            n_perm : int
                number of permutations (default=200)
            data : numpy arrays
                realisations of random variables required for the calculation
                (varies between estimators, e.g. 2 variables for MI, 3 for
                CMI). Formatted as per estimate_parallel for this estimator.

        Returns:
            float | numpy array
                n_perm surrogates of the average MI/CMI/TE over all samples
                under the null hypothesis of no relationship between var1 and
                var2 (in the context of conditional)
        """
        return common_estimate_surrogates_analytic(self, n_perm, **data)


class JidtKraskovCMI(JidtKraskov):
    """Calculate conditional mutual information with JIDT's Kraskov estimator.

    Calculate the conditional mutual information (CMI) between three
    variables. Call JIDT via jpype and use the Kraskov estimator (algorithm 1
    by default). If no conditional is given (is None), the function returns
    the mutual information between var1 and var2. See parent class for
    references. Results are returned in nats.

    Args:
        settings : dict [optional]
            set estimator parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - kraskov_k : int [optional] - no. nearest neighbours for KNN
              search (default=4)
            - normalise : bool [optional] - z-standardise data (default=False)
            - theiler_t : int [optional] - no. next temporal neighbours
              ignored in KNN and range searches (default=0)
            - noise_level : float [optional] - random noise added to the data
              (default=1e-8)
            - num_threads : int | str [optional] - number of threads used for
              estimation (default='USE_ALL', note that this uses *all*
              available threads on the current machine)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the CMI estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        settings.setdefault('algorithm_num', 1)
        assert type(settings['algorithm_num']) is int, (
            'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2')
        if settings['algorithm_num'] == 1:
            CalcClass = (
                jp.JPackage('infodynamics.measures.continuous.kraskov').
                ConditionalMutualInfoCalculatorMultiVariateKraskov1)
        else:
            CalcClass = (
                jp.JPackage('infodynamics.measures.continuous.kraskov').
                ConditionalMutualInfoCalculatorMultiVariateKraskov2)
        super().__init__(CalcClass, settings)

    def estimate(self, var1, var2, conditional=None):
        """Estimate conditional mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            var2 : numpy array
                realisations of the second variable (similar to var1)
            conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1),
                if no conditional is provided, return MI between var1 and
                var2

        Returns:
            float | numpy array
                average CMI over all samples or local CMI for individual
                samples if 'local_values'=True
        """
        # Return MI if no conditional was provided.
        if conditional is None:
            est_mi = JidtKraskovMI(self.settings)
            return est_mi.estimate(var1, var2)
        else:
            assert conditional.size != 0, 'Conditional array is empty.'

        # Check if variable realisations are passed as 1D or 2D arrays and
        # have an equal no. observations.
        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)
        cond = self._ensure_two_dim_input(conditional)
        assert var1.shape[0] == var2.shape[0], (
            'Unequal number of observations (var1: {0}, var2: {1}).'.format(
                var1.shape[0], var2.shape[0]))
        assert var1.shape[0] == cond.shape[0], (
            'Unequal number of observations (var1: {0}, cond: {1}).'.format(
                var1.shape[0], cond.shape[0]))

        # Check if number of points is sufficient for estimation.
        self._check_number_of_points(var1.shape[0])

        self.calc.initialise(var1.shape[1], var2.shape[1], cond.shape[1])
        self.calc.setObservations2D(var1, var2, cond)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
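

# Illustrative usage sketch (not part of the original module), assuming a
# working JPype/JIDT installation. The synthetic data and all variable names
# are hypothetical; array shapes follow the estimate() docstring above.
def _example_kraskov_cmi():
    rng = np.random.default_rng(0)
    n = 1000
    cond = rng.normal(size=(n, 1))          # common driver
    var1 = cond + rng.normal(size=(n, 1))   # depends on cond only
    var2 = cond + rng.normal(size=(n, 1))   # depends on cond only
    est = JidtKraskovCMI(settings={'kraskov_k': 4, 'noise_level': 0})
    # Conditioning on the common driver should remove the dependence:
    print('CMI (nats):', est.estimate(var1, var2, cond))  # close to 0
    print('MI (nats):', est.estimate(var1, var2))         # clearly > 0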


class JidtDiscreteCMI(JidtDiscrete):
    """Calculate CMI with JIDT's implementation for discrete variables.

    Calculate the conditional mutual information between two variables given
    the third. Call JIDT via jpype and use the discrete estimator. See parent
    class for references. Results are returned in bits.

    Args:
        settings : dict [optional]
            sets estimation parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - discretise_method : str [optional] - if and how to discretise
              incoming continuous data, can be 'max_ent' for maximum entropy
              binning, 'equal' for equal size bins, and 'none' if no binning
              is required (default='none')
            - n_discrete_bins : int [optional] - number of discrete
              bins/levels or the base of each dimension of the discrete
              variables (default=2). If set, this parameter overwrites/sets
              alph1, alph2 and alphc
            - alph1 : int [optional] - number of discrete bins/levels for
              var1 (default=2, or the value set for n_discrete_bins)
            - alph2 : int [optional] - number of discrete bins/levels for
              var2 (default=2, or the value set for n_discrete_bins)
            - alphc : int [optional] - number of discrete bins/levels for the
              conditional (default=2, or the value set for n_discrete_bins)
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Set default alphabet sizes. Try to overwrite alphabet sizes with
        # the number of bins for discretisation if provided, otherwise assume
        # binary variables.
        try:
            n_discrete_bins = int(settings['n_discrete_bins'])
            settings['alph1'] = n_discrete_bins
            settings['alph2'] = n_discrete_bins
            settings['alphc'] = n_discrete_bins
        except KeyError:
            pass  # Do nothing and use the defaults for alph_* set below.
        settings.setdefault('alph1', int(2))
        settings.setdefault('alph2', int(2))
        settings.setdefault('alphc', int(2))
        super().__init__(settings)

        # Start JAVA virtual machine and create JAVA object. Add JAVA object
        # to instance.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.discrete').
                     ConditionalMutualInformationCalculatorDiscrete)
        self.calc = CalcClass()
        self.calc.setDebug(self.settings['debug'])
        # Keep a reference to an MI calculator if we need to use it (memory
        # used here is minimal, and better than recreating it each time).
        self.mi_calc = JidtDiscreteMI(self.settings)

    def estimate(self, var1, var2, conditional=None, return_calc=False):
        """Estimate conditional mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations], array
                type can be float (requires discretisation) or int
            var2 : numpy array
                realisations of the second variable (similar to var1)
            conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1),
                if no conditional is provided, return MI between var1 and
                var2
            return_calc : boolean
                return the calculator used here as well as the numeric
                calculated value(s)

        Returns:
            float | numpy array
                average CMI over all samples or local CMI for individual
                samples if 'local_values'=True
            Java object
                JIDT calculator that was used here. Only returned if
                return_calc was set.

        Raises:
            ex.JidtOutOfMemoryError
                Raised when the JIDT object cannot be instantiated due to a
                memory error
        """
        # Calculate an MI if no conditional was provided.
        if (conditional is None) or (self.settings['alphc'] == 0):
            # The return value will be just the estimate if return_calc is
            # False, or the estimate plus the JIDT MI calculator if
            # return_calc is True:
            return self.mi_calc.estimate(var1, var2, return_calc)
        else:
            assert conditional.size != 0, 'Conditional array is empty.'

        # Check and remember the no. dimensions for each variable before
        # collapsing them into univariate arrays later.
        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)
        conditional = self._ensure_two_dim_input(conditional)
        var1_dim = var1.shape[1]
        var2_dim = var2.shape[1]
        cond_dim = conditional.shape[1]

        # Discretise if requested.
        var1, var2, conditional = self._discretise_vars(
            var1, var2, conditional)

        # Then collapse any multivariates into univariate arrays:
        var1 = utils.combine_discrete_dimensions(var1, self.settings['alph1'])
        var2 = utils.combine_discrete_dimensions(var2, self.settings['alph2'])
        conditional = utils.combine_discrete_dimensions(
            conditional, self.settings['alphc'])

        # We have a non-trivial conditional, so make a proper conditional MI
        # calculation.
        alph1_base = int(np.power(self.settings['alph1'], var1_dim))
        alph2_base = int(np.power(self.settings['alph2'], var2_dim))
        cond_base = int(np.power(self.settings['alphc'], cond_dim))
        try:
            self.calc.initialise(alph1_base, alph2_base, cond_base)
        except:
            # Handles both jp.JException (JPype v0.7) and jp.JavaException
            # (JPype < v0.7). The only possible exception that can be raised
            # here (if all bases >= 2) is a Java OutOfMemoryException:
            assert alph1_base >= 2
            assert alph2_base >= 2
            assert cond_base >= 2
            raise ex.JidtOutOfMemoryError(
                'Cannot instantiate JIDT CMI discrete estimator with '
                'alph1_base = {}, alph2_base = {}, cond_base = {}. Try '
                're-running with an increased Java heap size.'.format(
                    alph1_base, alph2_base, cond_base))
        # Unfortunately there is no faster way to pass numpy arrays in than
        # this list conversion.
        self.calc.addObservations(
            jp.JArray(jp.JInt, 1)(var1.tolist()),
            jp.JArray(jp.JInt, 1)(var2.tolist()),
            jp.JArray(jp.JInt, 1)(conditional.tolist()))
        if self.settings['local_values']:
            result = np.array(self.calc.computeLocalFromPreviousObservations(
                jp.JArray(jp.JInt, 1)(var1.tolist()),
                jp.JArray(jp.JInt, 1)(var2.tolist()),
                jp.JArray(jp.JInt, 1)(conditional.tolist())))
        else:
            result = float(self.calc.computeAverageLocalOfObservations())
        if return_calc:
            return (result, self.calc)
        else:
            return result

    def get_analytic_distribution(self, var1, var2, conditional=None):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations], array
                type can be float (requires discretisation) or int
            var2 : numpy array
                realisations of the second variable (similar to var1)
            conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1),
                if no conditional is provided, return MI between var1 and
                var2

        Returns:
            Java object
                JIDT calculator that was used here
        """
        # Make one estimate to prepare the calculator:
        (est, jidt_calc) = self.estimate(var1, var2, conditional, True)
        return jidt_calc.computeSignificance()
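

# Illustrative usage sketch (not part of the original module): the XOR case,
# where source and target look independent until one conditions on the
# second input. Data and names are hypothetical.
def _example_discrete_cmi():
    rng = np.random.default_rng(0)
    n = 1000
    source = rng.integers(0, 2, size=n)
    cond = rng.integers(0, 2, size=n)
    target = np.bitwise_xor(source, cond)
    est = JidtDiscreteCMI(settings={'n_discrete_bins': 2})
    print('CMI (bits):', est.estimate(source, target, cond))  # ~1 bit
    print('MI (bits):', est.estimate(source, target))         # ~0 bits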


class JidtDiscreteMI(JidtDiscrete):
    """Calculate MI with JIDT's discrete-variable implementation.

    Calculate the mutual information (MI) between two variables. Call JIDT
    via jpype and use the discrete estimator. See parent class for
    references. Results are returned in bits.

    Args:
        settings : dict [optional]
            sets estimation parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - discretise_method : str [optional] - if and how to discretise
              incoming continuous data, can be 'max_ent' for maximum entropy
              binning, 'equal' for equal size bins, and 'none' if no binning
              is required (default='none')
            - n_discrete_bins : int [optional] - number of discrete
              bins/levels or the base of each dimension of the discrete
              variables (default=2). If set, this parameter overwrites/sets
              alph1 and alph2
            - alph1 : int [optional] - number of discrete bins/levels for
              var1 (default=2, or the value set for n_discrete_bins)
            - alph2 : int [optional] - number of discrete bins/levels for
              var2 (default=2, or the value set for n_discrete_bins)
            - lag_mi : int [optional] - time difference in samples to
              calculate the lagged MI between processes (default=0)
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Set default alphabet sizes. Try to overwrite alphabet sizes with
        # the number of bins for discretisation if provided, otherwise assume
        # binary variables.
        super().__init__(settings)
        self.settings.setdefault('lag_mi', int(0))
        try:
            n_discrete_bins = int(self.settings['n_discrete_bins'])
            self.settings['alph1'] = n_discrete_bins
            self.settings['alph2'] = n_discrete_bins
        except KeyError:
            pass  # Do nothing and use the defaults for alph_* set below.
        self.settings.setdefault('alph1', int(2))
        self.settings.setdefault('alph2', int(2))

        # Start JAVA virtual machine and create JAVA object. Add JAVA object
        # to instance.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.discrete').
                     MutualInformationCalculatorDiscrete)
        self.calc = CalcClass()
        self.calc.setDebug(self.settings['debug'])

    def estimate(self, var1, var2, return_calc=False):
        """Estimate mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations], array
                type can be float (requires discretisation) or int
            var2 : numpy array
                realisations of the second variable (similar to var1)
            return_calc : boolean
                return the calculator used here as well as the numeric
                calculated value(s)

        Returns:
            float | numpy array
                average MI over all samples or local MI for individual
                samples if 'local_values'=True
            Java object
                JIDT calculator that was used here. Only returned if
                return_calc was set.

        Raises:
            ex.JidtOutOfMemoryError
                Raised when the JIDT object cannot be instantiated due to a
                memory error
        """
        # Check and remember the no. dimensions for each variable before
        # collapsing them into univariate arrays later.
        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)
        var1_dim = var1.shape[1]
        var2_dim = var2.shape[1]

        # Discretise variables if requested.
        var1, var2 = self._discretise_vars(var1, var2)

        # Then collapse any multivariates into univariate arrays:
        var1 = utils.combine_discrete_dimensions(var1, self.settings['alph1'])
        var2 = utils.combine_discrete_dimensions(var2, self.settings['alph2'])

        # Initialise the estimator.
        base_for_var1 = int(np.power(self.settings['alph1'], var1_dim))
        base_for_var2 = int(np.power(self.settings['alph2'], var2_dim))
        try:
            self.calc.initialise(base_for_var1, base_for_var2,
                                 self.settings['lag_mi'])
        except:
            # Handles both jp.JException (JPype v0.7) and jp.JavaException
            # (JPype < v0.7). The only possible exception that can be raised
            # here (if base_for_var* >= 2) is a Java OutOfMemoryException:
            assert base_for_var1 >= 2
            assert base_for_var2 >= 2
            raise ex.JidtOutOfMemoryError(
                'Cannot instantiate JIDT MI discrete estimator with bases = '
                '{} and {}. Try re-running with an increased Java heap '
                'size.'.format(base_for_var1, base_for_var2))
        # Unfortunately there is no faster way to pass numpy arrays in than
        # this list conversion.
        self.calc.addObservations(jp.JArray(jp.JInt, 1)(var1.tolist()),
                                  jp.JArray(jp.JInt, 1)(var2.tolist()))
        if self.settings['local_values']:
            result = np.array(self.calc.computeLocalFromPreviousObservations(
                jp.JArray(jp.JInt, 1)(var1.tolist()),
                jp.JArray(jp.JInt, 1)(var2.tolist())))
        else:
            result = float(self.calc.computeAverageLocalOfObservations())
        if return_calc:
            return (result, self.calc)
        else:
            return result

    def get_analytic_distribution(self, var1, var2):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations], array
                type can be float (requires discretisation) or int
            var2 : numpy array
                realisations of the second variable (similar to var1)

        Returns:
            Java object
                JIDT calculator that was used here
        """
        # Make one estimate to prepare the calculator:
        (est, jidt_calc) = self.estimate(var1, var2, True)
        return jidt_calc.computeSignificance()
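

# Illustrative usage sketch (not part of the original module): MI between a
# binary variable and a noisy copy, plus an analytic surrogate distribution.
# Data, names, and the keyword names passed to the surrogate call are
# hypothetical.
def _example_discrete_mi():
    rng = np.random.default_rng(0)
    n = 1000
    var1 = rng.integers(0, 2, size=n)
    var2 = (var1 + (rng.random(n) < 0.1)) % 2  # var1 with 10% of bits flipped
    est = JidtDiscreteMI(settings={'n_discrete_bins': 2})
    print('MI (bits):', est.estimate(var1, var2))
    surrogates = est.estimate_surrogates_analytic(n_perm=100,
                                                  var1=var1, var2=var2)
    print('Mean surrogate MI:', np.mean(surrogates))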


class JidtKraskovMI(JidtKraskov):
    """Calculate mutual information with JIDT's Kraskov implementation.

    Calculate the mutual information between two variables. Call JIDT via
    jpype and use the Kraskov estimator (algorithm 1 by default). See parent
    class for references. Results are returned in nats.

    Args:
        settings : dict [optional]
            sets estimation parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - kraskov_k : int [optional] - no. nearest neighbours for KNN
              search (default=4)
            - normalise : bool [optional] - z-standardise data (default=False)
            - theiler_t : int [optional] - no. next temporal neighbours
              ignored in KNN and range searches (default=0)
            - noise_level : float [optional] - random noise added to the data
              (default=1e-8)
            - num_threads : int | str [optional] - number of threads used for
              estimation (default='USE_ALL', note that this uses *all*
              available threads on the current machine)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1)
            - lag_mi : int [optional] - time difference in samples to
              calculate the lagged MI between processes (default=0)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the MI estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        MI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        settings.setdefault('algorithm_num', 1)
        assert type(settings['algorithm_num']) is int, (
            'Algorithm number must be an integer.')
        assert settings['algorithm_num'] in (1, 2), (
            'Algorithm number must be 1 or 2')
        if settings['algorithm_num'] == 1:
            CalcClass = (
                jp.JPackage('infodynamics.measures.continuous.kraskov').
                MutualInfoCalculatorMultiVariateKraskov1)
        else:
            CalcClass = (
                jp.JPackage('infodynamics.measures.continuous.kraskov').
                MutualInfoCalculatorMultiVariateKraskov2)
        super().__init__(CalcClass, settings)

        # Get the lag; the second variable is shifted to account for the lag
        # in the estimate method if requested.
        self.settings.setdefault('lag_mi', 0)

    def estimate(self, var1, var2):
        """Estimate mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            var2 : numpy array
                realisations of the second variable (similar to var1)

        Returns:
            float | numpy array
                average MI over all samples or local MI for individual
                samples if 'local_values'=True
        """
        # Check if variable realisations are passed as 1D or 2D arrays.
        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)

        # Shift variables to calculate a lagged MI.
        if self.settings['lag_mi'] > 0:
            var1 = var1[:-self.settings['lag_mi'], :]
            var2 = var2[self.settings['lag_mi']:, :]

        # Check if number of points is sufficient for estimation.
        self._check_number_of_points(var1.shape[0])

        self.calc.initialise(var1.shape[1], var2.shape[1])
        self.calc.setObservations2D(var1, var2)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
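

# Illustrative usage sketch (not part of the original module): MI between a
# process and a delayed, noisy copy, recovered by setting lag_mi to the true
# delay. Data and names are hypothetical.
def _example_kraskov_mi():
    rng = np.random.default_rng(0)
    n = 1000
    var1 = rng.normal(size=n)
    var2 = np.roll(var1, 2) + 0.1 * rng.normal(size=n)  # var1 delayed by 2
    est = JidtKraskovMI(settings={'lag_mi': 2, 'noise_level': 0})
    print('Lagged MI (nats):', est.estimate(var1, var2))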


class JidtKraskovAIS(JidtKraskov):
    """Calculate active information storage with JIDT's Kraskov implementation.

    Calculate active information storage (AIS) for some process using JIDT's
    implementation of the Kraskov type 1 estimator. AIS is defined as the
    mutual information between the process's past state and its current
    value.

    The past state needs to be defined in the settings dictionary, where a
    past state is defined as a uniform embedding with parameters history and
    tau. The history describes the number of samples taken from the
    process's past, tau describes the embedding delay, i.e., the spacing
    between every two samples from the process's past. See parent class for
    references. Results are returned in nats.

    Args:
        settings : dict
            sets estimation parameters:

            - history : int - number of samples in the process's past used as
              embedding
            - tau : int [optional] - the process's embedding delay
              (default=1)
            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - kraskov_k : int [optional] - no. nearest neighbours for KNN
              search (default=4)
            - normalise : bool [optional] - z-standardise data (default=False)
            - theiler_t : int [optional] - no. next temporal neighbours
              ignored in KNN and range searches (default=0)
            - noise_level : float [optional] - random noise added to the data
              (default=1e-8)
            - num_threads : int | str [optional] - number of threads used for
              estimation (default='USE_ALL', note that this uses *all*
              available threads on the current machine)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the AIS estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        AIS estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        # Check for history for AIS estimation.
        try:
            settings['history']
        except KeyError:
            raise RuntimeError('No history was provided for AIS estimation.')
        settings.setdefault('tau', 1)
        assert type(settings['history']) is int, (
            'History has to be an integer.')
        assert type(settings['tau']) is int, 'Tau has to be an integer.'

        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
                     ActiveInfoStorageCalculatorKraskov)
        super().__init__(CalcClass, settings)

    def estimate(self, process):
        """Estimate active information storage.

        Args:
            process : numpy array
                realisations of the process, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]

        Returns:
            float | numpy array
                average AIS over all samples or local AIS for individual
                samples if 'local_values'=True
        """
        process = self._ensure_one_dim_input(process)

        # Check if number of points is sufficient for estimation.
        self._check_number_of_points(process.shape[0])

        self.calc.initialise(self.settings['history'], self.settings['tau'])
        self.calc.setObservations(process)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
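

# Illustrative usage sketch (not part of the original module): AIS of an
# AR(1) process, which actively stores information in its past. Data and
# names are hypothetical.
def _example_kraskov_ais():
    rng = np.random.default_rng(0)
    n = 1000
    process = np.zeros(n)
    for t in range(1, n):
        process[t] = 0.7 * process[t - 1] + rng.normal()  # AR(1), a = 0.7
    est = JidtKraskovAIS(settings={'history': 1, 'tau': 1})
    print('AIS (nats):', est.estimate(process))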


class JidtDiscreteAIS(JidtDiscrete):
    """Calculate AIS with JIDT's discrete-variable implementation.

    Calculate the active information storage (AIS) for one process. Call JIDT
    via jpype and use the discrete estimator. See parent class for
    references. Results are returned in bits.

    Args:
        settings : dict
            set estimator parameters:

            - history : int - number of samples in the process's past used as
              embedding (>= 0)
            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - discretise_method : str [optional] - if and how to discretise
              incoming continuous data, can be 'max_ent' for maximum entropy
              binning, 'equal' for equal size bins, and 'none' if no binning
              is required (default='none')
            - n_discrete_bins : int [optional] - number of discrete
              bins/levels or the base of each dimension of the discrete
              variables (default=2). If set, this parameter overwrites/sets
              alph (>= 2)
            - alph : int [optional] - number of discrete bins/levels for the
              process (default=2, or the value set for n_discrete_bins)
              (>= 2)
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        try:
            settings['history']
        except KeyError:
            raise RuntimeError('No history was provided for AIS estimation.')
        assert type(settings['history']) is int, (
            'History has to be an integer.')
        assert settings['history'] >= 0, 'History must be >= 0'

        # Get alphabet sizes and check if discretisation is requested.
        try:
            n_discrete_bins = int(settings['n_discrete_bins'])
            settings['alph'] = n_discrete_bins
        except KeyError:
            pass  # Do nothing and use the default for alph set below.
        settings.setdefault('alph', int(2))
        assert settings['alph'] >= 2, 'Number of bins must be >= 2'
        super().__init__(settings)

        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.discrete').
                     ActiveInformationCalculatorDiscrete)
        self.calc = CalcClass()
        self.calc.setDebug(self.settings['debug'])

    def estimate(self, process, return_calc=False):
        """Estimate active information storage.

        Args:
            process : numpy array
                realisations as either a 2D numpy array where array
                dimensions represent [realisations x variable dimension] or a
                1D array representing [realisations], array type can be float
                (requires discretisation) or int
            return_calc : boolean
                return the calculator used here as well as the numeric
                calculated value(s)

        Returns:
            float | numpy array
                average AIS over all samples or local AIS for individual
                samples if 'local_values'=True
            Java object
                JIDT calculator that was used here. Only returned if
                return_calc was set.

        Raises:
            ex.JidtOutOfMemoryError
                Raised when the JIDT object cannot be instantiated due to a
                memory error
        """
        process = self._ensure_one_dim_input(process)

        # Now discretise if required.
        if self.settings['discretise_method'] == 'none':
            assert issubclass(process.dtype.type, np.integer), (
                'Process is not an integer numpy array. '
                'Discretise data to use this estimator.')
            assert min(process) >= 0, 'Minimum of process is smaller than 0.'
            assert max(process) < self.settings['alph'], (
                'Maximum of process is larger than the alphabet size.')
            if self.settings['alph'] < np.unique(process).shape[0]:
                raise RuntimeError(
                    "The process' alphabet size is smaller than the no. "
                    'unique elements in the process.')
        elif self.settings['discretise_method'] == 'equal':
            process = utils.discretise(process, self.settings['alph'])
        elif self.settings['discretise_method'] == 'max_ent':
            process = utils.discretise_max_ent(process, self.settings['alph'])
        else:
            raise ValueError('Unknown discretisation method.')

        # And finally make the AIS calculation:
        try:
            self.calc.initialise(
                self.settings['alph'], self.settings['history'])
        except:
            # Handles both jp.JException (JPype v0.7) and jp.JavaException
            # (JPype < v0.7). The only possible exception that can be raised
            # here (if self.settings['alph'] >= 2) is a Java
            # OutOfMemoryException:
            assert self.settings['alph'] >= 2
            raise ex.JidtOutOfMemoryError(
                'Cannot instantiate JIDT AIS discrete estimator with '
                'alph = {} and history = {}. Try re-running with an '
                'increased Java heap size.'.format(
                    self.settings['alph'], self.settings['history']))
        # Unfortunately there is no faster way to pass numpy arrays in than
        # this list conversion.
        self.calc.addObservations(jp.JArray(jp.JInt, 1)(process.tolist()))
        if self.settings['local_values']:
            result = np.array(self.calc.computeLocalFromPreviousObservations(
                jp.JArray(jp.JInt, 1)(process.tolist())))
        else:
            result = float(self.calc.computeAverageLocalOfObservations())
        if return_calc:
            return (result, self.calc)
        else:
            return result

    def get_analytic_distribution(self, process):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            process : numpy array
                realisations as either a 2D numpy array where array
                dimensions represent [realisations x variable dimension] or a
                1D array representing [realisations], array type can be float
                (requires discretisation) or int

        Returns:
            Java object
                JIDT calculator that was used here
        """
        # Make one estimate to prepare the calculator:
        (est, jidt_calc) = self.estimate(process, True)
        return jidt_calc.computeSignificance()
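

# Illustrative usage sketch (not part of the original module): AIS of a
# binary process with one-sample memory. Data and names are hypothetical.
def _example_discrete_ais():
    rng = np.random.default_rng(0)
    n = 1000
    process = np.zeros(n, dtype=int)
    for t in range(1, n):
        # Repeat the previous symbol with probability 0.9.
        if rng.random() < 0.9:
            process[t] = process[t - 1]
        else:
            process[t] = 1 - process[t - 1]
    est = JidtDiscreteAIS(settings={'history': 1, 'alph': 2})
    print('AIS (bits):', est.estimate(process))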


class JidtGaussianAIS(JidtGaussian):
    """Calculate active information storage with JIDT's Gaussian implementation.

    Calculate active information storage (AIS) for some process using JIDT's
    implementation of the Gaussian estimator. AIS is defined as the mutual
    information between the process's past state and its current value.

    The past state needs to be defined in the settings dictionary, where a
    past state is defined as a uniform embedding with parameters history and
    tau. The history describes the number of samples taken from the
    process's past, tau describes the embedding delay, i.e., the spacing
    between every two samples from the process's past. See parent class for
    references. Results are returned in nats.

    Args:
        settings : dict
            sets estimation parameters:

            - history : int - number of samples in the process's past used as
              embedding
            - tau : int [optional] - the process's embedding delay
              (default=1)
            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the AIS estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        AIS estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        # Check for history for AIS estimation.
        try:
            settings['history']
        except KeyError:
            raise RuntimeError('No history was provided for AIS estimation.')
        settings.setdefault('tau', 1)
        assert type(settings['history']) is int, (
            'History has to be an integer.')
        assert type(settings['tau']) is int, 'Tau has to be an integer.'

        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
                     ActiveInfoStorageCalculatorGaussian)
        super().__init__(CalcClass, settings)

    def estimate(self, process):
        """Estimate active information storage.

        Args:
            process : numpy array
                realisations of the process, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]

        Returns:
            float | numpy array
                average AIS over all samples or local AIS for individual
                samples if 'local_values'=True
        """
        process = self._ensure_one_dim_input(process)
        self.calc.initialise(self.settings['history'], self.settings['tau'])
        self.calc.setObservations(process)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
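

# Illustrative usage sketch (not part of the original module): Gaussian AIS
# of an AR(1) process. For an AR(1) process with coefficient a and unit
# innovations, the expected AIS is -0.5 * ln(1 - a**2) nats. Data and names
# are hypothetical.
def _example_gaussian_ais():
    rng = np.random.default_rng(0)
    n = 10000
    process = np.zeros(n)
    for t in range(1, n):
        process[t] = 0.5 * process[t - 1] + rng.normal()
    est = JidtGaussianAIS(settings={'history': 1})
    print('AIS (nats):', est.estimate(process))  # theory: ~0.144 nats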


class JidtGaussianMI(JidtGaussian):
    """Calculate mutual information with JIDT's Gaussian implementation.

    Calculate the mutual information between two variables. Call JIDT via
    jpype and use the Gaussian estimator. See parent class for references.
    Results are returned in nats.

    Args:
        settings : dict [optional]
            sets estimation parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - lag_mi : int [optional] - time difference in samples to
              calculate the lagged MI between processes (default=0)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the MI estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        MI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
                     MutualInfoCalculatorMultiVariateGaussian)
        super().__init__(CalcClass, settings)

        # Add a lag between input variables. Setting the lag in JIDT didn't
        # work, so variables are shifted when calling the estimate method
        # instead.
        self.settings.setdefault('lag_mi', int(0))
        # self.calc.setProperty('PROP_TIME_DIFF',
        #                       str(self.settings['lag_mi']))

    def estimate(self, var1, var2):
        """Estimate mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            var2 : numpy array
                realisations of the second variable (similar to var1)

        Returns:
            float | numpy array
                average MI over all samples or local MI for individual
                samples if 'local_values'=True
        """
        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)

        # Shift variables to calculate a lagged MI.
        if self.settings['lag_mi'] > 0:
            var1 = var1[:-self.settings['lag_mi'], :]
            var2 = var2[self.settings['lag_mi']:, :]

        self.calc.initialise(var1.shape[1], var2.shape[1])
        self.calc.setObservations2D(var1, var2)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
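

# Illustrative usage sketch (not part of the original module): Gaussian MI
# between two jointly Gaussian variables with correlation rho, for which
# MI = -0.5 * ln(1 - rho**2) nats. Data and names are hypothetical.
def _example_gaussian_mi():
    rng = np.random.default_rng(0)
    n = 10000
    rho = 0.6
    var1 = rng.normal(size=n)
    var2 = rho * var1 + np.sqrt(1 - rho ** 2) * rng.normal(size=n)
    est = JidtGaussianMI()
    print('MI (nats):', est.estimate(var1, var2))  # theory: ~0.223 nats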


class JidtGaussianCMI(JidtGaussian):
    """Calculate conditional mutual information with JIDT's Gaussian estimator.

    Compute the differential conditional mutual information of two
    multivariate sets of observations, conditioned on another, assuming that
    the probability distribution function for these observations is a
    multivariate Gaussian distribution. Call JIDT via jpype and use the
    ConditionalMutualInfoCalculatorMultiVariateGaussian estimator. If no
    conditional is given (is None), the function returns the mutual
    information between var1 and var2. See parent class for references.
    Results are returned in nats.

    Args:
        settings : dict [optional]
            sets estimation parameters:

            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the CMI estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings=None):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
                     ConditionalMutualInfoCalculatorMultiVariateGaussian)
        super().__init__(CalcClass, settings)
        self.est_mi = None

    def estimate(self, var1, var2, conditional=None):
        """Estimate conditional mutual information.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            var2 : numpy array
                realisations of the second variable (similar to var1)
            conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1),
                if no conditional is provided, return MI between var1 and
                var2

        Returns:
            float | numpy array
                average CMI over all samples or local CMI for individual
                samples if 'local_values'=True
        """
        # Return MI if no conditioning variable was provided.
        if conditional is None:
            if self.est_mi is None:
                self.est_mi = JidtGaussianMI(self.settings)
            return self.est_mi.estimate(var1, var2)
        else:
            assert conditional.size != 0, 'Conditional array is empty.'

        var1 = self._ensure_two_dim_input(var1)
        var2 = self._ensure_two_dim_input(var2)
        cond = self._ensure_two_dim_input(conditional)
        assert var1.shape[0] == var2.shape[0], (
            'Unequal number of observations (var1: {0}, var2: {1}).'.format(
                var1.shape[0], var2.shape[0]))
        assert var1.shape[0] == cond.shape[0], (
            'Unequal number of observations (var1: {0}, cond: {1}).'.format(
                var1.shape[0], cond.shape[0]))

        self.calc.initialise(var1.shape[1], var2.shape[1], cond.shape[1])
        self.calc.setObservations2D(var1, var2, cond)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())

    def get_analytic_distribution(self, var1, var2, conditional=None):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use the
        common_estimate_surrogates_analytic() method, where data is formatted
        as per the estimate method for this estimator.

        Args:
            var1 : numpy array
                realisations of first variable, either a 2D numpy array where
                array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            var2 : numpy array
                realisations of the second variable (similar to var1)
            conditional : numpy array [optional]
                realisations of the conditioning variable (similar to var1),
                if no conditional is provided, return MI between var1 and
                var2

        Returns:
            Java object
                JIDT calculator that was used here
        """
        # Make one estimate to prepare the calculator:
        self.estimate(var1, var2, conditional)
        if conditional is None:
            return self.est_mi.calc.computeSignificance()
        else:
            return self.calc.computeSignificance()
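

# Illustrative usage sketch (not part of the original module): Gaussian CMI,
# where conditioning on a common driver removes the apparent dependence
# between var1 and var2. Data and names are hypothetical.
def _example_gaussian_cmi():
    rng = np.random.default_rng(0)
    n = 5000
    cond = rng.normal(size=(n, 1))
    var1 = cond + rng.normal(size=(n, 1))
    var2 = cond + rng.normal(size=(n, 1))
    est = JidtGaussianCMI()
    print('CMI (nats):', est.estimate(var1, var2, cond))  # close to 0
    print('MI (nats):', est.estimate(var1, var2))         # clearly > 0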


class JidtKraskovTE(JidtKraskov):
    """Calculate transfer entropy with JIDT's Kraskov implementation.

    Calculate transfer entropy between a source and a target variable using
    JIDT's implementation of the Kraskov type 1 estimator. Transfer entropy
    is defined as the conditional mutual information between the source's
    past state and the target's current value, conditional on the target's
    past.

    Past states need to be defined in the settings dictionary, where a past
    state is defined as a uniform embedding with parameters history and tau.
    The history describes the number of samples taken from a variable's
    past, tau describes the embedding delay, i.e., the spacing between every
    two samples from the process's past. See parent class for references.
    Results are returned in nats.

    Args:
        settings : dict
            sets estimation parameters:

            - history_target : int - number of samples in the target's past
              used as embedding
            - history_source : int [optional] - number of samples in the
              source's past used as embedding (default=same as the target
              history)
            - tau_source : int [optional] - source's embedding delay
              (default=1)
            - tau_target : int [optional] - target's embedding delay
              (default=1)
            - source_target_delay : int [optional] - information transfer
              delay between source and target (default=1)
            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
            - algorithm_num : int [optional] - which Kraskov algorithm (1 or
              2) to use (default=1)

    Note:
        Some technical details: JIDT normalises over realisations, IDTxl
        normalises over raw data once, outside the CMI estimator, to save
        computation time. The Theiler window ignores trial boundaries. The
        CMI estimator adds noise to the data by default. To make analysis
        runs replicable, set noise_level to 0.
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.kraskov').
                     TransferEntropyCalculatorKraskov)
        # Get embedding and delay parameters.
        settings = self._set_te_defaults(settings)
        super().__init__(CalcClass, settings)

    def estimate(self, source, target):
        """Estimate transfer entropy from a source to a target variable.

        Args:
            source : numpy array
                realisations of source variable, either a 2D numpy array
                where array dimensions represent [realisations x variable
                dimension] or a 1D array representing [realisations]
            target : numpy array
                realisations of target variable (similar to source)

        Returns:
            float | numpy array
                average TE over all samples or local TE for individual
                samples if 'local_values'=True
        """
        source = self._ensure_one_dim_input(source)
        target = self._ensure_one_dim_input(target)

        # Check if number of points is sufficient for estimation.
        self._check_number_of_points(
            source.shape[0] - self.settings['source_target_delay'])

        self.calc.initialise(self.settings['history_target'],
                             self.settings['tau_target'],
                             self.settings['history_source'],
                             self.settings['tau_source'],
                             self.settings['source_target_delay'])
        self.calc.setObservations(source, target)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
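

# Illustrative usage sketch (not part of the original module): TE from a
# source to a target it drives with a one-sample delay. Data and names are
# hypothetical.
def _example_kraskov_te():
    rng = np.random.default_rng(0)
    n = 1000
    source = rng.normal(size=n)
    target = np.zeros(n)
    for t in range(1, n):
        target[t] = 0.8 * source[t - 1] + 0.2 * rng.normal()
    est = JidtKraskovTE(settings={'history_target': 1,
                                  'source_target_delay': 1})
    print('TE (nats):', est.estimate(source, target))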


class JidtDiscreteTE(JidtDiscrete):
    """Calculate TE with JIDT's implementation for discrete variables.

    Calculate the transfer entropy between two time series processes. Call
    JIDT via jpype and use the discrete estimator. Transfer entropy is
    defined as the conditional mutual information between the source's past
    state and the target's current value, conditional on the target's past.
    See parent class for references. Results are returned in bits.

    Args:
        settings : dict
            sets estimation parameters:

            - history_target : int - number of samples in the target's past
              used as embedding (>= 0)
            - history_source : int [optional] - number of samples in the
              source's past used as embedding (default=same as the target
              history) (>= 1)
            - tau_source : int [optional] - source's embedding delay
              (default=1) (>= 1)
            - tau_target : int [optional] - target's embedding delay
              (default=1) (>= 1)
            - source_target_delay : int [optional] - information transfer
              delay between source and target (default=1) (>= 0)
            - discretise_method : str [optional] - if and how to discretise
              incoming continuous data, can be 'max_ent' for maximum entropy
              binning, 'equal' for equal size bins, and 'none' if no binning
              is required (default='none')
            - n_discrete_bins : int [optional] - number of discrete
              bins/levels or the base of each dimension of the discrete
              variables (default=2). If set, this parameter overwrites/sets
              alph1 and alph2 (>= 2)
            - alph1 : int [optional] - number of discrete bins/levels for
              source (default=2, or the value set for n_discrete_bins) (>= 2)
            - alph2 : int [optional] - number of discrete bins/levels for
              target (default=2, or the value set for n_discrete_bins) (>= 2)
            - debug : bool [optional] - return debug information when calling
              JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        # Get embedding and delay parameters.
        settings = self._set_te_defaults(settings)

        # Get alphabet sizes and check if discretisation is requested. Try to
        # overwrite alphabet sizes with the number of bins.
        try:
            n_discrete_bins = int(settings['n_discrete_bins'])
            settings['alph1'] = n_discrete_bins
            settings['alph2'] = n_discrete_bins
        except KeyError:
            pass  # Do nothing and set alphabet sizes to the defaults below.
        settings.setdefault('alph1', int(2))
        settings.setdefault('alph2', int(2))
        assert type(settings['alph1']) is int, (
            'Num discrete levels for source has to be an integer.')
        assert type(settings['alph2']) is int, (
            'Num discrete levels for target has to be an integer.')
        assert settings['alph1'] >= 2, (
            'Num discrete levels for source must be >= 2')
        assert settings['alph2'] >= 2, (
            'Num discrete levels for target must be >= 2')
        super().__init__(settings)

        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.discrete').
                     TransferEntropyCalculatorDiscrete)
        self.calc = CalcClass()
        self.calc.setDebug(self.settings['debug'])
[docs]    def estimate(self, source, target, return_calc=False):
        """Estimate transfer entropy from a source to a target variable.

        Args:
            source : numpy array
                realisations of source variable, either a 2D numpy array
                where array dimensions represent
                [realisations x variable dimension] or a 1D array
                representing [realisations]; array type can be float
                (requires discretisation) or int
            target : numpy array
                realisations of target variable (similar to source)
            return_calc : bool [optional]
                return the JIDT calculator used for the estimation in
                addition to the numeric result (default=False)

        Returns:
            float | numpy array
                average TE over all samples or local TE for individual
                samples if 'local_values'=True
            Java object
                JIDT calculator that was used here; only returned if
                return_calc was set

        Raises:
            ex.JidtOutOfMemoryError
                raised when the JIDT object cannot be instantiated due to
                a memory error
        """
        source = self._ensure_one_dim_input(source)
        target = self._ensure_one_dim_input(target)

        # Discretise variables if requested.
        source, target = self._discretise_vars(source, target)

        # And finally make the TE calculation:
        max_base = max(self.settings['alph1'], self.settings['alph2'])
        try:
            self.calc.initialise(max_base,
                                 self.settings['history_target'],
                                 self.settings['tau_target'],
                                 self.settings['history_source'],
                                 self.settings['tau_source'],
                                 self.settings['source_target_delay'])
        except Exception:
            # Handles both jp.JException (JPype v0.7) and jp.JavaException
            # (JPype < v0.7). The only exception that can be raised here
            # (given max_base >= 2) is a Java OutOfMemoryException:
            assert max_base >= 2
            raise ex.JidtOutOfMemoryError(
                'Cannot instantiate JIDT TE discrete estimator with '
                'max_base = {} and history_target = {} and '
                'history_source = {}. Try re-running with an increased '
                'Java heap size.'.format(
                    max_base, self.settings['history_target'],
                    self.settings['history_source']))
        # Unfortunately there is no faster way to pass numpy arrays to
        # JIDT than this list conversion.
        self.calc.addObservations(
            jp.JArray(jp.JInt, 1)(source.tolist()),
            jp.JArray(jp.JInt, 1)(target.tolist()))
        if self.settings['local_values']:
            result = np.array(self.calc.computeLocalFromPreviousObservations(
                jp.JArray(jp.JInt, 1)(source.tolist()),
                jp.JArray(jp.JInt, 1)(target.tolist())))
        else:
            result = float(self.calc.computeAverageLocalOfObservations())
        if return_calc:
            return (result, self.calc)
        else:
            return result
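An illustrative call on binary integer time series; with return_calc=True the underlying JIDT calculator is handed back alongside the estimate. All data here are synthetic:

# Sketch: TE for a noise-free lag-1 copy should approach 1 bit.
import numpy as np
from idtxl.estimators_jidt import JidtDiscreteTE

rng = np.random.default_rng(0)
source = rng.integers(0, 2, size=1000)
target = np.roll(source, 1)  # target copies the source with lag 1

est = JidtDiscreteTE({'history_target': 1, 'discretise_method': 'none'})
te, calc = est.estimate(source, target, return_calc=True)
print(te)  # close to 1 bit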
[docs]    def get_analytic_distribution(self, source, target):
        """Return a JIDT AnalyticNullDistribution object.

        Required so that our estimate_surrogates_analytic method can use
        the common_estimate_surrogates_analytic() method, where data is
        formatted as per the estimate method for this estimator.

        Args:
            source : numpy array
                realisations of source variable, either a 2D numpy array
                where array dimensions represent
                [realisations x variable dimension] or a 1D array
                representing [realisations]; array type can be float
                (requires discretisation) or int
            target : numpy array
                realisations of target variable (similar to source)

        Returns:
            Java object
                JIDT AnalyticNullDistribution for the estimate computed
                from the given data
        """
        # Make one estimate to prepare the calculator:
        (est, jidt_calc) = self.estimate(source, target, True)
        return jidt_calc.computeSignificance()
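Continuing the previous sketch, the returned distribution can be queried directly; computeEstimateForGivenPValue() is the same JIDT call used by common_estimate_surrogates_analytic() at the end of this module:

# Sketch: critical TE value at p = 0.05 under the analytic null,
# reusing est, source and target from the example above.
dist = est.get_analytic_distribution(source, target)
te_crit = dist.computeEstimateForGivenPValue(0.05)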
[docs]class JidtGaussianTE(JidtGaussian):
    """Calculate transfer entropy with JIDT's Gaussian implementation.

    Calculate transfer entropy between a source and a target variable
    using JIDT's implementation of the Gaussian estimator. Transfer
    entropy is defined as the conditional mutual information between the
    source's past state and the target's current value, conditional on
    the target's past. Past states need to be defined in the settings
    dictionary, where a past state is defined as a uniform embedding with
    parameters history and tau. The history describes the number of
    samples taken from a variable's past, tau describes the embedding
    delay, i.e., the spacing between every two samples from the
    processes' past.

    See parent class for references. Results are returned in nats.

    Args:
        settings : dict
            sets estimation parameters:

            - history_target : int - number of samples in the target's
              past used as embedding
            - history_source : int [optional] - number of samples in the
              source's past used as embedding (default=same as the target
              history)
            - tau_source : int [optional] - source's embedding delay
              (default=1)
            - tau_target : int [optional] - target's embedding delay
              (default=1)
            - source_target_delay : int [optional] - information transfer
              delay between source and target (default=1)
            - debug : bool [optional] - return debug information when
              calling JIDT (default=False)
            - local_values : bool [optional] - return local TE instead of
              average TE (default=False)

    Note:
        Some technical details: JIDT normalises over realisations; IDTxl
        normalises over raw data once, outside the CMI estimator, to save
        computation time. The Theiler window ignores trial boundaries.
        The CMI estimator adds noise to the data by default. To make
        analysis runs replicable, set noise_level to 0.
    """

    def __init__(self, settings):
        settings = self._check_settings(settings)
        # Start JAVA virtual machine and create JAVA object.
        self._start_jvm()
        CalcClass = (jp.JPackage('infodynamics.measures.continuous.gaussian').
                     TransferEntropyCalculatorGaussian)
        # Get embedding and delay parameters.
        settings = self._set_te_defaults(settings)
        super().__init__(CalcClass, settings)
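A usage sketch for the Gaussian estimator. For the linear-Gaussian system below, the exact TE is 0.5 * ln(1 + c^2 / sigma^2) nats, which the estimate should approach for long series; the coupling and noise values are illustrative:

# Sketch: Gaussian TE with a known analytic value.
import numpy as np
from idtxl.estimators_jidt import JidtGaussianTE

n = 10000
c, sigma = 0.5, 1.0
source = np.random.randn(n)
target = np.zeros(n)
target[1:] = c * source[:-1] + sigma * np.random.randn(n - 1)

est = JidtGaussianTE({'history_target': 1})
te = est.estimate(source, target)
# Analytic value for this system: 0.5 * np.log(1 + c**2 / sigma**2) nats.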
[docs]    def estimate(self, source, target):
        """Estimate transfer entropy from a source to a target variable.

        Args:
            source : numpy array
                realisations of source variable, either a 2D numpy array
                where array dimensions represent
                [realisations x variable dimension] or a 1D array
                representing [realisations]
            target : numpy array
                realisations of target variable (similar to source)

        Returns:
            float | numpy array
                average TE over all samples or local TE for individual
                samples if 'local_values'=True
        """
        source = self._ensure_one_dim_input(source)
        target = self._ensure_one_dim_input(target)
        self.calc.initialise(self.settings['history_target'],
                             self.settings['tau_target'],
                             self.settings['history_source'],
                             self.settings['tau_source'],
                             self.settings['source_target_delay'])
        self.calc.setObservations(source, target)
        if self.settings['local_values']:
            return np.array(self.calc.computeLocalOfPreviousObservations())
        else:
            return float(self.calc.computeAverageLocalOfObservations())
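With local_values=True the same call returns one local TE value per sample instead of the average, so the mean of the returned array recovers the average estimate. Continuing the Gaussian sketch above:

# Sketch: per-sample local TE values.
est_local = JidtGaussianTE({'history_target': 1, 'local_values': True})
local_te = est_local.estimate(source, target)  # numpy array of local TEs
print(local_te.mean())  # approximately equals the average TE above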
[docs]def common_estimate_surrogates_analytic(estimator, n_perm=200, **data):
    """Estimate the surrogate distribution analytically for a JidtEstimator.

    Estimate the surrogate distribution analytically for a JidtEstimator
    whose is_analytic_null_estimator() method returns True, by sampling
    estimates at random p-values from the analytic distribution.

    Args:
        estimator : JidtEstimator object
            estimator that returns True to a call to its
            is_analytic_null_estimator() method
        n_perm : int [optional]
            number of permutations (default=200)
        data : numpy arrays
            realisations of random variables required for the calculation
            (varies between estimators, e.g., 2 variables for MI, 3 for
            CMI)

    Returns:
        numpy array
            n_perm surrogates of the average MI/CMI/TE over all samples
            under the null hypothesis of no relationship between the
            variables (given the conditional, if any)
    """
    # Compute the statistical significance of the estimate to get an
    # AnalyticMeasurementDistribution object:
    analytic_distribution = estimator.get_analytic_distribution(**data)
    # Then compute surrogates at n_perm random p-values.
    surrogate_estimates = np.empty(n_perm)
    for perm in range(n_perm):
        surrogate_estimates[perm] = \
            analytic_distribution.computeEstimateForGivenPValue(
                np.random.random())
    return surrogate_estimates
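A sketch of generating analytic surrogates for a discrete estimator and using them for a one-sided significance test; the data are synthetic and the 95th-percentile threshold is illustrative:

# Sketch: analytic surrogate test for TE on binary data.
import numpy as np
from idtxl.estimators_jidt import (JidtDiscreteTE,
                                   common_estimate_surrogates_analytic)

rng = np.random.default_rng(42)
source = rng.integers(0, 2, size=500)
target = np.roll(source, 1)  # strong lag-1 coupling

est = JidtDiscreteTE({'history_target': 1, 'discretise_method': 'none'})
te = est.estimate(source, target)
surrogates = common_estimate_surrogates_analytic(
    est, n_perm=200, source=source, target=target)
print(te > np.quantile(surrogates, 0.95))  # significant at alpha = 0.05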