Source code for pyunicorn.climate.mutual_info

#!/usr/bin/python
# -*- coding: utf-8 -*-
#
# This file is part of pyunicorn.
# Copyright (C) 2008--2019 Jonathan F. Donges and pyunicorn authors
# URL: <http://www.pik-potsdam.de/members/donges/software>
# License: BSD (3-clause)

"""
Provides classes for generating and analyzing complex climate networks.
"""

#
#  Import essential packages
#

# array object and fast numerics
import numpy as np

from ._ext.numerics import _calculate_mutual_information_cython

#  Import progress bar for easy progress bar handling
# from ..utils import progressbar

#  Import cnNetwork for Network base class
from .climate_network import ClimateNetwork

#
#  Define class MutualInfoClimateNetwork
#


[docs]class MutualInfoClimateNetwork(ClimateNetwork): """ Represents a mutual information climate network. Constructs a static climate network based on mutual information at zero lag, as in [Ueoka2008]_. Mutual information climate networks are undirected, since mutual information is a symmetrical measure. In contrast to Pearson correlation used in :class:`.TsonisClimateNetwork`, mutual information has the potential to detect nonlinear statistical interdependencies. """ # # Defines internal methods #
[docs] def __init__(self, data, threshold=None, link_density=None, non_local=False, node_weight_type="surface", winter_only=True, silence_level=0): """ Initialize an instance of MutualInfoClimateNework. .. note:: Either threshold **OR** link_density have to be given! Possible choices for ``node_weight_type``: - None (constant unit weights) - "surface" (cos lat) - "irrigation" (cos**2 lat) :type data: :class:`.ClimateData` :arg data: The climate data used for network construction. :arg float threshold: The threshold of similarity measure, above which two nodes are linked in the network. :arg float link_density: The networks's desired link density. :arg bool non_local: Determines, whether links between spatially close nodes should be suppressed. :arg str node_weight_type: The type of geographical node weight to be used. :arg bool winter_only: Determines, whether only data points from the winter months (December, January and February) should be used for analysis. Possibly, this further suppresses the annual cycle in the time series. :arg int silence_level: The inverse level of verbosity of the object. """ if silence_level <= 1: print("Generating a mutual information climate network...") self.silence_level = silence_level # Set instance variables self.data = data """(ClimateData) - The climate data used for network construction.""" self.N = self.data.grid.N self._prescribed_link_density = link_density self._winter_only = winter_only # Class specific settings self.mi_file = "mutual_information_" + data.data_source + "_" \ + data.observable_name + ".data" """(string) - The name of the file for storing the mutual information matrix.""" self._set_winter_only(winter_only) ClimateNetwork.__init__(self, grid=self.data.grid, similarity_measure=self._similarity_measure, threshold=threshold, non_local=non_local, directed=False, node_weight_type=node_weight_type, silence_level=silence_level)
[docs] def __str__(self): """ Return a string representation of MutualInfoClimateNetwork. """ return 'MutualInfoClimateNetwork:\n' + ClimateNetwork.__str__(self)
[docs] def _cython_calculate_mutual_information(self, anomaly, n_bins=32): """ Calculate the mutual information matrix at zero lag. The cython code is adopted from the Tisean 3.0.1 mutual.c module. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :arg int n_bins: The number of bins for estimating probability distributions. :arg bool fast: Indicates, whether fast or slow algorithm should be used. :rtype: 2D array (index, index) :return: the mutual information matrix at zero lag. """ if self.silence_level <= 1: print("Calculating mutual information matrix at zero lag from " "anomaly values using cython...") # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) # Create local transposed copy of anomaly anomaly = np.fastCopyAndTranspose(anomaly) (N, n_samples) = anomaly.shape # Get common range for all histograms range_min = float(anomaly.min()) range_max = float(anomaly.max()) # Rescale all time series to the interval [0,1], # using the maximum range of the whole dataset. scaling = 1./(range_max - range_min) anomaly = anomaly.astype(np.float32).copy(order='c') mi = _calculate_mutual_information_cython(anomaly, n_samples, N, n_bins, scaling, range_min) if self.silence_level <= 1: print("Done!") return mi
[docs] def _calculate_mutual_information(self, anomaly, n_bins=32): """ Calculate the mutual information matrix at zero lag. .. note:: Slow since solely based on Python and Numpy! :type anomaly: 2D array (time, index) :arg anomaly: The anomaly time series. :arg int n_bins: The number of bins for estimating probability distributions. :rtype: 2D array (index, index) :return: the mutual information matrix at zero lag. """ if self.silence_level <= 1: print("Calculating mutual information matrix at zero lag from " "anomaly values...") # Define references to numpy functions for faster function calls histogram = np.histogram histogram2d = np.histogram2d log = np.log # Normalize anomaly time series to zero mean and unit variance self.data.normalize_time_series_array(anomaly) # Get faster reference to length of time series = number of samples # per grid point. n_samples = anomaly.shape[0] # Initialize mutual information array mi = np.zeros((self.N, self.N)) # Get common range for all histograms range_min = anomaly.min() range_max = anomaly.max() # Calculate the histograms for each time series p = np.zeros((self.N, n_bins)) for i in range(self.N): p[i, :] = histogram( anomaly[:, i], bins=n_bins, range=(range_min, range_max) )[0].astype("float64") # Normalize by total number of samples = length of each time series p /= n_samples # Make sure that bins with zero estimated probability are not counted # in the entropy measures. p[p == 0] = 1 # Compute the information entropies of each time series H = - (p * log(p)).sum(axis=1) # Initialize progress bar # if self.silence_level <= 1: # progress = progressbar.ProgressBar(maxval=self.N**2).start() # Calculate only the lower half of the MI matrix, since MI is # symmetric with respect to X and Y. for i in range(self.N): # Update progress bar every 10 steps # if self.silence_level <= 1: # if (i % 10) == 0: # progress.update(i**2) for j in range(i): # Calculate the joint probability distribution pxy = histogram2d( anomaly[:, i], anomaly[:, j], bins=n_bins, range=((range_min, range_max), (range_min, range_max)))[0].astype("float64") # Normalize joint distribution pxy /= n_samples # Compute the joint information entropy pxy[pxy == 0] = 1 HXY = - (pxy * log(pxy)).sum() # ... and store the result mi.itemset((i, j), H.item(i) + H.item(j) - HXY) mi.itemset((j, i), mi.item((i, j))) # if self.silence_level <= 1: # progress.finish() return mi
[docs] def calculate_similarity_measure(self, anomaly): """ Calculate the mutual information matrix. Encapsulates calculation of mutual information with standard parameters. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :rtype: 2D Numpy array (index, index) :return: the mutual information matrix at zero lag. """ return self._cython_calculate_mutual_information(anomaly)
[docs] def mutual_information(self, anomaly=None, dump=True): """ Return mutual information matrix at zero lag. Check if mutual information matrix (MI) was already calculated before: - If yes, return MI from a data file. - If not, return MI from calculation and store in file. :type anomaly: 2D Numpy array (time, index) :arg anomaly: The anomaly time series. :arg bool dump: Store MI in data file. :rtype: 2D Numpy array (index, index) :return: the mutual information matrix at zero lag. """ try: # Try to load MI from file if self.silence_level <= 1: print("Loading mutual information matrix from " f"{self.mi_file}...") with open(self.mi_file, 'r') as f: mi = np.load(f) # Check if the dimensions of mutual_information correspond to # the grid. if mi.shape != (self.N, self.N): print(f"{self.mi_file} in current directory has " "incorrect dimensions!") raise RuntimeError except (IOError, RuntimeError): if self.silence_level <= 1: print("An error occured while loading data from " f"{self.mi_file}.") print("Recalculating mutual information.") mi = self._cython_calculate_mutual_information(anomaly) if dump: with open(self.mi_file, 'w') as f: if self.silence_level <= 1: print("Storing in", self.mi_file) mi.dump(f) return mi
[docs] def winter_only(self): """ Indicate, if only winter months were used for network generation. :return bool: whether only winter months were used for network generation. """ return self._winter_only
[docs] def _set_winter_only(self, winter_only, dump=False): """ Toggle use of exclusively winter data points for network generation. :arg bool winter_only: Indicates whether only winter months were used for network generation. :arg bool dump: Store MI in data file. """ self._winter_only = winter_only if winter_only: winter_anomaly = self.data.anomaly_selected_months([0, 1, 11]) mi = self.mutual_information(winter_anomaly, dump=dump) else: mi = self.mutual_information(self.data.anomaly(), dump=dump) self._similarity_measure = mi
[docs] def set_winter_only(self, winter_only, dump=True): """ Toggle use of exclusively winter data points for network generation. Also explicitly regenerates the instance of MutualInfoClimateNetwork. :arg bool winter_only: Indicates whether only winter months were used for network generation. :arg bool dump: Store MI in data file. """ self._set_winter_only(winter_only, dump=dump) self._regenerate_network()
# # Defines methods to calculate weighted network measures #
[docs] def mutual_information_weighted_average_path_length(self): """ Return mutual information weighted average path length. :return float: the mutual information weighted average path length. """ if "mutual_information" not in self._path_lengths_cached: self.set_link_attribute("mutual_information", abs(self.mutual_information())) return self.average_path_length("mutual_information")
[docs] def mutual_information_weighted_closeness(self): """ Return mutual information weighted closeness. :rtype: 1D Numpy array [index] :return: the mutual information weighted closeness sequence. """ if "mutual_information" not in self._path_lengths_cached: self.set_link_attribute("mutual_information", abs(self.mutual_information())) return self.closeness("mutual_information")
[docs] def local_mutual_information_weighted_vulnerability(self): """ Return mutual information weighted vulnerability. :rtype: 1D Numpy array [index] :return: the mutual information weighted vulnerability sequence. """ if "mutual_information" not in self._path_lengths_cached: self.set_link_attribute("mutual_information", abs(self.mutual_information())) return self.local_vulnerability("mutual_information")