Source code for polyfemos.front.sohplot.datacontainer

# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# This file is part of Polyfemos.
#
# Polyfemos is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
#
# Polyfemos is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License and
# GNU General Public License along with Polyfemos. If not, see
# <https://www.gnu.org/licenses/>.
#
# Author: Henrik Jänkävaara
# -----------------------------------------------------------------------------
"""
Classes for handling datapoints

:copyright:
    2019, University of Oulu, Sodankyla Geophysical Observatory
:license:
    GNU Lesser General Public License v3.0 or later
    (https://spdx.org/licenses/LGPL-3.0-or-later.html)
"""
import sys
from dateutil import tz
import math
import functools

import numpy as np

from polyfemos.parser import typeoperator as to
from polyfemos.util.messenger import messenger


def remove_timezone_other(dt):
    """
    Default timezone removal function, selected when the running python
    version has no dedicated entry in ``_timezone_removal_functions``.

    Only the ``tzinfo`` attribute is dropped, the clock fields (year, month,
    ..., microsecond) of ``dt`` are returned unchanged. No timezone
    conversion is made.

    :type dt: :class:`~datetime.datetime`
    :param dt: naive or aware datetime
    :rtype: :class:`~datetime.datetime`
    :return: a datetime instance without timezone
    """
    # ``datetime.replace`` never shifts the clock fields, so labelling the
    # value as UTC before stripping the label (as was done previously) has
    # no effect on the result. Stripping ``tzinfo`` directly is equivalent.
    return dt.replace(tzinfo=None)
def remove_timezone_py36(dt):
    """
    Timezone removal function used with python 3.6.

    The clock fields of ``dt`` are labelled as UTC, the value is then
    converted to the local timezone of the machine and finally the
    timezone information is dropped.

    :type dt: :class:`~datetime.datetime`
    :param dt:
    :rtype: :class:`~datetime.datetime`
    :return: a datetime instance without timezone
    """
    utc_zone = tz.gettz('UTC')
    labelled_as_utc = dt.replace(tzinfo=utc_zone)
    in_local_time = labelled_as_utc.astimezone(tz.tzlocal())
    return in_local_time.replace(tzinfo=None)
def remove_timezone_py37(dt):
    """
    Timezone removal function used with python 3.7. The given object is
    handed back as it is, no copy is made and nothing is modified.

    :type dt: :class:`~datetime.datetime`
    :param dt:
    :rtype: :class:`~datetime.datetime`
    :return: the very same object that was given as ``dt``
    """
    return dt
# Bokeh has complications with different python version, therefore the
# timezone removal function is selected by the "major.minor" version of the
# running interpreter. Versions without an entry in the mapping fall back
# to ``remove_timezone_other``.
_timezone_removal_functions = {
    "3.6": remove_timezone_py36,
    "3.7": remove_timezone_py37,
}

python_version = "{}.{}".format(*sys.version_info[:2])

remove_timezone = _timezone_removal_functions.get(
    python_version, remove_timezone_other)
def _track_datalen(method):
    """
    A decorator to be used with
    :class:`~polyfemos.front.sohplot.datacontainer.DataContainer`
    to keep track of the datapoint amount and quantity of nan values.

    If ``self.track_datalen`` is falsy, the decorated method is simply
    called. Otherwise the length of ``self`` and ``self.count_nans()`` are
    recorded before and after the call and a summary line is stored with
    ``self.add2info``.

    At the moment the looping of the datapoints is not very optimized,
    ``count_nans`` is evaluated twice for every tracked call.

    :type method: func
    :param method: A decorated method
    :rtype: func
    :return: wrapped method, the return value of ``method`` is passed
        through to the caller
    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        # The flag is read only once, so that a method which toggles
        # ``track_datalen`` can not leave the "before" values undefined.
        if not self.track_datalen:
            return method(self, *args, **kwargs)

        messenger("in method: {}".format(method.__name__), "R")
        orig_len = len(self)
        orig_nan_len = self.count_nans()

        result = method(self, *args, **kwargs)

        new_len = len(self)
        new_nan_len = self.count_nans()
        str_ = "{:*<19}**".format(method.__name__ + ":")
        str_ += "dps:*{:*>7}*>*{:*<7}*".format(orig_len, new_len)
        str_ += "nans:*{:*>7}*>*{:*<7}".format(orig_nan_len, new_nan_len)
        self.add2info(str_)
        return result

    return wrapper
class DataPoint(object):
    """
    A structlike class to store one datapoint in timeseries data.

    The time value can be given in three interchangeable forms (string,
    timestamp, utcdatetime). The missing forms are derived lazily on the
    first request and cached afterwards.
    """

    def __init__(self, dtstr=None, timestamp=None, utcdatetime=None,
                 y=None, z=None):
        """
        The data must have timevalue, given either ``dtstr``, ``timestamp``
        or ``utcdatetime``. The timevalue is not validated here, a missing
        value is noticed when
        :meth:`~polyfemos.front.sohplot.datacontainer.DataPoint.get_utcdatetime`
        is called.

        ``z`` is an optional axis containing arbitrary string values
        following python dictionary syntax

        :type dtstr: str
        :param dtstr: utcdatetime compatible string representing time value
        :type timestamp: float
        :param timestamp:
        :type utcdatetime: :class:`~obspy.core.utcdatetime.UTCDateTime`
        :param utcdatetime:
        :type y: str or numlike
        :param y: string representing float number, if invalid string
            is provided, ``y`` will be ``nan``
        :type z: str, optional
        :param z: Additional arbitrary values contained in string
            following python dict syntax
        """
        self.__dtstr = dtstr
        self.__timestamp = timestamp
        self.__utcdatetime = utcdatetime
        # Lazily filled caches, see the corresponding getters
        self.__datetime = None
        self.__ordinal = None
        self.__timezone_naive_datetime = None
        self.__hash = None
        self.z = z
        # NOTE(review): ``to.check_type`` is expected to return a converter
        # which falls back to ``invalid_value`` -- confirm in typeoperator
        self.y = to.check_type(float, invalid_value=float('nan'))(y)

    def get_utcdatetime(self):
        """
        If ``self.__utcdatetime`` is not previously defined, the value
        is read using ``self.__dtstr`` or ``self.__timestamp``
        (``self.__dtstr`` is preferred if both are available).
        If neither of those yields a value, error is thrown.

        :rtype: :class:`~obspy.core.utcdatetime.UTCDateTime`
        :return:
        :raises Exception: if no valid timevalue is available
        """
        if self.__utcdatetime is not None:
            return self.__utcdatetime

        if self.__dtstr is not None:
            self.__utcdatetime = to.utcdatetime(self.__dtstr)
        elif self.__timestamp is not None:
            self.__utcdatetime = to.utcdatetime(self.__timestamp)

        # Also reached if the conversion above resulted in ``None``
        if self.__utcdatetime is None:
            raise Exception("DataPoint has no valid timevalue")
        return self.__utcdatetime

    def get_dtstr(self):
        """
        :rtype: str
        :return: string representation of the timevalue
        """
        if self.__dtstr is None:
            self.__dtstr = str(self.get_utcdatetime())
        return self.__dtstr

    def get_timestamp(self):
        """
        :rtype: float
        :return: timestamp
        """
        if self.__timestamp is None:
            self.__timestamp = self.get_utcdatetime().timestamp
        return self.__timestamp

    def get_datetime(self):
        """
        :rtype: :class:`~datetime.datetime`
        :return: ``datetime`` attribute of the utcdatetime value
        """
        if self.__datetime is None:
            self.__datetime = self.get_utcdatetime().datetime
        return self.__datetime

    def get_ordinal(self):
        """
        :rtype: str
        :return: year and the day of the year as a string
            in format ``YEAR.JULDAY``, e.g. ``2019.023``
        """
        if self.__ordinal is None:
            self.__ordinal = self.get_utcdatetime().strftime("%Y.%j")
        return self.__ordinal

    def get_timezone_naive_datetime(self):
        """
        :rtype: :class:`~datetime.datetime`
        :return: the datetime value processed with the module level
            ``remove_timezone`` function
        """
        if self.__timezone_naive_datetime is None:
            self.__timezone_naive_datetime = \
                remove_timezone(self.get_datetime())
        return self.__timezone_naive_datetime

    def set_z(self, value):
        """
        :type value: str
        :param value: Value to be set to ``z`` attribute
        """
        self.z = value

    def get_z(self):
        """
        :rtype: dict
        :return: Returns the value of ``z`` attribute. If the type
            was not previously converted from string to dictionary, the
            conversion is done in addition. Values which are not strings
            (including ``None``) are returned as they are.
        """
        if isinstance(self.z, str):
            self.z = to.dict_(self.z)
        return self.z

    def ifz(self):
        """
        :rtype: bool
        :return: return ``True`` if ``z`` has a value set
        """
        return self.z is not None

    def isnan(self):
        """
        :rtype: bool
        :return: Checks if ``y`` is nan
        """
        return math.isnan(self.y)

    def isnotnan(self):
        """
        :rtype: bool
        :return: return ``True`` if ``y`` is not nan
        """
        return not self.isnan()

    def tonan(self, inplace=True):
        """
        Change the ``y`` attribute to nan. If ``inplace`` is ``False``,
        a copy of
        :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        is returned and ``self`` is left untouched.
        Note the mutability of the
        :class:`~polyfemos.front.sohplot.datacontainer.DataPoint` instance
        if ``inplace`` is ``True``.

        :type inplace: bool, optional
        :param inplace: defaults to ``True``
        :rtype: :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        :return: the instance whose ``y`` was set to nan
        """
        target = self if inplace else self.copy()
        target.y = float('nan')
        return target

    def copy(self):
        """
        Only the time values, ``y`` and ``z`` are carried over. The ``z``
        object itself is shared between the original and the copy.

        :rtype: :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        :return: returns a copy of the current
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
            instance
        """
        return DataPoint(
            timestamp=self.__timestamp,
            dtstr=self.__dtstr,
            utcdatetime=self.__utcdatetime,
            y=self.y,
            z=self.z,
        )

    def __str__(self):
        """
        :rtype: str
        :return: Time string, timestamp, ``y`` and ``z`` separated by
            spaces
        """
        # Four placeholders for four values, ``z`` was previously passed
        # to ``format`` but silently left out of the result.
        return "{} {} {} {}".format(
            self.get_dtstr(), self.get_timestamp(), self.y, self.z)

    def __bool__(self):
        """
        :rtype: bool
        :return: returns ``True`` if ``y`` is not nan
        """
        return self.isnotnan()

    def __eq__(self, other):
        """
        Compares the timestamp and y values between ``self`` and ``other``.
        If both values are identical, returns ``True``. Two nan values
        are considered to be identical.

        :type other:
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        :param other:
        :rtype: bool
        :return: ``NotImplemented`` is returned if ``other`` is not a
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`, so
            that comparison with foreign objects evaluates to ``False``
            instead of raising ``AttributeError``
        """
        if not isinstance(other, DataPoint):
            return NotImplemented
        if self.get_timestamp() != other.get_timestamp():
            return False
        if self.isnan() and other.isnan():
            return True
        return self.y == other.y

    def __ne__(self, other):
        """
        :type other:
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        :param other:
        :rtype: bool
        :return: opposite of the
            :meth:`~polyfemos.front.sohplot.datacontainer.DataPoint.__eq__`
        """
        return not self == other

    def __hash__(self):
        """
        The hash is calculated on the first call and cached, later changes
        of ``y`` (e.g. with
        :meth:`~polyfemos.front.sohplot.datacontainer.DataPoint.tonan`)
        do not change it.

        :rtype: int
        :return: hashed tuple containing ``self.__timestamp`` and
            ``self.y`` values, every nan ``y`` is hashed alike
        """
        if self.__hash is None:
            y = self.y
            # ``__eq__`` treats two nan values as equal, so they have to
            # hash alike as well. Since Python 3.10 the hash of a nan
            # depends on the object identity, therefore nan is replaced
            # with a constant. ``y != y`` holds only for nan and does not
            # raise for non-float values.
            if y != y:
                y = "nan"
            self.__hash = hash((self.get_timestamp(), y))
        return self.__hash
class DataContainer(object):
    """
    A class to handle timeseries data consisting of
    :class:`~polyfemos.front.sohplot.datacontainer.DataPoint` instances.

    The datapoints are stored in the ``datapoints`` list. ``len()`` of the
    container is the length of that list.
    """

    def __init__(self, track_datalen=False, remove_identicals=False):
        """
        :type track_datalen: bool, optional
        :param track_datalen: Defaults to ``False``, If ``True``,
            the amount of datapoints, nans, etc., is monitored.
        :type remove_identicals: bool, optional
        :param remove_identicals: defaults to ``False``. If ``True``,
            values with identical x and y values are removed.
        """
        self.track_datalen = track_datalen
        self.remove_identicals = remove_identicals
        # A list containing DataPoint instances
        self.datapoints = []
        # After advanced outlier removal, this list contains the outlier
        # datapoints
        self.outlier_datapoints = []
        # Cache for get_ys_wo_nans
        self.__ys_wo_nans = []
        # Notes collected with add2info
        self.__info = []
        # Hashes of the datapoints accepted by append, used only if
        # remove_identicals is True
        self.__added_datapoint_hashes = set()
        # Amount of datapoints rejected by append
        self.__identical_count = 0

    def add2info(self, str_):
        r"""
        :type str\_: str
        :param str\_: A note appended to ``self.__info`` list
        """
        self.__info.append(str_)

    def get_info(self):
        """
        :rtype: list
        :return: A list containing the info notes, the internal list
            itself is returned, not a copy
        """
        return self.__info

    def append(self, dp):
        """
        If ``self.remove_identicals`` is ``True``, the datapoint is not
        included if it has identical x and y values as one of the already
        added datapoints. The comparison is made with the hash of the
        datapoint and every rejected datapoint is counted.

        :type dp: :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        :param dp: Appends
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
            instance to ``self.datapoints``
        """
        if not self.remove_identicals:
            self.datapoints.append(dp)
            return

        dphash = hash(dp)
        if dphash in self.__added_datapoint_hashes:
            self.__identical_count += 1
            return
        self.datapoints.append(dp)
        self.__added_datapoint_hashes.add(dphash)

    def sort(self):
        """
        Sorts ``self.datapoints`` comparing timestamp values. A new sorted
        list is bound to ``self.datapoints``.
        """
        self.datapoints = \
            sorted(self.datapoints, key=lambda dp: dp.get_timestamp())

    def add_identical_removal_info(self):
        """
        Adds original datapoint and nan amounts and identical
        datapoint information to ``self.__info``. Does nothing if
        ``self.track_datalen`` is not set.
        """
        if not self.track_datalen:
            return
        str_ = "dps:*{:*<7}* nans:*{:*<7}*" \
            .format(len(self), self.count_nans())
        str_ += "identicals removed:*{:*<7}*" \
            .format(self.__identical_count)
        self.add2info(str_)

    @_track_datalen
    def remove_irrationals(self, irlims=None):
        """
        If :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
        instance's ``y`` value is not within the interval
        (including both ends) defined by ``irlims``,
        the ``y`` is set to nan in place.

        Nothing is done if ``irlims`` is ``None`` or does not contain
        exactly two values. The order of the two limits does not matter.

        :type irlims: list
        :param irlims: lower and upper limit for irrational values
        """
        if irlims is None or len(irlims) != 2:
            return

        lower, upper = sorted(irlims)
        for dp in self.datapoints:
            if dp.isnan() or lower <= dp.y <= upper:
                continue
            dp.tonan()

    @_track_datalen
    def outlier_removal(self, outlierremfunc):
        """
        Applies advanced outlier removal to ``datapoints`` attribute.
        Changes ``y`` values of outlying datapoints into nan, while
        simultaneously creating dataset ``self.outlier_datapoints``, which
        contains the outliers.
        Some functions for removing outliers are in
        :mod:`~polyfemos.data.outlierremover`.

        ``outlierremfunc`` is called with a two column numpy array
        (timestamp, ``y``) and has to return one mask value for each
        datapoint. The mask values are interpreted as follows:

        * nan: ``y`` is set to nan in both ``datapoints`` and
          ``outlier_datapoints``
        * truthy: the datapoint is kept, its copy in
          ``outlier_datapoints`` is set to nan
        * falsy: the datapoint is an outlier, its copy in
          ``outlier_datapoints`` keeps the value and the ``y`` in
          ``datapoints`` is set to nan

        Nothing is done if ``outlierremfunc`` is not callable or the
        container has at most one datapoint.

        :type outlierremfunc: func
        :param outlierremfunc: a function to remove outliers,
            arguments of the ``outlierremfunc`` has to be predefined,
            except the actual data.
        """
        # If outlierremfunc is not a function, do nothing
        if not callable(outlierremfunc):
            return

        if len(self) <= 1:
            return

        tempdata = np.array([
            [dp.get_timestamp(), dp.y] for dp in self.datapoints
        ])
        mask = outlierremfunc(tempdata)

        # The previous outliers are discarded only after the mask has been
        # calculated successfully
        self.outlier_datapoints = []
        for m, dp in zip(mask, self.datapoints):
            # The copy is taken before ``dp`` is modified, so it still
            # holds the original ``y`` value
            copydp = dp.copy()
            if math.isnan(m):
                dp.tonan()
                copydp.tonan()
            elif m:
                copydp.tonan()
            else:
                dp.tonan()
            self.outlier_datapoints.append(copydp)

    @_track_datalen
    def decimate(self, decimation_limit=10000):
        """
        Decimates ``self.datapoints`` list by keeping only every n-th
        datapoint so that after the decimation, the amount of datapoints
        in the list is not over ``decimation_limit``.

        Bokeh can plot 10000 datapoints with relative ease but more
        datapoints than that will result in slow plotting.

        :type decimation_limit: int or float, optional
        :param decimation_limit: maximum amount of datapoints after the
            decimation, defaults to 10000, has to be positive
        :raises ValueError: if ``decimation_limit`` is not positive
        """
        if decimation_limit <= 0:
            raise ValueError("decimation_limit has to be positive")

        orig_len = len(self)
        if orig_len >= decimation_limit:
            # float division also with integer limits, the factor is
            # rounded up so that the limit is never exceeded
            decimation_factor = \
                int(math.ceil(orig_len / float(decimation_limit)))
            self.datapoints = self.datapoints[::decimation_factor]

    def get_ys_wo_nans(self, force=False):
        """
        The list is cached. It is recalculated only if the cached list is
        empty or ``force`` is ``True``, changes made to the datapoints
        after the first call are otherwise not visible.

        :type force: bool, optional
        :param force: defaults to ``False``, if ``True`` the returned
            list is recalculated in every case.
        :rtype: list
        :return: A list containing ``y`` values of
            :class:`~polyfemos.front.sohplot.datacontainer.DataPoint`
            instances excluding nan values
        """
        if force or not self.__ys_wo_nans:
            self.__ys_wo_nans = [dp.y for dp in self.datapoints if dp]
        return self.__ys_wo_nans

    def count_nans(self):
        """
        :rtype: int
        :return: A count of nan values in ``self.datapoints`` list
        """
        return sum(1 for dp in self.datapoints if not dp)

    def __len__(self):
        """
        :rtype: int
        :return: length of ``self.datapoints``
        """
        return len(self.datapoints)