# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# This file is part of Polyfemos.
#
# Polyfemos is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
#
# Polyfemos is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License and
# GNU General Public License along with Polyfemos. If not, see
# <https://www.gnu.org/licenses/>.
#
# Author: Henrik Jänkävaara
# -----------------------------------------------------------------------------
"""
Class :class:`~polyfemos.front.sohplot.sohplot.SOHPlot` for reading and
plotting state of health data
:copyright:
2019, University of Oulu, Sodankyla Geophysical Observatory
:license:
GNU Lesser General Public License v3.0 or later
(https://spdx.org/licenses/LGPL-3.0-or-later.html)
"""
import itertools
import numpy as np
from bokeh.embed import components
from bokeh.plotting import figure, ColumnDataSource
from bokeh.models import (DatetimeTickFormatter, HoverTool, FuncTickFormatter,
Range1d)
from polyfemos.parser import typeoperator as to
from polyfemos.util.messenger import messenger
from polyfemos.util import fileutils
from polyfemos.data import statistics
from polyfemos.almanac.utils import parse_date, get_jY
from polyfemos.front import (colors, userdef, request)
from polyfemos.front.sohplot import offsets, outfilefields
from polyfemos.front.sohplot.datacontainer import DataPoint, DataContainer
def _get_line_source(data, datarealtimeness=False):
    """
    Collect the x and y values of ``data`` into a bokeh data source.

    :type data: list
    :param data: a list consisting of
        :class:`~polyfemos.front.sohplot.datacontainer.DataPoint` instances
    :type datarealtimeness: bool, optional
    :param datarealtimeness: If ``True``, datarealtimeness starttime value
        will be used to plot 'square' plots.
    :rtype: :class:`~bokeh.models.sources.ColumnDataSource`
    :return: data source with columns ``xs``, ``ys``, ``xs_str`` and
        ``xs_str_ordinal``, one entry per plotted point
    """
    xs = []
    ys = []
    xs_str = []
    xs_str_ordinal = []

    def add_xy(dp):
        # Append one datapoint to every column so they stay the same length
        xs.append(dp.get_timezone_naive_datetime())
        xs_str.append(dp.get_dtstr())
        xs_str_ordinal.append(dp.get_ordinal())
        ys.append(dp.y)

    for i, dp in enumerate(data):
        # Only for plotting 'square' plot in the case of datarealtimeness
        # using z values starttimes
        if datarealtimeness:
            z_values = dp.get_z()
            starttime = None
            if z_values is not None:
                starttime = z_values.get('starttime', None)
            if starttime is not None and i > 0:
                # Extra point at the starttime carrying the current y value;
                # skipped when it equals the previous datapoint.
                extra_dp = DataPoint(dtstr=starttime, y=dp.y)
                if extra_dp != data[i - 1]:
                    add_xy(extra_dp)
        add_xy(dp)

    return ColumnDataSource(
        data={
            'xs': xs,  # timezone-naive datetime objects, used as the x axis
            'ys': ys,
            'xs_str': xs_str,  # datetime as string, displayed in tooltip
            'xs_str_ordinal': xs_str_ordinal,
        }
    )
[docs]def _plot_line_source(bokeh_figure, line_source, color="#000000", cross=False):
"""
:type bokeh_figure: :class:`~bokeh.plotting.figure.Figure`
:param bokeh_figure:
:type line_source: :class:`~bokeh.models.sources.ColumnDataSource`
:param line_source:
:type color: str, optional
:param color: color of the line or cross as string containing RGB value
as hexadecimals (#RRBBGG), defaults to black
:type cross: bool, optional
:param cross: defaults to ``False``. If ``True``, crosses are plotted
in any case. If ``False``, the cross marker is used for plotting
If only 1 datapoint is present.
"""
# If only one datapoint is present, use the cross marker
if len(line_source.data['xs']) <= 1 or cross:
line_ = bokeh_figure.cross(
'xs',
'ys',
source=line_source,
color=color,
line_width=1,
name='line',
alpha=1,
size=20,
)
else:
line_ = bokeh_figure.line(
'xs',
'ys',
source=line_source,
color=color,
line_width=1,
name='line'
)
line_.visible = True
class SOHPlot(object):
    """
    Class for parsing and plotting the state of health data of given
    station and sohpar combination.

    Reads soh text files (the format is fixed).
    Plots the data from startdate to enddate.
    Creates statistical summary of the data.
    Removal of irrational values and advanced outlier removal possible.
    """

    def __init__(self, station_id="", sohpar_name="", startdate="",
                 enddate="", headerdate="", outlierremfunc=None,
                 remove_identicals=False,
                 remove_irrationals=False, advanced_outlier_removal=False,
                 fext="stf", track_datalen=False):
        """
        On initialization of SOHPlot, the data and header information are
        collected from the sohtextfiles. Optionally, irrational and
        outlying values are removed.

        ``startdate``, ``enddate`` and ``headerdate`` are parsed into
        python :class:`~datetime.date` instances using
        :func:`~polyfemos.almanac.utils.parse_date` function.
        If ``startdate`` is later than ``enddate``, the two are swapped.

        :type station_id: str
        :param station_id: Station id in format ``NETWORK.STATION``,
            for example: ``FN.MSF``
        :type sohpar_name: str
        :param sohpar_name: State of health parameter available in sohtextfile.
        :type startdate: str
        :param startdate: The first date of the plotting timespan
        :type enddate: str
        :param enddate: The last date of the plotting timespan
        :type headerdate: str
        :param headerdate: The header information of this date's sohtextfile
            is used. If empty, ``enddate`` is used.
        :type outlierremfunc: func
        :param outlierremfunc: Defaults to ``None``. If no value is given,
            ``outlierremfunc`` for advanced outlier removal is retrieved
            using :func:`~polyfemos.front.userdef.summary_outlierremfuncs`.
            For more info see
            :meth:`~polyfemos.front.sohplot.datacontainer.DataContainer.outlier_removal`.
        :type remove_identicals: bool, optional
        :param remove_identicals: defaults to ``False``. If ``True``,
            values with identical x and y values are removed.
            see :class:`~polyfemos.front.sohplot.datacontainer.DataContainer`
            for more info
        :type remove_irrationals: bool, optional
        :param remove_irrationals: defaults to ``False``, see
            :meth:`~polyfemos.front.sohplot.datacontainer.DataContainer.remove_irrationals`
            for more info.
        :type advanced_outlier_removal: bool, optional
        :param advanced_outlier_removal: defaults to ``False``
        :type fext: str, optional
        :param fext: defaults to "stf", select "stf" or "csv",
            defines the datafile format which is read. With any other
            value no data is read, unless the selected sohpar is N, E or Z
            offset, which is always read from "stf" files.
        :type track_datalen: bool, optional
        :param track_datalen: Defaults to ``False``, If ``True``, the amount
            of datapoints, nans, etc., is monitored.
        """
        offsets.init_definitions()

        self.station_id = station_id
        self.sohpar_name = sohpar_name

        self.startdate = parse_date(startdate)
        self.enddate = parse_date(enddate)
        if self.startdate > self.enddate:
            self.startdate, self.enddate = self.enddate, self.startdate

        if headerdate:
            self.headerdate = parse_date(headerdate)
        else:
            self.headerdate = self.enddate

        self.data_container = DataContainer(
            track_datalen=track_datalen,
            remove_identicals=remove_identicals
        )
        self.header = {}
        self.statistics_dict = None

        # If selected sohpar is N, E or Z offset and thus needs conversion
        nez = self.sohpar_name in offsets.NEZ_OFFSETS

        # NOTE(review): ``_read_header`` is not defined in the visible part
        # of this class; presumably it fills ``self.header`` -- confirm that
        # the method exists.
        self._read_header(nez=nez)

        if nez or fext == "stf":
            self._read_stf_data(nez=nez)
        elif fext == "csv":
            self._read_csv_data()

        self.data_container.add_identical_removal_info()

        if remove_irrationals:
            irlims = self.header["IRLIMS"]
            self.data_container.remove_irrationals(irlims=irlims)

        if advanced_outlier_removal:
            if outlierremfunc is None:
                outlierremfunc = userdef.summary_outlierremfuncs(
                    station_id=self.station_id, sohpar_name=self.sohpar_name)
            self.data_container.outlier_removal(outlierremfunc=outlierremfunc)

    def get_info(self):
        """
        :rtype: list
        :return: A list containing info notes
        """
        return self.data_container.get_info()

    def get_statistics_dict(self):
        """
        The dictionary is calculated on the first call and the stored
        result is returned on later calls.

        :rtype: dict
        :return: see :func:`~polyfemos.data.statistics.get_statistics_dict`
            for more info
        """
        if self.statistics_dict is None:
            self.statistics_dict = statistics.get_statistics_dict(
                self.data_container.get_ys_wo_nans(),
                thresholds=self.header["YELLOW"],
                unit=self.header["UNIT"])
        return self.statistics_dict

    def get_statistics_table(self):
        """
        :rtype: list
        :return: see :func:`~polyfemos.data.statistics.get_statistics_table`
            for more info
        """
        return statistics.get_statistics_table(self.get_statistics_dict())

    def get_plot_components(self, decimate=True):
        """
        :type decimate: bool, optional
        :param decimate: defaults to ``True``,
            see :func:`~polyfemos.front.sohplot.sohplot.SOHPlot._get_plot`
        :rtype: str, str
        :return: script and div html blocks, see
            :func:`~bokeh.embed.components` for more information.
            Two empty strings are returned if there is no figure to plot.
        """
        # Named ``bokehfig`` so that the imported bokeh ``figure`` function
        # is not shadowed.
        bokehfig = self._get_plot(decimate=decimate)
        if bokehfig is None:
            return "", ""
        return components(bokehfig)

    def _get_filepath(self, date, fpf, extension, force=False):
        """
        :type date: :class:`~obspy.core.utcdatetime.UTCDateTime`
        :param date: date of the requested file
        :type fpf: func
        :param fpf: filepath format function, called with the keyword
            arguments ``parname``, ``network_code``, ``station_code``,
            ``year`` and ``julday``
        :type extension: str
        :param extension: file extension
        :type force: bool
        :param force: see :func:`~polyfemos.front.request.filepath`
        :rtype: str
        :return: path to file
        """
        julday, year = get_jY(date)
        network_code, station_code = self.station_id.split(".")
        filepath = fpf(
            parname=self.sohpar_name,
            network_code=network_code,
            station_code=station_code,
            year=year,
            julday=julday)
        return request.filepath(filepath, extension, force=force)

    def _get_csv_filepath(self, date, extension=".csv", force=False):
        """
        :type date: :class:`~obspy.core.utcdatetime.UTCDateTime`
        :param date: date of the requested file
        :type extension: str
        :param extension: file extension, defaults to ``".csv"``
        :type force: bool
        :param force: see :func:`~polyfemos.front.request.filepath`
        :rtype: str
        :return: Filepath following the filepathformat defined in
            YAML config files.
        """
        return self._get_filepath(date, userdef.filepathformats("csv"),
                                  extension, force=force)

    def _get_stf_filepath(self, date):
        """
        :type date: :class:`~obspy.core.utcdatetime.UTCDateTime`
        :param date: date of the requested file
        :rtype: str
        :return: Filepath following the filepathformat defined in
            YAML config files.
        """
        return self._get_filepath(date, userdef.filepathformats("stf"), ".stf")

    def _read_stf_data(self, nez=False):
        """
        Reads and parses the data from sohtextfiles.

        :type nez: bool, optional
        :param nez: defaults to ``False``. If selected sohpar is N, E or Z
            offset, values for U, W and V offsets has to be read.
            For information about UWV to NEZ conversion see
            :class:`~polyfemos.front.sohplot.offsets.UWVOffsets`
        """
        uwvo = offsets.UWVOffsets()
        stored_data = []

        # First pass: collect the data rows of every day in the timespan
        runningdate = self.startdate
        while runningdate <= self.enddate:
            filepath = self._get_stf_filepath(runningdate)
            runningdate += 86400
            if not filepath:
                continue
            data_scope = False
            for row in fileutils.rowsof(filepath):
                # Data values are read after 'DATA' text is encountered
                if row and row[0] == "DATA":
                    data_scope = True
                    continue
                # Rows with less than 3 fields are ignored
                if data_scope and len(row) >= 3:
                    stored_data.append(row)

        # Second pass: convert the collected rows into datapoints
        for row in stored_data:
            if nez and row[1] in uwvo.offsets:
                uwvo.update(*row[:3])
                # If U, W and V offset values are defined, conversion
                # can be done
                sensor = self.header["SENSOR"]
                if uwvo and sensor is not None:
                    dt, value = \
                        uwvo.transform(sensor, self.sohpar_name[-1])
                    uwvo.clear()
                else:
                    continue
            elif row[1] == self.sohpar_name:
                dt = to.utcdatetime(row[0])
                if dt is None:
                    msg = "Could not convert '{}' to utcdatetime." \
                        .format(row[0])
                    messenger(msg, "W")
                    # NOTE(review): the datapoint is still created with
                    # ``utcdatetime=None`` below, whereas the csv reader
                    # skips unparseable rows -- confirm this is intended.
                value = row[2]
            else:
                continue

            dp = DataPoint(utcdatetime=dt, y=value)
            if len(row) > 3:
                dp.set_z(row[3])
            self.data_container.append(dp)

    def _read_csv_data(self):
        """
        Reads the data from sohcsv files.

        For every date of the timespan both the ``.csv`` and the
        ``.retro.csv`` file are read. A datapoint of a ``.retro.csv`` file
        is skipped if a ``.csv`` file read so far has already provided
        data for the same day.
        """
        starttimestamp = self.startdate.timestamp
        endtimestamp = self.enddate.timestamp + 86399

        read_filepaths = set()
        added_days = set()

        def read_file(fn, exclude_added_days=False):
            # Every file is read only once
            if not fn or fn in read_filepaths:
                return
            read_filepaths.add(fn)

            # The first row of the file is skipped
            for row in fileutils.read_csv(fn)[1:]:
                if len(row) < 2:
                    continue
                dptimestamp = to.float_(row[0])
                if dptimestamp is None:
                    msg = "Could not convert '{}' to timestamp." \
                        .format(row[0])
                    messenger(msg, "W")
                elif starttimestamp <= dptimestamp <= endtimestamp:
                    # Number of whole 86400 second days in the timestamp
                    day = dptimestamp // 86400
                    if exclude_added_days and day in added_days:
                        continue
                    if not exclude_added_days:
                        added_days.add(day)
                    dp = DataPoint(timestamp=dptimestamp, y=row[1])
                    if len(row) > 2:
                        dp.set_z(row[2])
                    self.data_container.append(dp)

        runningdate = self.startdate
        while runningdate <= self.enddate:
            filepath = self._get_csv_filepath(
                runningdate, force=True)
            retro_filepath = self._get_csv_filepath(
                runningdate, extension=".retro.csv", force=True)
            read_file(filepath)
            read_file(retro_filepath, exclude_added_days=True)
            runningdate += 86400

    def _get_plot(self, decimate=True):
        """
        Creates a 2D bokeh figure, time in x axis. By default, the data is
        plotted as a line, lines are separated if any number of nan values
        are between continuous set of valid values. If line consists of only
        one value, cross marker used instead of a line.

        Calls ``decimate()`` (optionally) and ``sort()`` of the data
        container before plotting.

        Colors are as follows:

        - Red, actual data
        - Brighter red, horizontal lines defined by ``H_LINES``
        - Dark green, outlier datapoints

        :type decimate: bool, optional
        :param decimate: defaults to ``True``. If ``True``, data is decimated.
        :rtype: :class:`~bokeh.plotting.figure.Figure`
        :return: the figure, or ``None`` if the data container is empty
        """
        BG_ALPHA = 1
        PLOT_W = 550
        PLOT_H = 480

        if len(self.data_container) < 1:
            return None

        if decimate:
            self.data_container.decimate()
        self.data_container.sort()

        # First and last datapoint, used in the title and horizontal lines
        dp0 = self.data_container.datapoints[0]
        dp1 = self.data_container.datapoints[-1]

        header = "{} {} {} - {}" \
            .format(self.station_id, self.sohpar_name,
                    dp0.get_ordinal(), dp1.get_ordinal())

        tools_to_use = \
            ['pan', 'box_zoom', 'wheel_zoom', 'hover', 'reset', 'save']

        bokehfig = figure(
            plot_width=PLOT_W,
            plot_height=PLOT_H,
            tools=tools_to_use,
            x_axis_type='datetime',
            title=header,
            output_backend="webgl",
        )

        bokehfig.border_fill_color = colors.GREY_1
        bokehfig.outline_line_color = colors.GREY_2
        bokehfig.background_fill_color = colors.GREY_3
        bokehfig.background_fill_alpha = BG_ALPHA

        tempunit = self.header["UNIT"]
        tempunit = "" if tempunit == "" else " ({})".format(tempunit)
        bokehfig.yaxis.axis_label = self.sohpar_name + tempunit
        bokehfig.xaxis.axis_label = "UTC Time"

        # Both axes share the same fonts and colors
        for axis in (bokehfig.xaxis, bokehfig.yaxis):
            axis.axis_label_text_font = "Courier"
            axis.axis_label_text_font_style = "normal"
            axis.axis_label_text_color = colors.BLACK
            axis.axis_line_color = colors.BLACK
            axis.major_label_text_color = colors.GREY_4
            axis.minor_tick_line_color = colors.BLACK
            axis.major_tick_line_color = colors.BLACK

        bokehfig.title.text_font = "Courier"
        bokehfig.title.text_font_style = "normal"
        bokehfig.title.text_color = colors.BLACK

        bokehfig.grid.grid_line_color = colors.GREY_2

        # Fixed y range only if exactly two limits are given in the header
        y_lim = self.header["PLOTLIMS"]
        if y_lim and len(y_lim) == 2:
            bokehfig.y_range = Range1d(*tuple(y_lim))

        bokehfig.yaxis.major_label_orientation = np.pi / 2

        ticklabelcode = userdef.ticklabels(self.sohpar_name)
        if ticklabelcode:
            bokehfig.yaxis.formatter = FuncTickFormatter(code=ticklabelcode)
            bokehfig.yaxis.major_label_orientation = np.pi * 7 / 16

        # Horizontal lines, multiline. Every line spans from the first
        # to the last datapoint.
        multiline_xs = []
        multiline_ys = []
        for y in self.header["H_LINES"]:
            multiline_xs.append([
                dp0.get_timezone_naive_datetime(),
                dp1.get_timezone_naive_datetime()
            ])
            multiline_ys.append([y, y])

        # Multiline data
        multiline_source = ColumnDataSource(
            data={
                'mlxs': multiline_xs,
                'mlys': multiline_ys,
            }
        )

        bokehfig.multi_line(
            'mlxs',
            'mlys',
            source=multiline_source,
            color=colors.RED_1,
            line_width=1.5)

        datarealtimeness = \
            self.sohpar_name == userdef.definitions("datarealtimeness")

        # Consecutive datapoints with truth value ``True`` form one line
        # segment, datapoints with truth value ``False`` are dropped and
        # separate the segments.
        all_data_split = [list(v) for k, v in itertools.groupby(
            self.data_container.datapoints, bool) if k]
        for md in all_data_split:
            main_line_source = \
                _get_line_source(md, datarealtimeness=datarealtimeness)
            _plot_line_source(bokehfig, main_line_source, color=colors.RED_3)

        if len(self.data_container.outlier_datapoints) > 0:
            outlier_data_split = [list(v) for k, v in itertools.groupby(
                self.data_container.outlier_datapoints, bool) if k]
            for od in outlier_data_split:
                outlier_line_source = \
                    _get_line_source(od, datarealtimeness=datarealtimeness)
                _plot_line_source(bokehfig, outlier_line_source,
                                  color=colors.GREEN, cross=True)

        hover = bokehfig.select(dict(type=HoverTool))
        hover.tooltips = [
            ("value", "@ys"),
            ("date", "@xs_str"),
            ("ordinal", "@xs_str_ordinal"),
        ]
        hover.mode = 'mouse'
        # Tooltips are shown only for the glyphs named 'line'
        hover.names = ['line']

        bokehfig.xaxis.formatter = DatetimeTickFormatter(
            hours=["%d/%m/%Y %H:%M"],
            days=["%d/%m/%Y"],
            months=["%m/%Y"],
            years=["%Y"],
        )
        bokehfig.xaxis.major_label_orientation = np.pi / 5

        return bokehfig