Source code for polyfemos.data.statistics

# -*- coding: utf-8 -*-
# -----------------------------------------------------------------------------
# This file is part of Polyfemos.
#
# Polyfemos is free software: you can redistribute it and/or modify it under
# the terms of the GNU Lesser General Public License as published by the Free
# Software Foundation, either version 3 of the License, or any later version.
#
# Polyfemos is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License and
# GNU General Public License along with Polyfemos. If not, see
# <https://www.gnu.org/licenses/>.
#
# Author: Henrik Jänkävaara
# -----------------------------------------------------------------------------
"""
Functions for retrieving statistical info about the data

:copyright:
    2019, University of Oulu, Sodankyla Geophysical Observatory
:license:
    GNU Lesser General Public License v3.0 or later
    (https://spdx.org/licenses/LGPL-3.0-or-later.html)
"""
import math

import numpy as np


def get_statistics_dict(data, thresholds=None, unit=float('nan')):
    """
    Calculates statistical parameters of the given data

    The returned dictionary always contains the keys ``"Median"``,
    ``"Min"``, ``"Max"``, ``"Mean"``, ``"SD"``, ``"CV%"``, ``"TIB%"``,
    ``"Lower"``, ``"Higher"`` and ``"UNIT"`` (in this order). Every value
    which can not be calculated is left as nan.

    :type data: list
    :param data: 1-D data, without nans
    :type thresholds: list, optional
    :param thresholds: Upper and lower thresholds as a list of
        at most two entries (extra entries are ignored), used to calculate
        the broken threshold percentage ``"TIB%"``. With a single
        threshold (or two equal ones) a value breaks the threshold when
        it is below it, with two distinct thresholds when it lies
        outside the closed interval between them. Defaults to no
        thresholds.
    :type unit: str, optional
    :param unit: The unit of the data
    :rtype: dict
    :return: dictionary containing statistical information
        of the given data, numerical statistics are rounded to
        two decimals
    """
    def round_to_n(x_, n):
        # ``np.round_`` was removed in NumPy 2.0, ``np.round`` is the
        # equivalent function available in every NumPy version.
        return np.round(x_, decimals=n)

    # ``None`` replaces the former mutable ``[]`` default; only the
    # first two entries are ever taken into account.
    thresholds = [] if thresholds is None else thresholds[:2]

    nan = float('nan')
    dict_ = {}
    for key in ("Median", "Min", "Max", "Mean", "SD",
                "CV%", "TIB%", "Lower", "Higher"):
        dict_[key] = nan
    dict_["UNIT"] = unit

    # ``len`` instead of truthiness, because numpy arrays are accepted
    if len(data) == 0:
        return dict_

    mean = np.mean(data)
    std = np.std(data)

    # Coefficient of variation
    # Actually not very useful, because some parameters
    # have negative values. It is left as nan for zero and negative
    # means, which also guards against a division by zero.
    if not (math.isnan(std) or math.isnan(mean) or mean < 10e-99):
        cv = 100.0 * std / mean
        dict_["CV%"] = round_to_n(cv, 2)

    dict_["Median"] = round_to_n(np.median(data), 2)
    dict_["Min"] = round_to_n(np.min(data), 2)
    dict_["Max"] = round_to_n(np.max(data), 2)
    dict_["Mean"] = round_to_n(mean, 2)
    dict_["SD"] = round_to_n(std, 2)

    if len(thresholds) > 0:
        lower = min(thresholds)
        higher = max(thresholds)

        values = np.asarray(data)
        if lower == higher:
            # A single threshold acts as a lower limit only
            broken = values < lower
        else:
            # Negated "inside" test, so that incomparable values would
            # still be counted as broken
            broken = ~((values >= lower) & (values <= higher))

        percentage = np.count_nonzero(broken) / len(data)
        percentage *= 100.0

        dict_["TIB%"] = round_to_n(percentage, 2)
        dict_["Lower"] = lower
        # "Higher" is reported only when it differs from "Lower"
        if lower != higher:
            dict_["Higher"] = higher

    return dict_
def get_statistics_table(dict_):
    r"""
    Converts a statistics dictionary into table rows

    :type dict\_: dict
    :param dict\_: statistics dictionary, the optional ``"UNIT"`` entry
        is used as the unit of the rows and does not get a row of its own
    :rtype: list
    :return: a 2-D list containing statistical information
        about the selected parameter during selected timespan,
        each row in table consists of parameter, value and unit.
        ``[[]]`` is returned if ``dict_`` is None.
    """
    if dict_ is None:
        return [[]]

    # Work on a shallow copy, so the caller's dictionary keeps "UNIT"
    stats = dict_.copy()
    default_unit = stats.pop("UNIT", "")

    # Percentage valued parameters carry their own unit
    percent_keys = {"CV%", "TIB%"}
    return [
        [name, value, "%" if name in percent_keys else default_unit]
        for name, value in stats.items()
    ]