Source code for sensortoolkit.reference._load_ref_data

# -*- coding: utf-8 -*-
"""
This module contains methods for importing reference datasets (i.e., datasets
containing reference measurements that have been processed into the SDFS format
following use of the ``sensortoolkit.lib_utils.ReferenceSetup()`` method).

================================================================================

@Author:
  | Samuel Frederick, NSSC Contractor (ORAU)
  | U.S. EPA / ORD / CEMM / AMCD / SFSB

Created:
  Tue Jan 28 14:23:44 2020
Last Updated:
  Wed Jul 14 12:44:57 2021
"""
import os
import pandas as pd
from sensortoolkit.datetime_utils import interval_averaging

def load_ref_dataframes(bdate, edate, path, classes):
    """Load 1-hour reference data and derive 24-hour averages per class.

    Every calendar month touched by the span ``bdate`` to ``edate`` is
    visited. For each month and each requested parameter classification, the
    matching monthly file is merged into that classification's 1-hour
    dataframe via ``import_ref_dataframe``. Once all months are loaded, each
    requested classification's 1-hour dataframe is passed to
    ``interval_averaging`` to build the 24-hour dataframe.

    Args:
        bdate:
            Beginning of the period to load. Any value pandas can turn into
            a monthly ``Period`` (e.g. a date string or a timestamp).
        edate:
            End of the period to load (same accepted values as ``bdate``).
            The month containing ``edate`` is always included.
        path (str):
            Full directory path to reference data.
        classes (container of str):
            Parameter classifications to load. Recognized members are
            ``'PM'``, ``'Gases'`` and ``'Met'``; anything else is ignored.

    Returns:
        ref_dict (dict):
            Dictionary keyed by ``'PM'``, ``'Gases'`` and ``'Met'``. Each
            value is a dictionary with the keys ``'1-hour'`` and
            ``'24-hour'`` holding pandas dataframes. Classifications that
            were not requested map to empty dataframes.

    """
    print("Loading reference dataframes")

    # Key order of the returned dictionary.
    class_names = ('PM', 'Gases', 'Met')
    # Order in which classes are imported and averaged; kept as PM, Met,
    # Gases so console output stays in the established sequence.
    requested = [name for name in ('PM', 'Met', 'Gases') if name in classes]

    hourly = {name: pd.DataFrame() for name in class_names}
    daily = {name: pd.DataFrame() for name in class_names}

    # Build the month list straight from monthly periods. Stepping day by day
    # from bdate can stop short of edate's month when edate falls on the
    # first day of a month at an earlier time of day than bdate.
    for period in pd.period_range(start=bdate, end=edate, freq='M'):
        year = str(period.year)
        month = str(period.month).zfill(2)
        print(f'..{year}-{month}')

        for name in requested:
            # Import 1-hr averaged data
            hourly[name] = import_ref_dataframe(hourly[name], path, year,
                                                month, suffix=f'_{name}')

    # Compute 24-hr averaged data
    # NOTE(review): thres=0.75 is presumably a completeness threshold applied
    # by interval_averaging -- confirm against that helper.
    print('Computing 24-hour averaged reference datasets')
    for name in requested:
        daily[name] = interval_averaging(hourly[name], freq='D',
                                         interval_count=24, thres=0.75)

    ref_dict = {name: {'1-hour': hourly[name], '24-hour': daily[name]}
                for name in class_names}

    return ref_dict
def import_ref_dataframe(df, path, year, month, suffix=None):
    """Import one monthly reference file and merge it into ``df``.

    The file read is ``H_<year><month><suffix>.csv`` inside ``path``. Its
    ``DateTime`` column is parsed as dates and used as the index.

    Args:
        df (pandas dataframe):
            Constructor dataframe containing reference data (FRM/FEM) at
            1-hour averaged intervals. Loaded data are combined with this
            dataframe and the result is returned.
        path (str):
            Full path to reference data directory where files are located.
        year (str):
            The year (YYYY) for which data will be loaded.
        month (str):
            The month (MM) for which data will be loaded.
        suffix (str):
            Text placed between the month and ``.csv`` in the file name to
            select the parameter classification, e.g. ``'_PM'``,
            ``'_Gases'`` or ``'_Met'``. ``None`` (the default) adds nothing.

    Returns:
        df (pandas dataframe):
            The passed dataframe combined with the loaded data. If the file
            does not exist, a warning is printed and the passed dataframe is
            returned unchanged.

    """
    # Without this, a missing suffix would be formatted as the literal text
    # "None" and end up in the file name.
    file_tag = '' if suffix is None else suffix
    filename = f'H_{year}{month}{file_tag}.csv'

    # Only the read can raise FileNotFoundError, so only the read is guarded.
    try:
        load_df = pd.read_csv(os.path.join(path, filename),
                              parse_dates=['DateTime'],
                              index_col='DateTime')
    except FileNotFoundError:
        print("Warning, file not found:", filename)
        return df

    # Combine on the first instance of each timestamp: values already in
    # 'df' are kept and 'load_df' only supplies timestamps (or missing
    # values) that 'df' lacks, so no duplicate index entries are created.
    df = df.combine_first(load_df)
    print('....' + filename)

    return df