Source code for olm.loggers.loggerScripts

"""
Contains functions that are useful in general for manipulation of data logger data
"""

from pandas import DataFrame, Series, notnull, Timestamp
from matplotlib.dates import date2num
from numpy import arange

# Accepts a list of logger DataFrame objects as its first argument.
def joinLoggers(loggerlist, how='inner', interpolate = False):
    """
    Joins the data from a list of logger DataFrame objects together.

    Parameters
    ----------
    loggerlist : list
        A list (or tuple) of logger DataFrame objects to be joined. The
        first element is joined with all remaining elements on their indices.
    how : string, optional
        How the DataFrames are to be joined. Default is inner.
    interpolate : boolean, optional
        Determines whether empty rows are to be filled with data via
        interpolation. Each column is filled with its own call to
        Series.interpolate(). Default = False

    Returns
    -------
    joined : pandas.DataFrame
        DataFrame of joined loggers. If loggerlist is not a list/tuple, or
        is empty, a message is printed and None is returned.
    """
    # isinstance also admits tuples and list subclasses; an empty sequence
    # has no first logger to join onto, so it is rejected the same way.
    if not isinstance(loggerlist, (list, tuple)) or len(loggerlist) == 0:
        print("Problem with input list: Need to input a list of DataFrame objects")
        return None

    # Work on a real list so the "remaining loggers" slice is a list
    # regardless of the sequence type that was passed in.
    frames = list(loggerlist)
    joined = frames[0].join(frames[1:], how=how)

    if interpolate:
        # Column-by-column so each column is interpolated independently.
        for col in joined.columns:
            joined[col] = joined[col].interpolate()
    return joined
def joinAndResampleLoggers(loggerlist, interval, suffixes=None, how='inner', interpolate=False, limit=None):
    """
    Joins and resamples data from DataFrame objects provided in a list.

    Each logger is resampled to ``interval`` using the mean of each bin,
    optionally has a suffix appended to its column names, and the resampled
    loggers are then joined one after another on their indices.

    Parameters
    ----------
    loggerlist : list or dict
        List (or tuple) of logger pandas.core.dataframe.DataFrame objects to
        be joined, or a dict whose values are such DataFrames.
    interval : string
        Pandas offset string
        (http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases)
        on which the DataFrames should be resampled (e.g. 'H'=hour,
        'T'=minute, 'D'=day).
    suffixes : list or dict, optional
        Suffixes to be applied to the column names of each logger, joined
        with an underscore. For a list loggerlist this is a list of strings
        of the same length; for a dict loggerlist it must be a dict with the
        same keys. An entry of None leaves that logger's columns unchanged.
        This is useful if multiple loggers have the same column names.
        Default (None or empty) applies no suffixes.
    how : string
        Method for joining loggers (default = 'inner').
    interpolate : boolean
        Whether data should be interpolated to fill gaps in rows
        (default=False).
    limit : int
        Maximum number of consecutive NaNs to fill if data are interpolated.

    Returns
    -------
    joined : pandas.core.dataframe.DataFrame
        DataFrame Object that contains joined DataFrames. None is returned
        (after printing a message) if loggerlist is empty or of an
        unsupported type, or if loggerlist is a dict and suffixes is a
        non-empty non-dict.

    Notes
    -----
    The input loggers are not modified; suffixes are applied to the
    resampled copies only.
    """
    # A missing/empty suffixes argument means "no suffixes". A fresh local is
    # built on every call so no state is shared between calls.
    no_suffixes = suffixes is None or len(suffixes) == 0

    # Pair every logger with the suffix (or None) that applies to it.
    if isinstance(loggerlist, (list, tuple)):
        if no_suffixes:
            suffixes = [None] * len(loggerlist)
        pairs = [(logger, suffixes[i]) for i, logger in enumerate(loggerlist)]
    elif isinstance(loggerlist, dict):
        if no_suffixes:
            pairs = [(logger, None) for logger in loggerlist.values()]
        elif isinstance(suffixes, dict):
            pairs = [(logger, suffixes[key]) for key, logger in loggerlist.items()]
        else:
            print("Problem with suffixes. If loggerlist is a dict, suffixes also must be a dict.")
            return None
    else:
        print("Problem with logger list: Need to input a list or dict of DataFrame or Series objects")
        return None

    if not pairs:
        # Nothing to join; report it the same way as other bad input.
        print("Problem with logger list: Need to input a list or dict of DataFrame or Series objects")
        return None

    resampledList = []
    for logger, suffix in pairs:
        resampled = logger.resample(interval).mean()
        if suffix is not None:
            # Rename on the resampled copy so the caller's DataFrame keeps
            # its original column names.
            resampled.columns = resampled.columns + ('_' + suffix)
        resampledList.append(resampled)

    joined = resampledList[0]
    for resampled in resampledList[1:]:
        joined = joined.join(resampled, how=how)

    if interpolate:
        # Column-by-column so each column is interpolated independently.
        for col in joined.columns:
            joined[col] = joined[col].interpolate(limit=limit)
    return joined
def linear_correction(rawSeries, correctionSeries):
    """
    Uses a Pandas Series of spot measured values to linearly correct time
    series data from a logger.

    For every spot measurement, a multiplying factor
    ``measurement / logger value`` is computed at the latest non-null logger
    time stamp at or before the measurement time. The factor is taken to be
    1 at the first time stamp (unless a factor was computed there) and is
    held at the most recently processed factor until the last time stamp.
    Factors at all other time stamps are filled in with
    Series.interpolate() and multiplied into the raw data.

    Parameters
    ----------
    rawSeries : pandas.core.series.Series
        A Pandas Series that contains the time series data to be corrected.
        NOTE(review): the lookup uses searchsorted, so the index is assumed
        to be sorted in increasing time order.
    correctionSeries : pandas.core.series.Series
        A Pandas Series that contains spot measurement data that are to be
        used to correct rawSeries. Measurements taken before the first
        non-null logger value do not produce a factor. May be empty, in
        which case all factors are 1.

    Returns
    -------
    rawSeries : pandas.core.series.Series
        A corrected version of the rawSeries time series data. The input
        Series is not modified.
    """
    if len(rawSeries) == 0:
        # Nothing to correct, and no first/last time stamp to anchor on.
        return rawSeries.copy()

    first_stamp = rawSeries.index[0]
    last_stamp = rawSeries.index[-1]

    # Only time stamps with a valid logger reading can anchor a factor.
    # This does not depend on the measurement, so compute it once.
    candidates = rawSeries.index[notnull(rawSeries)]

    # loop through correction series and calculate multiplying factors
    corrDict = {}
    # Defined up front so an empty correctionSeries still has an anchor.
    asOfDate = first_stamp
    for date, measurement in correctionSeries.items():
        # side='right' so a logger reading taken at exactly the measurement
        # time is used, rather than the reading before it.
        position = candidates.searchsorted(date, side='right')
        if position > 0:
            asOfDate = candidates[position - 1]
            corrDict[asOfDate] = measurement / rawSeries[asOfDate]
        else:
            # Measurement precedes all valid logger data: no factor.
            asOfDate = first_stamp

    # Anchor both ends so interpolation covers the whole record.
    if first_stamp not in corrDict:
        corrDict[first_stamp] = 1
    if last_stamp not in corrDict:
        corrDict[last_stamp] = corrDict[asOfDate]

    k_series = Series(corrDict)
    # Building a DataFrame aligns the sparse factors with the raw index.
    joined_series = DataFrame({'raw': rawSeries, 'k': k_series})
    factors = joined_series['k'].interpolate()
    return rawSeries * factors
def manualCompare(logger, manual, value_name='', ltag='_log', mtag='_man'):
    """
    Function to extract logger data with same timestamps as manual
    measurements for comparison. Both data sets are passed through
    resampleHourly (defined outside this function) to assure alignment of
    indices.

    Parameters
    ----------
    logger : pandas.core.series.Series
        A Pandas TimeSeries containing a single column and time stamps as
        indices.
    manual : pandas.core.series.Series
        A Pandas TimeSeries containing a single variable from the manual
        data set
    value_name : string
        A string to use as the base for the column labels in the output
        DataFrame. When set, it is followed by an underscore and then the
        tag.
    ltag : string
        A suffix to be added to the logger column name, or used as the
        logger column name if value_name is not set.
    mtag : string
        A suffix to be added to the manual measurement column name, or used
        as the manual measurement column name if value_name is not set.

    Returns
    -------
    joined : pandas.core.dataframe.DataFrame
        A DataFrame object containing values of manual measurements and
        corresponding values from the logger time series using the aligned
        index that is resampled to the hour. Rows where the logger has no
        value for a manual time stamp contain NaN in the logger column.
    """
    if value_name != '':
        value_name += '_'
    logger = resampleHourly(logger)
    manual = resampleHourly(manual)
    # Keep only the time stamps present in the manual record. reindex fills
    # NaN for time stamps the logger lacks instead of raising KeyError.
    logger = logger.reindex(manual.index)
    # The manual column is labelled with mtag (the original referenced an
    # undefined name here).
    joined = DataFrame({value_name + ltag: logger, value_name + mtag: manual})
    return joined
def shiftLogger(logger, shift_to, align_at_start = True):
    """
    Function to shift DataFrame timestamps to start or end at a specific
    datetime.

    Every time stamp is moved by the same offset, so the spacing between
    records is preserved.

    Parameters
    ----------
    logger : pandas.core.series.Series or pandas.core.dataframe.Dataframe
        A Pandas TimeSeries or DataFrame containing time stamps as indices.
        Its index is replaced in place.
    shift_to : string
        A string (or any value accepted by pandas.Timestamp) that contains
        the date and time that the logger series should be shifted to. By
        default this is the correct starting time (first time stamp) of the
        series. If the logger index is timezone-aware and shift_to carries
        no timezone, shift_to is interpreted in the index's timezone.
    align_at_start : boolean
        If True, shift_to is assumed to represent the correct starting date
        for the series. If False, shift_to is assumed to represent the
        correct final date of the series. (default=True)

    Returns
    -------
    logger : pandas.core.series.Series or pandas.core.dataframe.DataFrame
        The same Series or DataFrame object that was passed in, now carrying
        the shifted time stamps. An empty logger is returned unchanged.
    """
    bad_times = logger.index
    if len(bad_times) == 0:
        # No time stamp to align on, hence nothing to shift.
        return logger

    target = Timestamp(shift_to)
    index_tz = getattr(bad_times, 'tz', None)
    if index_tz is not None and target.tzinfo is None:
        # Naive and aware stamps cannot be subtracted; read the naive
        # target in the logger's own timezone.
        target = target.tz_localize(index_tz)

    # align at starting time stamp, or at ending time stamp
    reference = bad_times[0] if align_at_start else bad_times[-1]
    dt = target - reference

    # shift index of original logger time series
    logger.index = bad_times + dt
    return logger