Source code for olm.loggers.loggerScripts

"""
Contains functions that are useful in general for manipulation of data logger data
"""

from pandas import datetools, DataFrame, Series, notnull
from matplotlib.dates import date2num

#accepts a list of logger DateFrame objects as first argument
[docs]def joinLoggers(loggerlist, how='inner', interpolate = False): """ Joins the data from a list of logger DataFrame objects together. Parameters ---------- loggerlist : list A list of logger DataFrame objects to be joined. how : string, optional How the two DataFrames are to be joined. Default is inner. interpolate : boolean, optional Determines whether empty rows are to be filled with data via interpolation. Uses Pandas Dataframe.interpolate(). Default = False Returns ------- joined : pandas.DataFrame DataFrame of joined loggers. """ #merge data from multiple loggers if type(loggerlist) == list: joined = loggerlist[0].join(loggerlist[1:], how=how) if interpolate: for col in joined.columns: filled_col = joined[col].interpolate() joined[col] = filled_col return joined else: print "Problem with input list: Need to input a list of DataFrame objects" return None
[docs]def joinAndResampleLoggers(loggerlist, interval, suffixes=[], how='inner', interpolate=False, limit=None): """ Joins and resamples data from DataFrame objects provided in a list. Parameters ---------- loggerlist : list List of logger pandas.core.dataframe.DataFrame objects to be joined. interval : string Pandas offset string (http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases) on which the DataFrames should be resampled (e.g. 'H'=hour, 'T'=minute, 'D'=day). suffixes : list A list of strings (same length as loggerlist) that contains suffixes to be applied to each logger. This is useful if multiple loggers have the same column names. how : string Method for joining loggers (default = 'inner'). interpolate : boolean Whether data should be interpolated to fill gaps in rows (default=False). limit : int Maximum number of consecutive NaNs to fill if data are interpolated. Returns ------- joined : pandas.core.dataframe.DataFrame DataFrame Object that contains joined DataFrames. """ #If no suffixes were passed, create a list full of None values # this keeps suffixes from being added in the code below if suffixes==[]: for i in arange(len(loggerlist)): suffixes.append(None) resampledList = [] if type(loggerlist)==list: #print "Processing list type loggerlist..." for i,logger in enumerate(loggerlist): if suffixes[i]!=None: logger.columns+='_'+suffixes[i] resampledList.append(logger.resample(interval)) elif type(loggerlist)==dict: #print "Processing dict type loggerlist..." for logger_key in loggerlist.keys(): logger = loggerlist[logger_key] if type(suffixes)==dict: if suffixes[logger_key]!=None: logger.columns+='_'+suffixes[logger_key] resampledList.append(logger.resample(interval)) else: print "Problem with suffixes. If loggerlist is a dict, suffixes also must be a dict." return None else: print "Problem with logger list: Need to input a list or dict of DataFrame or Series objects" return None for i, logger in enumerate(resampledList): if i==0: joined=logger # elif i==1: # joined=joined.join(logger, how=how, lsuffix='_'+suffixes[0], rsuffix='_'+suffixes[1]) # elif i==3: # return joined else: joined=joined.join(logger, how=how)#, rsuffix='_'+suffixes[i]) if interpolate: for col in joined.columns: print joined print col filled_col = joined[col].interpolate(limit=limit) joined[col] = filled_col return joined
[docs]def linear_correction(rawSeries, correctionSeries): """ Uses a Pandas Series of spot measured values to linearly correct time series data from a logger. Parameters ---------- rawSeries : pandas.core.series.Series A Pandas Series that contains the time series data to be corrected. correctionSeries : pandas.core.series.Series A Pandas Series that contains spot measurement data that are to be used to correct rawSeries. Returns ------- rawSeries : pandas.core.series.Series A corrected version of the rawSeries time series data. """ #loop through correction series and calculate multiplying factors corrDict = {} for date, measurement in correctionSeries.iteritems(): candidates = rawSeries.index[notnull(rawSeries)] index = candidates.searchsorted(date) if index > 0: asOfDate = candidates[index - 1] this_k = measurement/rawSeries[asOfDate] corrDict[asOfDate]= this_k else: asOfDate = rawSeries.index[0] if not rawSeries.index[0] in corrDict: corrDict[rawSeries.index[0]]=1 if not rawSeries.index[-1] in corrDict: corrDict[rawSeries.index[-1]] = corrDict[asOfDate] k_series = Series(corrDict) joined_series = DataFrame({'raw':rawSeries,'k':k_series}) joined_series.k = joined_series.k.interpolate() rawSeries = rawSeries*joined_series.k return rawSeries
[docs]def manualCompare(logger, manual, value_name='', ltag='_log', mtag='_man'): """ Function to extract logger data with same timestamps as manual measurements for comparison. Both data sets are resampled on an hourly interval to assure alignment of indicies. Parameters ---------- logger : pandas.core.series.Series A Pandas TimeSeries containing a single column and time stamps as indices. manual : pandas.core.series.Series A Pandas TimeSeries containing a single variable from the manual data set value_name : string A string to use as the base for the collumn labels in the output DataFrame. ltag : string A suffix to be added to the logger column name, or used as the logger column name if value_name is not set. mtag : string A suffix to be added to the manual measurement column name, or used as the manual measurement column name if value_name is not set. Returns ------- joined : pandas.core.dataframe.DataFrame A DataFrame object containing values of manual measurements and corresponding values from the logger time series using the aligned index that is resampled to the hour. """ if not(value_name==''): value_name += '_' logger = resampleHourly(logger) manual = resampleHourly(manual) wantidx = manual.index logger = logger[wantidx] joined = DataFrame({value_name+ltag:logger, value_name+rtag:manual}) return joined