Source code for olm.loggers.SchlumbergerCTDToolkit
#Tools for reading and analysis of data from Schlumberger CTD Divers
from pandas import read_csv
from pandas import concat
from pandas import DataFrame
"""
Functions to read Schlumberger diver logger files.
"""
#read in the CSV file from a CTD diver and return a pandas DataFrame
[docs]
def readCTD(csvfile, skiprows=66):
    """
    Reads data from a CSV or MON file exported from a Schlumberger CTD Diver.

    Parameters
    ----------
    csvfile : string
        A string containing the file name of the CSV or MON file to be read.
        Names ending in 'MON' (in any letter case) are read as
        whitespace-delimited MON files; every other name is read as a
        comma-delimited file.
    skiprows : int, optional
        Number of header lines that precede the first data record. The
        header length is fixed by the export software and may differ between
        versions, so it can be overridden here. (Default = 66)

    Returns
    -------
    df : pandas.DataFrame
        DataFrame containing data from the CTD Diver file, with the columns
        'Pressure', 'Temperature' and 'Conductivity'. The first field of
        each record is used as the index and is parsed as dates.
    """
    if csvfile.upper().endswith('MON'):
        # MON fields are separated by runs of three or more whitespace
        # characters. A raw string keeps the regex escapes intact.
        sep = r'\s\s\s\s*'
    else:
        sep = ','
    return read_csv(
        csvfile,
        sep=sep,
        names=['Pressure', 'Temperature', 'Conductivity'],
        skiprows=skiprows,
        index_col=0,
        parse_dates=True,
        # Drop the final line of the file (presumably the export's
        # end-of-data marker -- confirm against a real logger file).
        skipfooter=1,
        # skipfooter and regex separators are only supported by the python
        # engine; naming it explicitly avoids the fallback ParserWarning.
        engine='python',
    )
#read in the CSV or MON file from a Baro diver and return a pandas DataFrame
[docs]
def readBaro(csvfile, skiprows=54):
    """
    Reads data from a CSV or MON file from a Schlumberger Baro Diver.

    Parameters
    ----------
    csvfile : string
        A string containing the file name of the CSV or MON file to be read.
        Names ending in 'MON' (in any letter case) are read as
        whitespace-delimited MON files; every other name is read as a
        comma-delimited file.
    skiprows : int, optional
        Number of header lines that precede the first data record. The
        header length is fixed by the export software and may differ between
        versions, so it can be overridden here. (Default = 54)

    Returns
    -------
    df : pandas.DataFrame
        DataFrame containing data from the Baro Diver file, with the columns
        'Pressure' and 'Temperature'. The first field of each record is used
        as the index and is parsed as dates.
    """
    if csvfile.upper().endswith('MON'):
        # MON fields are separated by runs of three or more whitespace
        # characters. A raw string keeps the regex escapes intact.
        sep = r'\s\s\s\s*'
    else:
        sep = ','
    return read_csv(
        csvfile,
        sep=sep,
        names=['Pressure', 'Temperature'],
        skiprows=skiprows,
        index_col=0,
        parse_dates=True,
        # Drop the final line of the file (presumably the export's
        # end-of-data marker -- confirm against a real logger file).
        skipfooter=1,
        # skipfooter and regex separators are only supported by the python
        # engine; naming it explicitly avoids the fallback ParserWarning.
        engine='python',
    )
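#
# Concatenate several CTD DataFrames into one continuous record.
#
# Arguments:
# dflist = a list of dataframes
#
# zero_shift = if True, shift the pressure of each later DataFrame so that it lines up with the end of the previous one
#
#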
[docs]
def concatCTD(dflist, zero_shift=True, n_to_average=5, offset_list=None, offset_dates=None):
    """
    Accepts a list of CTD DataFrames and concatenates them.

    The input DataFrames and the lists passed as ``offset_list`` and
    ``offset_dates`` are left unmodified; all shifting is done on copies.

    Parameters
    ----------
    dflist : list
        List of pandas.DataFrames to concatenate.
    zero_shift : boolean
        If set to True, the pressure values will be adjusted at the time of each join, assuming that flow depth before and after the join was equal. If set to False, no adjustment will be made in pressure values. This is useful when downloading the logger may have resulted in a slightly different position in the water column. (Default = True)
    n_to_average : int
        Number of data points to average before and after join in order to determine data offset value for pressure
    offset_list : list, optional
        List of offsets to be applied manually to pressure data. Must hold one value per join (one less than the number of DataFrames). If omitted or empty, offsets are computed automatically.
    offset_dates : list, optional
        List of datetime strings corresponding to manual offsets. If omitted or empty while manual offsets are given, the first timestamp of each shifted DataFrame is used.

    Returns
    -------
    (concatenated : pandas.DataFrame, offset_list : pandas.DataFrame)
        A tuple is returned with the first item being a DataFrame object containing the concatenated data and the second item in the tuple being a DataFrame object containing offsets with datetimes of the offsets as an index. None is returned (after printing a message) when the number of manual offsets does not match the number of joins.
    """
    # Private copies: the defaults must not be shared between calls, and the
    # caller's own lists must not grow as a side effect.
    offset_list = [] if offset_list is None else list(offset_list)
    offset_dates = [] if offset_dates is None else list(offset_dates)

    if not zero_shift:
        # concatenate with no shifting
        # note: might want to add some capability to handle overlapping data
        concatenated = concat(dflist)
        return (concatenated, DataFrame(offset_list, index=offset_dates))

    # Shift copies so that calling this function twice on the same frames
    # does not apply the offsets twice.
    frames = [df.copy() for df in dflist]

    if offset_list:
        # offset each data file by the value in offset list
        if len(offset_list) != len(frames) - 1:
            print("Number of elements in offset_list must be one less than number of data files to concatenate")
            return None
        if not offset_dates:
            offset_dates = [df.index[0] for df in frames[1:]]
        for df, offset in zip(frames[1:], offset_list):
            df['Pressure'] = df['Pressure'] + offset
    else:
        # `previous` has already been shifted by the time it is used here,
        # so the offsets accumulate along the chain of frames.
        for previous, current in zip(frames, frames[1:]):
            # in tail/head we throw out last/first data point
            # get average value from tail of previous data
            tail_average = previous['Pressure'].iloc[-n_to_average - 1:-1].mean()
            # get average value from head of following data
            head_average = current['Pressure'].iloc[1:n_to_average + 1].mean()
            delta = tail_average - head_average
            offset_dates.append(current.index[0])
            offset_list.append(delta)
            current['Pressure'] = current['Pressure'] + delta

    concatenated = concat(frames)
    offsets = DataFrame(offset_list, index=offset_dates)
    return (concatenated, offsets)