# Source code for olm.loggers.SchlumbergerCTDToolkit

#Tools for reading and analysis of data from Schlumberger CTD Divers

from pandas import read_csv
from pandas import concat
from pandas import DataFrame

"""
Functions to read Schlumberger diver logger files.
"""

#read in the CSV file from a CTD diver and return a pandas DataFrame

[docs]
def readCTD(csvfile, skiprows=66):
    """
    Reads data from a CSV or MON file exported from a Schlumberger CTD Diver.

    The first ``skiprows`` lines of the file are skipped as header and the
    last line is skipped as footer.  The first column of each remaining row
    is used as the index and parsed as dates; the remaining columns are
    named 'Pressure', 'Temperature' and 'Conductivity'.

    Parameters
    ----------
    csvfile : string
        A string containing the file name of the CSV or MON file to be read.
        Names ending in 'MON' (or '.mon') are split on runs of three or more
        whitespace characters; every other file is split on commas.
    skiprows : int
        Number of header lines to skip before the data begins.
        (Default = 66)

    Returns
    -------
    df : pandas.DataFrame
        DataFrame containing data from the CTD Diver file.
    """
    # MON exports are column-aligned with whitespace rather than comma
    # separated.  The pattern is a raw string so '\s' is a regex class and
    # not an (invalid) Python string escape.
    if str(csvfile).endswith(('MON', '.mon')):
        sep = r'\s\s\s\s*'
    else:
        sep = ','
    names = ['Pressure', 'Temperature', 'Conductivity']
    # skipfooter and regular-expression separators are only supported by the
    # python parsing engine; requesting it explicitly avoids pandas' fallback
    # ParserWarning.
    df = read_csv(
        csvfile,
        sep=sep,
        names=names,
        skiprows=skiprows,
        index_col=0,
        parse_dates=True,
        skipfooter=1,
        engine='python',
    )
    return df


#read in the CSV or MON file from a Baro Diver and return a pandas DataFrame

[docs]
def readBaro(csvfile, skiprows=54):
    """
    Reads data from a CSV or MON file from a Schlumberger Baro Diver.

    The first ``skiprows`` lines of the file are skipped as header and the
    last line is skipped as footer.  The first column of each remaining row
    is used as the index and parsed as dates; the remaining columns are
    named 'Pressure' and 'Temperature'.

    Parameters
    ----------
    csvfile : string
        A string containing the file name of the CSV or MON file to be read.
        Names ending in 'MON' (or '.mon') are split on runs of three or more
        whitespace characters; every other file is split on commas.
    skiprows : int
        Number of header lines to skip before the data begins.
        (Default = 54)

    Returns
    -------
    df : pandas.DataFrame
        DataFrame containing data from the Baro Diver file.
    """
    # MON exports are column-aligned with whitespace rather than comma
    # separated.  The pattern is a raw string so '\s' is a regex class and
    # not an (invalid) Python string escape.
    if str(csvfile).endswith(('MON', '.mon')):
        sep = r'\s\s\s\s*'
    else:
        sep = ','
    names = ['Pressure', 'Temperature']
    # skipfooter and regular-expression separators are only supported by the
    # python parsing engine; requesting it explicitly avoids pandas' fallback
    # ParserWarning.
    df = read_csv(
        csvfile,
        sep=sep,
        names=names,
        skiprows=skiprows,
        index_col=0,
        parse_dates=True,
        skipfooter=1,
        engine='python',
    )
    return df



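# Concatenate a list of CTD DataFrames into a single record.
#
# Arguments:
# dflist = a list of dataframes
#
# zero_shift = if True, adjust the pressure values at each join, assuming
#              that flow depth before and after the join was equal
#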


[docs]
def concatCTD(dflist, zero_shift=True, n_to_average=5, offset_list=None, offset_dates=None):
    """
    Accepts a list of CTD DataFrames and concatenates them.

    The DataFrames in ``dflist`` and the lists passed as ``offset_list`` and
    ``offset_dates`` are not modified; shifts are applied to copies.

    Parameters
    ----------
    dflist : list
        List of pandas.DataFrames to concatenate.  Each must have a
        'Pressure' column when zero_shift is True.
    zero_shift : boolean
        If set to True, the pressure values will be adjusted at the time of each join, assuming that flow depth before and after the join was equal.  If set to False, no adjustment will be made in pressure values. This is useful when downloading the logger may have resulted in a slightly different position in the water column. (Default = True)
    n_to_average : int
        Number of data points to average before and after join in order to determine data offset value for pressure.  The last point before and the first point after each join are excluded from the averages. (Default = 5)
    offset_list : list
        List of offsets to be applied manually to pressure data.  Must contain one fewer element than dflist; element i is added to the pressure of dflist[i+1].  If empty or None, offsets are computed automatically. (Default = None)
    offset_dates : list
        List of datetime strings corresponding to manual offsets.  If empty or None while offset_list is given, the first index value of each shifted DataFrame is used. (Default = None)

    Returns
    -------
    (concatenated : pandas.DataFrame, offset_list : pandas.DataFrame)
        A tuple is returned with the first item being a DataFrame object containing the concatenated data and the second item in the tuple being a DataFrame object containing offsets with datetimes of the offsets as an index.  If offset_list is given with the wrong number of elements, a message is printed and None is returned instead.

    """
    # Private copies: the defaults used to be shared mutable lists that were
    # appended to, so offsets leaked from one call into the next.
    offset_list = [] if offset_list is None else list(offset_list)
    offset_dates = [] if offset_dates is None else list(offset_dates)

    if not zero_shift:
        #concatenate with no shifting
        #note: might want to add some capability to handle overlapping data
        concatenated = concat(dflist)
        return (concatenated, DataFrame(offset_list, index=offset_dates))

    # Shift copies so that calling this function twice on the same frames
    # does not apply the offsets twice.
    frames = [df.copy() for df in dflist]

    if offset_list:
        #offset each data file by the value in offset list
        if len(offset_list) != len(frames) - 1:
            print("Number of elements in offset_list must be one less than number of data files to concatenate")
            return None
        for df, offset in zip(frames[1:], offset_list):
            df['Pressure'] = df['Pressure'] + offset
        if not offset_dates:
            # No dates supplied: label each offset with the start of the
            # file it was applied to, as the automatic mode does.
            offset_dates = [df.index[0] for df in frames[1:]]
    else:
        # Offsets are computed here, so any supplied dates have nothing to
        # label; start from a clean list.
        offset_dates = []
        for previous, df in zip(frames, frames[1:]):
            #in tail/head we throw out last/first data point
            #average of the tail of the (already shifted) previous data
            tail_average = previous['Pressure'].iloc[-n_to_average - 1:-1].mean()
            #average of the head of the following data
            head_average = df['Pressure'].iloc[1:n_to_average + 1].mean()
            delta = tail_average - head_average
            offset_dates.append(df.index[0])
            offset_list.append(delta)
            df['Pressure'] = df['Pressure'] + delta

    concatenated = concat(frames)
    offsets = DataFrame(offset_list, index=offset_dates)
    return (concatenated, offsets)