# Source code for olm.USGS.PhreeqcPandas

#! /usr/bin/env python

"""Collection of functions to interface between WaterChem and PHREEQC."""
from __future__ import print_function
from numpy import isnan, size
from scipy.optimize import brentq
from pandas import DataFrame, to_numeric
import pickle as pickle
import os
import subprocess
import xlrd
import sys
from olm.USGS.loadWaterQualityData import loadSiteListData
from olm.general import molL_to_mgL

# Default translation table: WQX characteristic names (keys) mapped to the
# names written into the PHREEQC input file (values).
default_phreeqc_to_WQX_translation = {
    "Temperature, water": "temp",
    "pH": "pH",
    "Alkalinity, total": "Alkalinity",
    "Alkalinity": "Alkalinity",
    "Calcium": "Ca",
    "Magnesium": "Mg",
    "Potassium": "K",
    "Sodium": "Na",
    "Sulfate": "S",
    "Nitrate": "N",
    "Chloride": "Cl",
}

def readPhreeqcOutput(phreeqcOutputFile):
    """
    Reads the output file from a PHREEQC simulation run.

    Parameters
    ----------
    phreeqcOutputFile : string
        File containing PHREEQC output that should be read.

    Returns
    -------
    simulationDict : dict or None
        A dictionary containing outputs from the PHREEQC simulation:
        element totals keyed by element name, 'Electrical balance',
        'Percent error', '<species>_Molality' / '<species>_Activity' and
        'SI_<phase>' entries. None is returned if any line read before the
        end of the run contains the text 'ERROR'.

    Notes
    -----
    While this function can be used on its own, it is primarily designed to
    be used by the processMidf function, which provides a more convenient
    interface.
    """
    simulationDict = {}
    # Name of the section of the output file that is currently being parsed.
    block = "beginning"
    with open(phreeqcOutputFile, "r", encoding="latin_1") as phreeqc_output:
        for line in phreeqc_output:
            if "ERROR" in line:
                print("PHREEQC has encountered an error while running this sample.")
                return None
            fields = line.split()
            if block == "beginning":
                if "Reading input data for simulation" in line:
                    block = "simulation"
            elif block == "simulation":
                if "Solution composition" in line:
                    block = "composition"
                if "End of run" in line:
                    # Nothing after this marker is parsed.
                    break
            elif block == "composition":
                if "Elements" in line:
                    continue  # column header
                if "Description of solution" in line:
                    block = "description"
                elif len(fields) == 3:
                    # <element> <molality> <moles>
                    simulationDict[fields[0]] = to_numeric(fields[1], errors="coerce")
            elif block == "description":
                if "Electrical balance" in line:
                    simulationDict["Electrical balance"] = to_numeric(
                        fields[4], errors="coerce"
                    )
                if "Percent error" in line:
                    simulationDict["Percent error"] = to_numeric(
                        fields[4], errors="coerce"
                    )
                if "Distribution of species" in line:
                    block = "distribution"
            elif block == "distribution":
                if "Species" in line or "Log" in line:
                    continue  # column headers
                if "Saturation indices" in line:
                    # Section title: must not be parsed as a species line.
                    block = "SI"
                elif len(fields) > 2:
                    # <species> <molality> <activity> ...
                    simulationDict[fields[0] + "_Molality"] = to_numeric(
                        fields[1], errors="coerce"
                    )
                    simulationDict[fields[0] + "_Activity"] = to_numeric(
                        fields[2], errors="coerce"
                    )
            elif block == "SI":
                if "Phase" not in line and len(fields) == 5:
                    # <phase> <SI> <log IAP> <log K> <formula>
                    simulationDict["SI_" + fields[0]] = to_numeric(
                        fields[1], errors="coerce"
                    )
                if "End of simulation" in line:
                    block = "simulation"
    return simulationDict
def writePhreeqcInput(
    sample_row,
    phreeqc_file_name,
    phreeqcDict=None,
    datetext="",
    charge=None,
):
    """
    Writes a PHREEQC input file using the row from a site DataFrame.

    Parameters
    ----------
    sample_row : pandas data frame row
        The row from the pandas data frame for the site and date to be
        processed. If several rows are passed (duplicate dates without time
        information), only the first one is used.
    phreeqc_file_name : str
        Name of PHREEQC input file to write.
    phreeqcDict : dict
        a dictionary with WQX characteristics as keys and phreeqc chemical
        names as entries. If None (the default), the built in translation
        dict, default_phreeqc_to_WQX_translation, is used.
    datetext : str
        String containing the text that describes the date as it should be
        written into the PHREEQC input file.
    charge : str
        String containing name of element (or pH) that should be adjusted to
        obtain charge balance. This is done internally by PHREEQC.
        (Default=None)

    Returns
    -------
    status : int
        OK if equal to 1. Error writing to file if equal to -1.
    """
    if phreeqcDict is None:
        phreeqcDict = default_phreeqc_to_WQX_translation

    # Check for cases with duplicate dates and no time information.
    # In this case, get only the first sample on that date.
    if size(sample_row.shape) > 1 and min(sample_row.shape) > 1:
        sample_row = sample_row.iloc[0]
    available = set(sample_row.keys())

    lines = ["SOLUTION " + " on " + datetext, "units mg/l"]
    # Include every translated characteristic that has a measured value.
    for characteristic, phreeqc_name in phreeqcDict.items():
        if characteristic not in available:
            continue
        value = sample_row[characteristic]
        if isnan(value):
            continue
        entry = phreeqc_name + " " + str(value)
        if phreeqc_name == charge:
            # PHREEQC adjusts this constituent to reach charge balance.
            entry += " charge"
        lines.append(entry)
    lines.append("END")

    try:
        # The context manager closes the file even when writing fails, and
        # nothing is referenced here that is unbound if open() itself fails.
        with open(phreeqc_file_name, "w") as phreeqc_input_file:
            phreeqc_input_file.write("\n".join(lines) + "\n")
    except IOError:
        print("Problem opening sample PHREEQC input file.")
        return -1
    return 1
def phreeqcRunSetPCO2(
    logPCO2, phreeqcInputFile, PHREEQC_PATH, DATABASE_FILE, newInputFile=None
):
    """
    Reads a PHREEQC input file, modifies it to a set PCO2 and runs the
    modified file.

    Parameters
    ----------
    logPCO2 : float
        Log10 of the PCO2 value to set the sample to.
    phreeqcInputFile : str
        The name of the input file to modify.
    PHREEQC_PATH : string
        The path to the PHREEQC executable.
    DATABASE_FILE : string
        The database file to be used by PHREEQC.
    newInputFile : string
        The name of the set-PCO2 file to create and run. If not specified,
        the final 5 characters will be stripped off of phreeqcInputFile and
        '-set-PCO2.phrq' will be added.

    Returns
    -------
    simulationDict : dict
        A dictionary that contains the output of readPhreeqcOutput() run on
        the set PCO2 simulation, or None if PHREEQC exits with a non-zero
        return code.

    Notes
    -----
    The PHREEQC output is written next to the new input file, with the input
    file's extension replaced by '.out'.
    """
    with open(phreeqcInputFile) as f:
        input_file_text = f.readlines()

    PCO2_str = "\tC\t1\tCO2(g)\t" + str(logPCO2) + "\n"

    pH_lines = [i for i, text in enumerate(input_file_text) if "pH" in text]
    if pH_lines:
        # If there is a pH input line we overwrite it with the PCO2 line.
        input_file_text[pH_lines[0]] = PCO2_str
        for _ in pH_lines[1:]:
            print(
                "There is more than one line with pH. This code only works for PHREEQC inputs with a single solution. Check input file!"
            )
    else:
        # If there was no pH line, the PCO2 line goes right after SOLUTION.
        solution_lines = [
            i for i, text in enumerate(input_file_text) if "SOLUTION" in text
        ]
        if solution_lines:
            input_file_text.insert(solution_lines[0] + 1, PCO2_str)
        for _ in solution_lines[1:]:
            print(
                "There is more than one line with SOLUTION. This code only works for PHREEQC inputs with a single solution. Check input file!"
            )

    # Write out new input file text with set PCO2
    if newInputFile is None:
        newInputFile = phreeqcInputFile[:-5] + "-set-PCO2.phrq"
    with open(newInputFile, "w") as f:
        f.writelines(input_file_text)

    # Replace only the final extension; other dots in the path (e.g. './dir'
    # or 'site.v2.phrq') must be kept intact.
    phreeqcOutputFile = os.path.splitext(newInputFile)[0] + ".out"

    # run phreeqc on the input file just created
    retcode = subprocess.call(
        [
            os.path.join(PHREEQC_PATH, "phreeqc"),
            newInputFile,
            phreeqcOutputFile,
            DATABASE_FILE,
        ]
    )
    print("PHREEQC return code = ", retcode)
    if retcode == 0:
        return readPhreeqcOutput(phreeqcOutputFile)
    # This catches cases where PHREEQC fails (e.g. doesn't converge)
    return None
# solution_inputs = readPhreeqcOutput(phreeqcOutputFile) # solution_inputs['C']= ' 1 CO2(g) ' + str(logPCO2) # writePhreeqcInput(solution_inputs)
def calciteSaturationAtFixedPCO2(
    logPCO2, phreeqcInputFile, PHREEQC_PATH, DATABASE_FILE, newInputFile=None
):
    """
    Function used in root finding of saturation PCO2.

    Function is used by findPCO2atCalciteSaturation(). As a stand alone
    function, it's better to use phreeqcRunSetPCO2().

    Parameters
    ----------
    logPCO2 : float
        Log10 of the PCO2 value to set the sample to.
    phreeqcInputFile : str
        The name of the input file to modify.
    PHREEQC_PATH : string
        The path to the PHREEQC executable.
    DATABASE_FILE : string
        The database file to be used by PHREEQC.
    newInputFile : string
        The name of the set-PCO2 file to create and run. If not specified,
        the final 5 characters will be stripped off of phreeqcInputFile and
        '-set-PCO2.phrq' will be added.

    Returns
    -------
    SI : float
        Saturation index of calcite for given PCO2.

    Raises
    ------
    TypeError
        If phreeqcRunSetPCO2() returns None (PHREEQC run failed).
    KeyError
        If the simulation output contains no 'SI_Calcite' entry.
    """
    simulationDict = phreeqcRunSetPCO2(
        logPCO2,
        phreeqcInputFile,
        PHREEQC_PATH,
        DATABASE_FILE,
        newInputFile=newInputFile,
    )
    return simulationDict["SI_Calcite"]
def findPCO2atCalciteSaturation(
    phreeqcInputFile, PHREEQC_PATH, DATABASE_FILE, newInputFile=None, units="atm"
):
    """
    Finds the PCO2 where calcite would be saturated by modifying PCO2 of a
    PHREEQC input file.

    Parameters
    ----------
    phreeqcInputFile : str
        The name of the input file to modify.
    PHREEQC_PATH : string
        The path to the PHREEQC executable.
    DATABASE_FILE : string
        The database file to be used by PHREEQC.
    newInputFile : string
        The name of the set-PCO2 file to create and run. If not specified,
        the final 5 characters will be stripped off of phreeqcInputFile and
        '-set-PCO2.phrq' will be added.
    units : string
        The units in which to return the partial pressure of CO2.
        Currently, 'ppm' and 'atm' are allowed (Default=atm).

    Returns
    -------
    PCO2 : float
        The PCO2 at which calcite would be in equilibrium, or None if units
        is not one of the allowed values.

    Notes
    -----
    The root is searched with brentq between log10(PCO2) = -6 and 0.
    """
    # Validate units first so that an invalid request does not trigger the
    # whole series of PHREEQC runs only to be discarded.
    if units != "ppm" and units != "atm":
        print("Specified units were not valid, use 'ppm' or 'atm'.")
        return None

    args = (phreeqcInputFile, PHREEQC_PATH, DATABASE_FILE, newInputFile)
    eqLogPCO2 = brentq(calciteSaturationAtFixedPCO2, -6.0, 0.0, args=args, xtol=0.01)
    if units == "ppm":
        return 1e6 * 10.0 ** eqLogPCO2
    return 10.0 ** eqLogPCO2
def _run_phreeqc(PHREEQC_PATH, input_file, output_file, DATABASE_FILE):
    """Run PHREEQC once; return its exit code, or -1 if it could not be launched."""
    try:
        retcode = subprocess.call(
            [
                os.path.join(PHREEQC_PATH, "phreeqc"),
                input_file,
                output_file,
                DATABASE_FILE,
            ]
        )
    except IOError:
        print("Problem running PHREEQC. Check path for executable and database.")
        print("PHREEQC path used was: ", PHREEQC_PATH)
        print("PHREEQC database path used was: ", DATABASE_FILE)
        return -1
    print("PHREEQC return code = ", retcode)
    return retcode


def processMidf(
    site_midf,
    site_dir,
    PHREEQC_PATH,
    DATABASE_FILE,
    phreeqcDict=None,
    force_balance="",
):
    """
    Takes a WQXtoPandas site multi-indexed DataFrame and runs all samples
    through PHREEQC, returning a DataFrame of the outputs that is indexed by
    date.

    Will run automatically within WQXtoPandas if specified in the excel start
    file. However, the function can also be called later by reading in a
    pickled site multi-indexed DataFrame.

    Parameters
    ----------
    site_midf : Pandas DataFrame
        The Pandas multi-indexed DataFrame object produced for each site by
        WQXtoPandas that is to be processed through PHREEQC. Its 'data'
        entry is the frame whose index supplies the sample dates.
    site_dir : string
        The directory containing the site data, and where the results should
        be output.
    PHREEQC_PATH : string
        The path to the PHREEQC executable.
    DATABASE_FILE : string
        The database file to be used by PHREEQC.
    phreeqcDict : dict
        a dictionary with WQX characteristics as keys and phreeqc chemical
        names as entries. By default, processMidf will use the built in
        translation dict, default_phreeqc_to_WQX_translation.
    force_balance : str
        PHREEQC should force charge balance on the ion indicated in this
        string. Use the string that represents the ion internally in PHREEQC.
        It is also possible to force balance on pH using force_balance='pH',
        or on alkalinity using force_balance='Alk'

    Returns
    -------
    phreeqc_df : Pandas DataFrame
        One row of PHREEQC outputs per successfully processed sample, indexed
        by date. If the site directory cannot be created, -1 is returned
        instead.
    """
    if phreeqcDict is None:
        phreeqcDict = default_phreeqc_to_WQX_translation
    output_dates = []
    phreeqc_data_list = []
    df = site_midf["data"]
    dates = df.index
    num_converged = 0
    total_num = dates.size
    max_iterations = 10

    # The output directory is only needed (and only created) when there is at
    # least one sample to process.
    if total_num > 0 and not os.path.exists(site_dir):
        try:
            os.makedirs(site_dir)
        except os.error:
            print("Problem creating location directory: " + site_dir)
            return -1

    for date in dates:
        sample_row = df.loc[date]
        # NOTE(review): the expression that builds datetext was missing from
        # the source under review; this timestamp format is a reconstruction.
        # Confirm it matches the names of existing .phrq files.
        datetext = date.strftime("%Y-%m-%d_%H%M")

        if force_balance == "":
            phreeqc_file_name = os.path.join(site_dir, datetext + ".phrq")
        else:
            phreeqc_file_name = os.path.join(
                site_dir, datetext + "-" + force_balance + ".phrq"
            )
        # Balancing on alkalinity is iterated below rather than delegated to
        # PHREEQC's own 'charge' keyword.
        charge = None if force_balance in ("", "Alk") else force_balance
        writePhreeqcInput(
            sample_row,
            phreeqc_file_name,
            phreeqcDict=phreeqcDict,
            datetext=datetext,
            charge=charge,
        )

        # run phreeqc on the input file just created
        phreeqcOutputFile = phreeqc_file_name[:-4] + "out"
        retcode = _run_phreeqc(
            PHREEQC_PATH, phreeqc_file_name, phreeqcOutputFile, DATABASE_FILE
        )
        # A non-zero code covers cases where PHREEQC fails (e.g. doesn't converge)
        simulationDict = readPhreeqcOutput(phreeqcOutputFile) if retcode == 0 else None

        if force_balance == "Alk" and simulationDict is not None:
            # Force charge balance on alkalinity. Work on a copy so that the
            # adjusted alkalinity is not written back into the caller's data.
            sample_row = sample_row.copy()
            alk_converged = False
            number_of_iterations = 0
            while simulationDict is not None and not alk_converged:
                # Retrieve charge balance error
                balance_eq = simulationDict["Electrical balance"]
                print("balance_eq=", balance_eq)
                # Some samples have no alkalinity in the output; treat as zero.
                alk = simulationDict.get("Alkalinity", 0.0)
                print("alk=", alk)
                New_alk_molL = alk + balance_eq
                if New_alk_molL < 0:
                    # PHREEQC won't take negative alkalinity inputs
                    New_alk_molL = alk * 0.5
                print("new alk=", New_alk_molL)
                # PHREEQC wants it as 0.5CaCO3
                New_alk_mgL = 0.5 * molL_to_mgL(New_alk_molL, "CaCO3")
                print("new alk mg/L=", New_alk_mgL)
                sample_row["Alkalinity"] = New_alk_mgL
                writePhreeqcInput(
                    sample_row,
                    phreeqc_file_name,
                    phreeqcDict=phreeqcDict,
                    datetext=datetext,
                )
                retcode = _run_phreeqc(
                    PHREEQC_PATH, phreeqc_file_name, phreeqcOutputFile, DATABASE_FILE
                )
                simulationDict = (
                    readPhreeqcOutput(phreeqcOutputFile) if retcode == 0 else None
                )
                if simulationDict is None:
                    # PHREEQC failed or reported an error: give up on this sample.
                    break
                number_of_iterations += 1
                if abs(simulationDict["Percent error"]) < 5.0:
                    alk_converged = True
                    num_converged += 1
                elif number_of_iterations > max_iterations:
                    # Not converging: this sample is thrown out below.
                    simulationDict = None

        if simulationDict is None:
            print("PHREEQC encountered an error while processing this sample.")
            continue
        # Keep the date (row label) and the outputs of the successful run.
        output_dates.append(date)
        phreeqc_data_list.append(simulationDict)

    # create dataframe from phreeqc outputs
    phreeqc_df = DataFrame(phreeqc_data_list, index=output_dates)
    if force_balance == "Alk":
        print(
            "Number of converged alkalinity forced charge balance cases = ",
            num_converged,
        )
        print("Total number of samples = ", total_num)
    return phreeqc_df
def _save_site_results(site_df, pickle_path, label=""):
    """Write one site's PHREEQC results to a pickle file and a matching CSV file."""
    if not isinstance(site_df, DataFrame):
        # processMidf returns -1 instead of a DataFrame when the site
        # directory could not be created; there is nothing to save then.
        print("No PHREEQC" + label + " results to write to " + pickle_path)
        return
    try:
        with open(pickle_path, "wb") as pickle_file:
            pickle.dump(site_df, pickle_file)
        site_df.to_csv(pickle_path[:-3] + "csv")
    except IOError:
        print("Problem writing out PHREEQC" + label + " data file.")


def processSites(
    sitesDir,
    PHREEQC_PATH,
    DATABASE_FILE,
    phreeqcDict=None,
    regEx="USGS-*",
    bracket_charge_balance=False,
    process_regular=True,
):
    """
    Processes all data from site directories in PHREEQC.

    Parameters
    ----------
    sitesDir : str
        The directory that contains the site directories to be processed.
    PHREEQC_PATH : str
        The path to the phreeqc executable.
    DATABASE_FILE : str
        The path to the phreeqc database file to be used.
    phreeqcDict : dict
        a dictionary with WQX characteristics as keys and phreeqc chemical
        names as entries. By default, processMidf will use the built in
        translation dict, default_phreeqc_to_WQX_translation. The same dict
        is used for the regular and the charge balance runs.
    regEx : str (optional)
        Pattern passed to loadSiteListData to locate site directories.
        (default = 'USGS-*')
    bracket_charge_balance : bool
        If set to True, then charge balance will be bracketed by forcing
        balance on Ca and Alkalinity alternately. Default = False.
    process_regular : bool
        If set to True, then PHREEQC will be run without any charge balance
        forcing. This can be set to False if the calculations without forced
        balance are already done and you only want to do the charge balance
        runs. Default = True.

    Returns
    -------
    None
    """
    sitesDict = loadSiteListData(regEx=regEx, processedSitesDir=sitesDir)

    if process_regular:
        # Run PHREEQC on sites without forced charge balance
        for site, site_midf in sitesDict.items():
            print("Processing " + site + " in PHREEQC")
            site_dir = os.path.join(sitesDir, site)
            sitedf = processMidf(
                site_midf,
                site_dir,
                PHREEQC_PATH=PHREEQC_PATH,
                DATABASE_FILE=DATABASE_FILE,
                phreeqcDict=phreeqcDict,
            )
            _save_site_results(sitedf, os.path.join(site_dir, site + "-PHREEQC.pkl"))

    if bracket_charge_balance:
        # Run PHREEQC using bracketed charge balance for sites
        for site, site_midf in sitesDict.items():
            site_dir = os.path.join(sitesDir, site)
            # Force balance on Calcium first, then on Alkalinity
            for balance_on in ("Ca", "Alk"):
                balanced_df = processMidf(
                    site_midf,
                    site_dir,
                    PHREEQC_PATH,
                    DATABASE_FILE,
                    phreeqcDict=phreeqcDict,
                    force_balance=balance_on,
                )
                _save_site_results(
                    balanced_df,
                    os.path.join(site_dir, site + "-PHREEQC-" + balance_on + ".pkl"),
                    label=" " + balance_on,
                )


def runPHREEQC(startfilename, process_regular=True):
    """
    Process all samples within a site directory using information from the
    start file.

    Parameters
    ----------
    startfilename : str
        Name of start file to find settings for running PHREEQC.
    process_regular : bool
        Passed on to processSites: if True, PHREEQC is also run without any
        charge balance forcing. Default = True.
    """
    print("Processing site water chemisty data in PHREEQC...")
    startfile = xlrd.open_workbook(startfilename)
    sheet = startfile.sheet_by_index(0)

    # Parse the settings rows of the start file. Settings are stored as
    # (name, value) pairs in the first two columns; the block ends at the
    # 'Characteristic' heading row.
    settingsDict = {}
    for rownum in range(sheet.nrows):
        line = sheet.row_values(rownum)
        if not line:
            continue
        key = line[0]
        if not isinstance(key, str) or not key:
            continue  # blank or non-text first cell: not a setting
        if key.startswith("#"):
            continue  # ignore comments
        if key == "Characteristic":
            break  # start of the characteristics block, nothing more to read
        settingsDict[key] = line[1]

    sitesDir = os.path.join(
        settingsDict["Path to output directory"],
        settingsDict["Name of output directory"],
    )
    DATABASE_FILE = os.path.join(
        settingsDict["Path to chemical database"],
        settingsDict["Name of chemical database"],
    )
    PHREEQC_PATH = settingsDict["Path to PHREEQC"]
    bracket_charge_balance = settingsDict["Force balance on Ca and Alk"] == "Yes"
    processSites(
        sitesDir,
        PHREEQC_PATH,
        DATABASE_FILE,
        phreeqcDict=None,
        regEx="USGS-*",
        bracket_charge_balance=bracket_charge_balance,
        process_regular=process_regular,
    )


if __name__ == "__main__":
    # Usage: <script> startfile [False]
    # The optional second argument 'False' skips the runs without forced
    # charge balance.
    if len(sys.argv) < 2:
        print("Usage: " + sys.argv[0] + " <start file> [False]")
        sys.exit(1)
    startfilename = sys.argv[1]
    process_regular = not (len(sys.argv) > 2 and sys.argv[2] == "False")
    runPHREEQC(startfilename, process_regular=process_regular)