diff --git a/.gitignore b/.gitignore index 2907ce6..67cdf2c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,17 +9,28 @@ ## ipython notebook checkpoint folders **/.ipynb_checkpoints/ +# builds for uploading to PyPI +dist/ +./build/ +prms_python.egg-info/ + venv -*animation* -test/data/models/lbcd/prms.out -test/data/models/lbcd/prms_ic.out -test/data/models/lbcd/statvar.dat notebooks/jupyternb-scenario-series-example/ -docs/build **/utility_functions.ipynb **/scripts/*.data -scripts/ dry_creek **/dry_creek/ prms_python.egg-info + +## test data +test/data/tmp_scenarios/* +test/data/models/lbcd/test-sim-dir/* +*tmp_sim* + +test/data/models/lbcd/prms.out +test/data/models/lbcd/prms_ic.out +test/data/models/lbcd/statvar.dat +test/results + +*animation* diff --git a/CHANGELOG.rst b/CHANGELOG.rst new file mode 100644 index 0000000..7f3ede4 --- /dev/null +++ b/CHANGELOG.rst @@ -0,0 +1,51 @@ +Change log +********** + +Version 1.0.1 +============= + +Substantial improvements and updates to online documentation. Renamed two util +module functions to align with PEP style conventions specifically: +``Kolmogorov_Smirnov`` became ``kolmogorov_smirnov`` and ``calc_emp_CDF`` +became ``calc_emp_cdf``, also renamed ``util.load_data_file`` to +``util.load_data`` for consistency with ``util.load_statvar``. Name changes +are backwards compatible. Add interactive code snippets in online documentation, +add conda environment file. + +Bug Fixes +--------- + +* Fix `prmspy` script errors after prms-python installation with Python 2 + +Version 1.0.0 +============= + +Stable version and first numbered release on PyPI. All functionality tested +and passed on multiple platforms using several PRMS models. 
+ +Major submodules, classes and functions +--------------------------------------- + +* data + - Data +* parameters + - Parameters, modify_params +* simulation + - Simulation, SimulationSeries +* scenario + - Scenario, ScenarioSeries +* optimizer + - Optimizer, OptimizationResult, resample_param +* util + - load_statvar, nash_sutcliffe, and others + +This version also includes the command line interface script "`prmspy`". +Ongoing work on Docs website, includes mostly up-to-date Jupyter notebook +examples. + +Version 0.1.0 +============= + +First numbered version, was not setup for installation using PyPI. Many changes +occurred for initial development under this version which were not released. + diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..0bedf48 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,30 @@ +Copyright (c) 2016-2019 John Volk and Matthew Turner +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted (subject to the limitations in the disclaimer +below) provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from this + software without specific prior written permission. + +NO EXPRESS OR IMPLIED LICENSES TO ANY PARTY'S PATENT RIGHTS ARE GRANTED BY +THIS LICENSE. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER +IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. diff --git a/MANIFEST.in b/MANIFEST.in new file mode 100644 index 0000000..609cc1d --- /dev/null +++ b/MANIFEST.in @@ -0,0 +1,3 @@ +include LICENSE.md +include environment.yml +graft notebooks diff --git a/README.md b/README.md index ee037ad..583810b 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,34 @@ # PRMS-Python -PRMS-Python provides a Python interface to PRMS data files and for running -PRMS simulations. This module tries to improve the management of PRMS simulation -data while also providing useful "pythonic" tools to do scenario-based PRMS -simulations. By "scenario-based" modeling we mean, for example, parameter -sensitivity analysis, where each "scenario" is an iterative perturbation of -one or many parameters. Another example "scenario-based" modeling exercise would -be climate scenario modeling: what will happen to modeled outputs if the -input meteorological data were to change? +[Online documentation](https://prms-python.github.io/PRMS-Python/build/html/index.html) + +PRMS-Python provides a Python interface to PRMS data files and manages +PRMS simulations. 
This module aims to improve the efficiency of PRMS +workflows by giving access to PRMS data structures while providing +"pythonic" tools to do scenario-based PRMS simulations. By +"scenario-based" we mean testing model hypotheses associated with model +inputs, outputs, and model structure. For example, parameter sensitivity +analysis, where each "scenario" is an iterative perturbation of one or +many parameters. Another example "scenario-based" modeling exercise +would be climate scenario modeling: what will happen to modeled outputs +if the input meteorological data were to change? + ## Installation -Currently it's clone-then-pip: +PRMS-Python versions are available on the Python Package Index [PyPI](https://pypi.org/project/prms-python/) +and can be installed and upgraded using pip: ``` -git clone https://github.com/northwest-knowledge-network/prms-python +pip install prms-python +``` + +Alternatively clone-then-pip: + +``` +git clone https://github.com/PRMS-Python/PRMS-Python.git +cd PRMS-Python ``` then @@ -24,10 +37,77 @@ then pip install --editable . ``` +Another option is to download the source code as a zip file, unzip it +and within the PRMS-Python root directory run: + +``` +python setup.py install +``` + +## Usage and documentation + +We reccomend starting with the ["getting started"](https://github.com/PRMS-Python/PRMS-Python/blob/master/notebooks/getting_started.ipynb) +Jupyter notebook for file structure rules that PRMS-Python uses and then +moving on to other example notebooks in the [`notebooks` directory](https://github.com/PRMS-Python/PRMS-Python/tree/master/notebooks). Online documentation is available [here](https://prms-python.github.io/PRMS-Python/build/html/index.html). + +## Building documentation + +This project uses the [Sphinx documentation engine for Python](http://www.sphinx-doc.org/en/master/) +The documentation source is located in `docs/source`. Eventually we can +wrap the following steps into a script. 
But for now, to build the +documentation, go to the `docs/` directory and run + +``` +make html +``` + +If it fails because of missing dependencies, just install the dependencies +it says it's missing. Publishing the docs is now done automatically with any +commits are pushed to the master branch. + + + ## Unit tests @@ -37,3 +117,13 @@ I run them using nose but that's not required. From the root repo directory ``` nosetests -v ``` + +## Contribute + +We welcome anyone seriously interested in contributing to PRMS-Python to do so in anyway they see fit. If you are not sure where to begin you can look for current issues or submit a new issue [here](https://github.com/PRMS-Python/PRMS-Python/issues). You may also [fork](https://help.github.com/articles/fork-a-repo/) PRMS-Python and submit a pull request if you would like to offer your direct changes to the package. + +## Citing PRMS-Python + +If you use PRMS-Python for published work we ask that you please cite the [PRMS-Python manuscript](https://www.sciencedirect.com/science/article/pii/S1364815218308004) as follows: + +Volk, J. M., & Turner, M. A. (2019). PRMS-Python: A Python framework for programmatic PRMS modeling and access to its data structures. Environmental Modelling & Software, 114, 152–165. https://doi.org/10.1016/J.ENVSOFT.2019.01.006 diff --git a/docs/.nojekyll b/docs/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/docs/Makefile b/docs/Makefile index 0fe7dd9..684eb95 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -9,7 +9,7 @@ BUILDDIR = build # User-friendly check for sphinx-build ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) - $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. 
If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) + $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don\'t have Sphinx installed, grab it from http://sphinx-doc.org/) endif # Internal variables. diff --git a/docs/build/doctrees/api.doctree b/docs/build/doctrees/api.doctree new file mode 100644 index 0000000..8a2797b Binary files /dev/null and b/docs/build/doctrees/api.doctree differ diff --git a/docs/build/doctrees/changelog.doctree b/docs/build/doctrees/changelog.doctree new file mode 100644 index 0000000..f55737a Binary files /dev/null and b/docs/build/doctrees/changelog.doctree differ diff --git a/docs/build/doctrees/citation.doctree b/docs/build/doctrees/citation.doctree new file mode 100644 index 0000000..738f147 Binary files /dev/null and b/docs/build/doctrees/citation.doctree differ diff --git a/docs/build/doctrees/cli.doctree b/docs/build/doctrees/cli.doctree new file mode 100644 index 0000000..52d167f Binary files /dev/null and b/docs/build/doctrees/cli.doctree differ diff --git a/docs/build/doctrees/environment.pickle b/docs/build/doctrees/environment.pickle new file mode 100644 index 0000000..2f025a3 Binary files /dev/null and b/docs/build/doctrees/environment.pickle differ diff --git a/docs/build/doctrees/index.doctree b/docs/build/doctrees/index.doctree new file mode 100644 index 0000000..a681ab9 Binary files /dev/null and b/docs/build/doctrees/index.doctree differ diff --git a/docs/build/doctrees/tutorial.doctree b/docs/build/doctrees/tutorial.doctree new file mode 100644 index 0000000..2202e8c Binary files /dev/null and b/docs/build/doctrees/tutorial.doctree differ diff --git a/docs/build/html/.buildinfo b/docs/build/html/.buildinfo new file mode 100644 index 0000000..373a66a --- /dev/null +++ 
b/docs/build/html/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: cc68617934c1a6e4157b7bd31615fc7b +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/docs/build/html/.nojekyll b/docs/build/html/.nojekyll new file mode 100644 index 0000000..e69de29 diff --git a/docs/build/html/_images/nash-sutcliffe-ex.png b/docs/build/html/_images/nash-sutcliffe-ex.png new file mode 100644 index 0000000..9b70bd2 Binary files /dev/null and b/docs/build/html/_images/nash-sutcliffe-ex.png differ diff --git a/docs/build/html/_images/nash-sutcliffe.png b/docs/build/html/_images/nash-sutcliffe.png new file mode 100644 index 0000000..11d06b6 Binary files /dev/null and b/docs/build/html/_images/nash-sutcliffe.png differ diff --git a/docs/build/html/_images/obs-mod-flow.png b/docs/build/html/_images/obs-mod-flow.png new file mode 100644 index 0000000..d7d4123 Binary files /dev/null and b/docs/build/html/_images/obs-mod-flow.png differ diff --git a/docs/build/html/_modules/index.html b/docs/build/html/_modules/index.html new file mode 100644 index 0000000..1ca77de --- /dev/null +++ b/docs/build/html/_modules/index.html @@ -0,0 +1,131 @@ + + + + +
+ +
+# -*- coding: utf-8 -*-
+'''
+data.py -- holds ``Data`` class for standard PRMS climate input data.
+'''
+
+import pandas as pd
+from shutil import copyfile
+
+[docs]class Data(object):
+ """
+ Object to access or create a PRMS data file with ability to load/assign it to a
+ date-time indexed pandas.DataFrame for data management, analysis and visualization.
+ It can be used to build a new PRMS data file from user defined metadata and a
+ ``pandas.DataFrame`` of PRMS datetime-indexed climatic forcing and observation
+ variables.
+
+ The class properties ``metadata`` and ``data_frame`` can be later assigned if no
+ ``base_file`` is given on initialization, allowing for the creation of PRMS climatic
+ forcing file in a Python environment.
+
+ Keyword Arguments:
+ base_file (str, optional): path to standard PRMS data file
+ na_rep (int, optional): how to represent missing values default = -999
+
+ Attributes:
+ date_header (list): date and time header for PRMS data file
+ valid_input_variables (tuple): valid hydro-climate variables for PRMS data file
+
+ Note:
+ If using the ``Data`` class to create a new data file, it is up to the user
+ to ensure that the metadata and :class:`pandas.DataFrame` assigned are correct
+ and compatible.
+
+ """
+
+ ## data file constant attributes
+ date_header = ['year',
+ 'month',
+ 'day',
+ 'hh',
+ 'mm',
+ 'sec']
+
+ valid_input_variables = ('gate_ht',
+ 'humidity',
+ 'lake_elev',
+ 'pan_evap',
+ 'precip',
+ 'rain_day',
+ 'runoff',
+ 'snowdepth',
+ 'solrad',
+ 'tmax',
+ 'tmin',
+ 'wind_speed')
+
+ def __init__(self, base_file=None, na_rep=-999):
+ self.base_file = base_file
+ self.na_rep = na_rep
+ self._metadata = None
+ self._data_frame = None
+
+ @property
+ def metadata(self):
+ """
+ :obj:`dict`: A property that gets and sets the header information from
+ a standard PRMS climate input data file held in a Python dictionary. As
+ a property it can be assigned directly to overwrite or create a new PRMS
+ data file. As such the user is in control and must supply the correct
+ syntax for PRMS standard data files, e.g. text lines before header should
+ begin with "//". Here is an example of the information gathered and held in
+ this attribute:
+
+ Example:
+ >>> data.metadata
+ {
+ 'data_startline' : 6,
+ 'data_variables' : ['runoff 1', 'runoff 2', 'tmin', 'tmax', 'ppt']
+ 'text_before_header' : "Title of data file \\n //some comments\\nrunoff 2
+ \\ntmin 1\\ntmax 1\\nppt 1\\nrunoff 2\\ntmin 1
+ \\ntmax 1\\nppt 1\\n
+ ########################################\\n"
+ }
+
+ Note:
+ When assigning or creating a new data file, the ``Data.write`` method will
+ assign the appropriate date header that follows the line of number signs "#".
+
+ Raises:
+ ValueError: if data in metadata is accessed before data is assigned,
+ e.g. if accessed to write a PRMS data file from a ``Data`` instance
+ that was initialized without a valid PRMS data file.
+ TypeError: if an object that is not a Python dictionary is assigned.
+
+ """
+ # to avoid overwriting pre-assigned data, check if already exists
+ if isinstance(self._metadata, dict):
+ return self._metadata
+ elif not self.base_file:
+ raise ValueError('No data file was given on initialization')
+
+ ## starting list for variable names in data file
+ input_data_names = []
+ text_before_header = str()
+ ## open data file and read header information
+ with open(self.base_file, 'r') as inf:
+ for idx,l in enumerate(inf):
+ text_before_header+=l
+ if idx == 0: ## first line always string identifier of the file- may use later
+ data_head = l.rstrip()
+ elif l.startswith('/'): ## comment lines
+ continue
+ if l.startswith(Data.valid_input_variables):
+ h = l.split() ## split, first name and second number of columns
+ if int(h[1]) > 1: ## more than one input time series of a particular variable
+ for el in range(int(h[1])):
+ tmp = '{var_name} {var_ind}'.format(var_name = h[0], var_ind = el+1)
+ input_data_names.append(tmp)
+ elif int(h[1]) == 1:
+ input_data_names.append(h[0])
+ if l.startswith('#'): ## end of header info and begin time series input data
+ data_startline = idx+1 ## 0 indexed line of first data entry
+ break
+
+ self._metadata = dict([('data_startline',data_startline),
+ ('data_variables',input_data_names),
+ ('text_before_header',text_before_header)])
+ return self._metadata
+
+ @metadata.setter
+ def metadata(self, dic):
+ if not isinstance(dic, dict):
+ raise TypeError('Must assign a Python dictionary for new Data object/file metadata')
+ self._metadata = dic
+
+ @property
+ def data_frame(self):
+ """
+ A property that gets and sets the climatic forcing data for a standard PRMS
+ climate input data file as a :class:`pandas.DataFrame`.
+
+ Example:
+ d is a Data instance, calling
+
+ >>> d.data_frame
+ input variables runoff 1 runoff 2 runoff 3 precip tmax tmin
+ date
+ 1996-12-27 0.54 1.6 NaN 0.0 46 32.0
+ 1996-12-28 0.65 1.6 NaN 0.0 45 24.0
+ 1996-12-29 0.80 1.6 NaN 0.0 44 28.0
+ 1996-12-30 0.90 1.6 NaN 0.0 51 33.0
+ 1996-12-31 1.00 1.7 NaN 0.0 47 32.0
+
+ shows the date-indexed ``pd.DataFrame`` of the input data that is created
+ when a ``Data`` object is initiated if given a valid ``base_file``, i.e.
+ file path to a PRMS climate data file.
+
+ Raises:
+ ValueError: if attribute is accessed before either assigning a PRMS data
+ file on ``Data`` initialization or not assigning a compatible
+ date-indexed ``pandas.DataFrame`` of hydro-climate variables.
+ TypeError: if a data type other than ``pandas.DataFrame`` is assigned.
+ """
+ if not self._metadata:
+ self.metadata
+ elif not isinstance(self._data_frame, pd.DataFrame) and self.base_file == None:
+ raise ValueError('No data base_file given on initialization, '\
+ 'therefore you must assign a DataFrame'\
+ +' before accessing the .data_frame attribute!')
+ # to avoid overwriting pre-assigned data
+ elif isinstance(self._data_frame, pd.DataFrame):
+ return self._data_frame
+
+ df = pd.read_csv(self.base_file, header = -1, skiprows = self.metadata['data_startline'],
+ delim_whitespace = True, na_values = [self.na_rep]) ## read data file
+ df.columns = Data.date_header + self.metadata['data_variables']
+ date = pd.Series(pd.to_datetime(df.year * 10000 + df.month * 100 +\
+ df.day, format = '%Y%m%d'), index = df.index)
+ df.index = pd.to_datetime(date) ## assign datetime index
+ df.drop(Data.date_header, axis = 1, inplace = True) ## unneeded columns
+ df.columns.name = 'input variables' ; df.index.name = 'date'
+ self._data_frame = df
+ return self._data_frame
+
+ @data_frame.setter
+ def data_frame(self, df):
+ if not isinstance(df, pd.DataFrame):
+ raise TypeError("Must assign a Pandas.DataFrame object for PRMS data input")
+ self._data_frame = df
+
+[docs] def modify(self, func, vars_to_adjust):
+ """
+ Apply a user defined function to one or more variable(s) in the data file.
+
+ The ``modify`` method allows for inplace modification of one or more
+ time series inputs in the data file based on a user defined function.
+
+ Arguments:
+ func (function): function to apply to each variable in vars_to_adjust
+ vars_to_adjust (list or tuple): collection of variable names to apply func to.
+
+ Returns:
+ None
+
+ Example:
+ Here is an example of loading a data file, modifying the temperature inputs
+ (*tmin* and *tmax*) by adding two degrees to each element, and rewritting the
+ modified data to disk,
+
+ >>> d = Data('path_to_data_file')
+ >>> def f(x):
+ return x + 2
+ >>> d.modify(f,['tmax','tmin'])
+ >>> d.write('data_temp_plus_2')
+ """
+
+ if not isinstance(self._data_frame, pd.DataFrame):
+ self.data_frame # will raise ValueError from data_frame property
+ for v in vars_to_adjust:
+ self._data_frame[v] = self._data_frame[v].apply(func)
+
+[docs] def write(self, out_path):
+ """
+ Writes the current state of the ``Data`` to PRMS text format
+ utilizing the ``Data.metadata`` and ``Data.data_frame`` instance
+ properties. If ``Data.data_frame`` was never accessed or assigned
+ new values then this method simply copies the original PRMS
+ data file to ``out_path``.
+
+ Arguments:
+ out_path (str): full path to save or copy the current PRMS data
+ in PRMS text format.
+
+ Returns:
+ None
+
+ Raises:
+ ValueError: if the ``write`` method is called without assigning either
+ an initial data (``base_file``) path or assigning correct ``metadata``
+ and ``data_frame`` properties.
+
+ """
+ # if file data was never accessed- unchanged
+ if not isinstance(self._data_frame, pd.DataFrame):
+ if self.base_file:
+ copyfile(self.base_file, out_path)
+ else: # if data not from original file and dataframe never assigned
+ raise ValueError('No data base_file was given and'\
+ +' no data was assigned!')
+
+ ## reconstruct PRMS data file format, don't overwrite date-indexed
+ else:
+ df = self._data_frame[self.metadata['data_variables']]
+ df['year'] = self._data_frame.index.year
+ df['month'] = self._data_frame.index.month
+ df['day'] = self._data_frame.index.day
+ df['hh'] = df['mm'] = df['sec'] = 0
+ df = df[Data.date_header + self._metadata['data_variables']]
+ with open(out_path,'w') as outf: # write comment header then data
+ outf.write(self._metadata['text_before_header'])
+ df.to_csv(outf, sep=' ', header=None,\
+ index=False, na_rep=self.na_rep)
+
+
+# -*- coding: utf-8 -*-
+'''
+optimizer.py -- holds ``Optimizer`` and ``OptimizationResult`` classes for
+optimization routines and management conducted on PRMS parameters.
+'''
+from __future__ import print_function
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import os, sys, json, re, shutil
+import multiprocessing as mp
+from copy import copy
+from copy import deepcopy
+from numpy import log10
+from datetime import datetime
+from .data import Data
+from .parameters import Parameters
+from .simulation import Simulation, SimulationSeries
+from .util import load_statvar, nash_sutcliffe, percent_bias, rmse
+
+OPJ = os.path.join
+
+
+[docs]class Optimizer:
+ '''
+ Container for PRMS parameter optimization and related routines.
+
+ Currently the ``monte_carlo`` method provides random parameter
+ resampling routines using uniform and normal random variables.
+
+ Example:
+ >>> from prms_python import Data, Optimizer, Parameters
+ >>> params = Parameters('path/to/parameters')
+ >>> data = Data('path/to/data')
+ >>> control = 'path/to/control'
+ >>> work_directory = 'path/to/create/simulations'
+ >>> optr = Optimizer(
+ params,
+ data,
+ control,
+ work_directory,
+ title='the title',
+ description='desc')
+ >>> measured = 'path/to/measured/csv'
+ >>> statvar_name = 'basin_cfs' # or any other valid statvar
+ >>> params_to_resample = ['dday_intcp', 'dday_slope'] # list of params
+ >>> optr.monte_carlo(measured, params_to_resample, statvar_name)
+
+ '''
+
+ #dic for min/max of parameter allowable ranges, add more when needed
+ param_ranges = {'dday_intcp': (-60.0, 10.0),
+ 'dday_slope': (0.2, 0.9),
+ 'jh_coef': (0.005, 0.06),
+ 'pt_alpha': (1.0, 2.0),
+ 'potet_coef_hru_mo': (1.0, 2.0),
+ 'tmax_index': (-10.0, 110.0),
+ 'tmin_lapse': (-10.0, 10.0),
+ 'soil_moist_max': (0.001, 10.0),
+ 'rain_adj': (0.5, 2.0),
+ 'ppt_rad_adj': (0.0, 0.5),
+ 'radadj_intcp': (0.0, 1.0),
+ 'radadj_slope': (0.0, 1.0),
+ 'radj_sppt': (0.0, 1.0),
+ 'radj_wppt': (0.0, 1.0),
+ 'radmax': (0.1, 1.0)
+ }
+
+ def __init__(self, parameters, data, control_file, working_dir,
+ title, description=None):
+
+ if isinstance(parameters, Parameters):
+ self.parameters = parameters
+ else:
+ raise TypeError('parameters must be instance of Parameters')
+
+ if isinstance(data, Data):
+ self.data = data
+
+ else:
+ raise TypeError('data must be instance of Data')
+
+ input_dir = '{}'.format(os.sep).join(control_file.split(os.sep)[:-1])
+ if not os.path.isfile(OPJ(input_dir, 'statvar.dat')):
+ print('You have no statvar.dat file in your model directory')
+ print('Running PRMS on original data in {} for later comparison'\
+ .format(input_dir))
+ sim = Simulation(input_dir)
+ sim.run()
+
+ if not os.path.isdir(working_dir):
+ os.mkdir(working_dir)
+
+ self.input_dir = input_dir
+ self.control_file = control_file
+ self.working_dir = working_dir
+ self.title = title
+ self.description = description
+ self.measured_arb = None # for arbitrary output methods
+ self.statvar_name = None
+ self.arb_outputs = []
+
+[docs] def monte_carlo(self, reference_path, param_names, statvar_name, \
+ stage, n_sims=10, method='uniform', mu_factor=1,\
+ noise_factor=0.1, nproc=None):
+ '''
+ The ``monte_carlo`` method of ``Optimizer`` performs parameter
+ random resampling techniques to a set of PRMS parameters and
+ executes and manages the corresponding simulations.
+
+ Arguments:
+ reference_path (str): path to measured data for optimization
+ param_names (list): list of parameter names to resample
+ statvar_name (str): name of statisical variable output name for
+ optimization
+ stage (str): custom name of optimization stage e.g. 'ddsolrad'
+
+ Keyword Arguments:
+ n_sims (int): number of simulations to conduct
+ parameter optimization/uncertainty analysis.
+ method (str): resampling method for parameters (normal or uniform)
+ mu_factor (float): coefficient to scale mean of the parameter(s)
+ to resample from when using the normal distribution to resample
+ i.e. a value of 1.5 will sample from a normal rv with mean
+ 50% higher than the original parameter mean
+ noise_factor (float): scales the variance of noise to add to
+ parameter values when using normal rv (method='normal')
+ nproc (int): number of processors available to run PRMS simulations
+
+ Returns:
+ None
+ '''
+ if '_' in stage:
+ raise ValueError('stage name cannot contain an underscore')
+ # assign the optimization object a copy of measured data for plots
+ self.measured_arb = pd.Series.from_csv(reference_path,\
+ parse_dates=True)
+ # statistical variable output name
+ self.statvar_name = statvar_name
+
+ start_time = datetime.now().isoformat()
+
+ # resample params for all simulations- potential place to serialize
+ params = []
+ for name in param_names: # create list of lists of resampled params
+ tmp = []
+ for idx in range(n_sims):
+ tmp.append(resample_param(self.parameters, name, how=method,\
+ mu_factor=mu_factor, noise_factor=noise_factor))
+ params.append(list(tmp))
+
+ # SimulationSeries comprised of each resampled param set
+ series = SimulationSeries(
+ Simulation.from_data(
+ self.data, _mod_params(self.parameters,\
+ [params[n][i] for n in range(len(params))],\
+ param_names),
+ self.control_file,
+ OPJ(
+ self.working_dir, # name of sim: first param and mean value
+ '{0}_{1:.10f}'.format(param_names[0], np.mean(params[0][i]))
+ )
+ )
+ for i in range(n_sims)
+ )
+
+ if not nproc:
+ nproc = mp.cpu_count() // 2
+
+ # run
+ outputs = list(series.run(nproc=nproc).outputs_iter())
+ self.arb_outputs.extend(outputs) # for current instance- add outputs
+
+ end_time = datetime.now().isoformat()
+
+ # json metadata for Monte Carlo run
+ meta = { 'params_adjusted' : param_names,
+ 'statvar_name' : self.statvar_name,
+ 'optimization_title' : self.title,
+ 'optimization_description' : self.description,
+ 'start_datetime' : start_time,
+ 'end_datetime' : end_time,
+ 'measured' : reference_path,
+ 'method' : 'Monte Carlo',
+ 'mu_factor' : mu_factor,
+ 'noise_factor' : noise_factor,
+ 'resample': method,
+ 'sim_dirs' : [],
+ 'stage': stage,
+ 'original_params' : self.parameters.base_file,
+ 'nproc': nproc,
+ 'n_sims' : n_sims
+ }
+
+ for output in outputs:
+ meta['sim_dirs'].append(output['simulation_dir'])
+
+ json_outfile = OPJ(self.working_dir, _create_metafile_name(\
+ self.working_dir, self.title, stage))
+
+ with open(json_outfile, 'w') as outf:
+ json.dump(meta, outf, sort_keys=True, indent=4,\
+ ensure_ascii=False)
+
+ print('{0}\nOutput information sent to {1}\n'.\
+ format('-' * 80, json_outfile))
+
+[docs] def plot_optimization(self, freq='daily', method='time_series',\
+ plot_vars='both', plot_1to1=True, return_fig=False,\
+ n_plots=4):
+ """
+ Basic plotting of current optimization results with limited options.
+ Plots measured, original simulated, and optimization simulated variables.
+ Not recommended for plotting results when n_sims is very large, instead
+ use options from an OptimizationResult object, or employ a user-defined
+ method using the result data.
+
+ Keyword Arguments:
+ freq (str): frequency of time series plots, value can be 'daily'
+ or 'monthly' for solar radiation
+ method (str): 'time_series' for time series sub plot of each
+ simulation alongside measured radiation. Another choice is
+ 'correlation' which plots each measured daily solar radiation
+ value versus the corresponding simulated variable as subplots
+ one for each simulation in the optimization. With coefficients
+ of determination, i.e. square of Pearson correlation coef.
+ plot_vars (str): what to plot alongside simulated srad:
+ 'meas': plot simulated along with measured swrad
+ 'orig': plot simulated along with the original simulated swrad
+ 'both': plot simulated, with original simulation and measured
+ plot_1to1 (bool): if True plot one to one line on correlation
+ scatter plot, otherwise exclude.
+ return_fig (bool): flag whether to return matplotlib figure
+
+ Returns:
+ f (:obj:`matplotlib.figure.Figure`): If kwarg return_fig=True, then return
+ copy of the figure that is generated to the user.
+ """
+ if not self.arb_outputs:
+ raise ValueError('You have not run any optimizations')
+ var_name = self.statvar_name
+ X = self.measured_arb
+ idx = X.index.intersection(load_statvar(self.arb_outputs[0]\
+ ['statvar'])['{}'.format(var_name)].index)
+ X = X[idx]
+ orig = load_statvar(OPJ(self.input_dir, 'statvar.dat'))['{}'\
+ .format(var_name)][idx]
+ meas = self.measured_arb[idx]
+ sims = [load_statvar(out['statvar'])['{}'.format(var_name)][idx] for \
+ out in self.arb_outputs]
+ simdirs = [out['simulation_dir'].split(os.sep)[-1].\
+ replace('_', ' ') for out in self.arb_outputs]
+ var_name = '{}'.format(self.statvar_name)
+ n = len(self.arb_outputs) # number of simulations to plot
+ # user defined number of subplots from first n_plots results
+ if (n > n_plots): n = n_plots
+ # styles for each plot
+ ms = 4 # markersize for all points
+ orig_sty = dict(linestyle='none',markersize=ms,\
+ markerfacecolor='none', marker='s',\
+ markeredgecolor='royalblue', color='royalblue')
+ meas_sty = dict(linestyle='none',markersize=ms+1,\
+ markerfacecolor='none', marker='1',\
+ markeredgecolor='k', color='k')
+ sim_sty = dict(linestyle='none',markersize=ms,\
+ markerfacecolor='none', marker='o',\
+ markeredgecolor='r', color='r')
+ ## number of subplots and rows (two plots per row)
+ nrow = n//2 # round down if odd n
+ ncol = 2
+ odd_n = False
+ if n/2. - nrow == 0.5:
+ nrow+=1 # odd number need extra row
+ odd_n = True
+ ########
+ ## Start plots depending on keyword arguments
+ ########
+ if freq == 'daily' and method == 'time_series':
+ fig, ax = plt.subplots(n, sharex=True, sharey=True,\
+ figsize=(12,n*3.5))
+ axs = ax.ravel()
+ for i,sim in enumerate(sims[:n]):
+ if plot_vars in ('meas', 'both'):
+ axs[i].plot(meas, label='Measured', **meas_sty)
+ if plot_vars in ('orig', 'both'):
+ axs[i].plot(orig, label='Original sim.', **orig_sty)
+ axs[i].plot(sim, **sim_sty)
+ axs[i].set_ylabel('sim: {}'.format(simdirs[i]), fontsize=10)
+ if i == 0: axs[i].legend(markerscale=5, loc='best')
+ fig.subplots_adjust(hspace=0)
+ fig.autofmt_xdate()
+ #monthly means
+ elif freq == 'monthly' and method == 'time_series':
+ # compute monthly means
+ meas = meas.groupby(meas.index.month).mean()
+ orig = orig.groupby(orig.index.month).mean()
+ # change line styles for monthly plots to lines not points
+ for d in (orig_sty, meas_sty, sim_sty):
+ d['linestyle'] = '-'
+ d['marker'] = None
+
+ fig, ax = plt.subplots(nrows=nrow, ncols=ncol, figsize=(12,n*3.5))
+ axs = ax.ravel()
+ for i,sim in enumerate(sims[:n]):
+ if plot_vars in ('meas', 'both'):
+ axs[i].plot(meas, label='Measured', **meas_sty)
+ if plot_vars in ('orig', 'both'):
+ axs[i].plot(orig, label='Original sim.', **orig_sty)
+ sim = sim.groupby(sim.index.month).mean()
+ axs[i].plot(sim, **sim_sty)
+ axs[i].set_ylabel('sim: {}\nmean'.format(simdirs[i]),\
+ fontsize=10)
+ axs[i].set_xlim(0.5,12.5)
+ if i == 0: axs[i].legend(markerscale=5, loc='best')
+ if odd_n: # empty subplot if odd number of simulations
+ fig.delaxes(axs[n])
+ fig.text(0.5, 0.1, 'month')
+ #x-y scatter
+ elif method == 'correlation':
+ ## figure
+ fig, ax = plt.subplots(nrows=nrow, ncols=ncol, figsize=(12,n*3))
+ axs = ax.ravel()
+ ## subplot dimensions
+ meas_min = min(X)
+ meas_max = max(X)
+
+ for i, sim in enumerate(sims[:n]):
+ Y = sim
+ sim_max = max(Y)
+ sim_min = min(Y)
+ m = max(meas_max,sim_max)
+ if plot_1to1:
+ axs[i].plot([0, m], [0, m], 'k--', lw=2) ## one to one line
+ axs[i].set_xlim(meas_min,meas_max)
+ axs[i].set_ylim(sim_min, sim_max)
+ axs[i].plot(X, Y, **sim_sty)
+ axs[i].set_ylabel('sim: {}'.format(simdirs[i]))
+ axs[i].set_xlabel('Measured {}'.format(var_name))
+ axs[i].text(0.05, 0.95,r'$R^2 = {0:.2f}$'.format(\
+ X.corr(Y)**2), fontsize=16,\
+ ha='left', va='center', transform=axs[i].transAxes)
+ if odd_n: # empty subplot if odd number of simulations
+ fig.delaxes(axs[n])
+
+ if return_fig:
+ return fig
+
+def _create_metafile_name(out_dir, opt_title, stage):
+ """
+ Search through output directory where simulations are conducted
+ look for all metadata simulation json files and find out if the
+ current simulation is a replicate. Then use that information to
+ build the correct file name for the output json file. The series
+ are typically run in parallel that is why this step has to be
+ done after running multiple simulations from an optimization stage.
+
+ Arguments:
+ out_dir (str): path to directory with model results, i.e.
+ location where simulation series outputs and optimization
+ json files are located, aka Optimizer.working_dir
+ opt_title (str): optimization instance title for file search
+ stage (str): stage of optimization, e.g. 'swrad', 'pet'
+
+ Returns:
+ name (str): file name for the current optimization simulation series
+ metadata json file. E.g 'dry_creek_swrad_opt.json', or if
+ this is the second time you have run an optimization titled
+ 'dry_creek' the next json file will be returned as
+ 'dry_creek_swrad_opt1.json' and so on with integer increments
+ """
+ meta_re = re.compile(r'^{}_{}_opt(\d*)\.json'.format(opt_title, stage))
+ reps = []
+ for f in os.listdir(out_dir):
+ if meta_re.match(f):
+ nrep = meta_re.match(f).group(1)
+ if nrep == '':
+ reps.append(0)
+ else:
+ reps.append(nrep)
+
+ if not reps:
+ name = '{}_{}_opt.json'.format(opt_title, stage)
+ else:
+ # this is the nth optimization done under the same title
+ n = max(map(int, reps)) + 1
+ name = '{}_{}_opt{}.json'.format(opt_title, stage, n)
+ return name
+
def resample_param(params, param_name, how='uniform', mu_factor=1,
                   noise_factor=0.1):
    """
    Resample PRMS parameter by shifting all values by a constant that is
    taken from a uniform distribution, where the range of the uniform
    values is equal to the difference between the min and max of the allowable
    range. The parameter min and max are set in ``Optimizer.param_ranges``.
    If the resampling method (``how`` argument) is set to 'normal', randomly
    sample a normal distribution with mean = mean(parameter) X ``mu_factor`` and
    sigma = param allowable range multiplied by ``noise_factor``. If parameters have
    array length <= 366 then individual parameter values are resampled otherwise
    resample all param values at once, e.g. by taking a single random value
    from the uniform distribution. If they are taken all at once using the
    normal method then the original values are scaled by mu_factor and a normal
    random variable with mean=0 and std dev = parameter range X ``noise_factor``.

    Arguments:
        params (:class:`prms_python.Parameters`): ``Parameters`` object
        param_name (str): name of PRMS parameter to resample

    Keyword Arguments:
        how (str): distribution to resample parameters from in the case
            that each parameter element can be resampled (len <=366)
            Currently works for uniform and normal distributions.
        mu_factor (float): multiplier applied to the original parameter
            value(s) to form the mean of the normal distribution when
            ``how='normal'``; has no effect for ``how='uniform'``.
        noise_factor (float): factor to multiply parameter range by,
            use the result as the standard deviation for the normal rand.
            variable used to add element wise noise. i.e. higher
            noise_factor will result in higher variance. Must be > 0.

    Returns:
        ret (:obj:`numpy.ndarray`): ndarray of param after resampling

    Raises:
        KeyError: if ``param_name`` not a valid parameter name
        ValueError: if the parameter range has not been set in
            ``Optimizer.param_ranges``
    """
    # (-1, -1) is a sentinel meaning "no allowable range registered"
    p_min, p_max = Optimizer.param_ranges.get(param_name, (-1, -1))

    # create dictionary of parameter basic info (not values)
    param_dic = {param['name']: param for param in params.base_params}
    if not param_dic.get(param_name):
        raise KeyError('{} is not a valid parameter'.format(param_name))

    if p_min == p_max == -1:
        raise ValueError("""{} has not been added to the dictionary of
        parameters to resample, add it's allowable min and max value
        to the Optimizer.param_ranges attribute in
        Optimizer.py""".format(param_name))

    dim_case = None
    nhru = params.dimensions['nhru']
    ndims = param_dic.get(param_name)['ndims']
    dimnames = param_dic.get(param_name)['dimnames']
    length = param_dic.get(param_name)['length']
    # deepcopy so in-place edits below never mutate the Parameters object
    param = deepcopy(params[param_name])

    # could expand list and check parameter name also e.g. cascade_flg
    # is a parameter that should not be changed
    dims_to_not_change = set(['ncascade', 'ncascdgw', 'nreach',
                              'nsegment'])
    if (len(set.intersection(dims_to_not_change, set(dimnames))) > 0):
        raise ValueError("""{} should not be resampled as
            it relates to the location of cascade flow
            parameters.""".format(param_name))

    # use param info to get dimension info- e.g. if multidimensional
    if (ndims == 1 and length <= 366):
        dim_case = 'resample_each_value'  # for smaller param dimensions
    elif (ndims == 1 and length > 366):
        dim_case = 'resample_all_values_once'  # covers nssr, ngw, etc.
    elif (ndims == 2 and dimnames[1] == 'nmonths' and
          nhru == params.dimensions[dimnames[0]]):
        dim_case = 'nhru_nmonths'
    # dim_case is still None here, so this branch is effectively an `else`
    elif not dim_case:
        raise ValueError('The {} parameter is not set for resampling'.
                         format(param_name))

    s = (p_max - p_min) * noise_factor  # std_dev (s) default: param_range/10
    # do resampling based on param dimensions and sampling distribution
    if dim_case == 'resample_all_values_once':
        if how == 'uniform':
            ret = np.random.uniform(low=p_min, high=p_max, size=param.shape)
        elif how == 'normal':  # scale parameter mean if mu_factor given
            param *= mu_factor
            tmp = np.random.normal(0, s, size=param.shape)
            ret = tmp + param

    elif dim_case == 'resample_each_value':
        # ret aliases param here; element assignments below write into it
        ret = param
        if how == 'uniform':
            ret = np.random.uniform(low=p_min, high=p_max, size=param.shape)
        elif how == 'normal':  # the original value is considered the mean
            if len(ret.shape) != 0:
                for i, el in enumerate(param):
                    mu = el * mu_factor
                    ret[i] = np.random.normal(mu, s)
            else:  # single value parameter (0-d array)
                mu = param * mu_factor
                ret = np.random.normal(mu, s)

    # nhru by nmonth dimensional params
    elif dim_case == 'nhru_nmonths':
        # NOTE(review): assumes axis 0 of the array indexes the 12 months —
        # confirm against the Parameters array layout.
        ret = param
        if how == 'uniform':
            for month in range(12):
                ret[month] = np.random.uniform(low=p_min, high=p_max,
                                               size=param[0].shape)
        elif how == 'normal':
            for i, el in enumerate(param):
                el *= mu_factor  # in-place scale of this month's row
                tmp = np.random.normal(0, s, size=el.shape)
                ret[i] = tmp + el

    return ret
+
+def _mod_params(parameters, params, param_names):
+ # loop through list of params and assign their values to Parameter instance
+ ret = copy(parameters)
+ for idx, param in enumerate(params):
+ ret[param_names[idx]] = np.array(param)
+ return ret
+
+
class OptimizationResult:
    """
    The ``OptimizationResult`` object serves to collect and manage output
    from an ``Optimizer`` method. Upon initialization and a given optimization
    stage that was used when running the Optimizer method, e.g. ``monte_carlo``,
    the class gathers all JSON metadata that was produced for the given stage.
    The ``OptimizationResult`` has three main user methods: first ``result_table``
    which returns the top n simulations according to four model performance
    metrics (Nash-Sutcliffe efficiency (NSE), root-mean squared-error (RMSE),
    percent bias (PBIAS), and the coefficient of determination (COEF_DET) as
    calculated against measured data. For example the table may look like:

    >>> ddsolrad_res = OptimizationResult(work_directory, stage=stage)
    >>> top10 = ddsolrad_res.result_table(freq='monthly',top_n=10)
    >>> top10
    ======================== ======== ======= ========= ========
    ddsolrad parameters      NSE      RMSE    PBIAS     COEF_DET
    ======================== ======== ======= ========= ========
    orig_params              0.956267 39.4725 -0.885715 0.963116
    tmax_index_54.2224631748 0.921626 47.6092 -0.849256 0.94402
    tmax_index_44.8823940703 0.879965 58.9194 5.79603   0.922021
    tmax_index_47.6835387480 0.764133 82.5918 -4.78896  0.837582
    ======================== ======== ======= ========= ========

    Second, the ``get_top_ranked_sims`` which returns a dictionary that map
    key information about the top n ranked simulations, an example returned
    dictionary may look like:

    >>> {
         'dir_name' : ['pathToSim1', 'pathToSim2'],
         'param_path' : ['pathToSim1/input/parameters', 'pathToSim2/input/parameters'],
         'statvar_path' : ['pathToSim1/output/statvar.dat', 'pathToSim2/output/statvar.dat'],
         'params_adjusted' : [[param_names_sim1], [param_names_sim2]]
        }

    The third method of ``OptimizationResult`` is ``archive`` which essentially
    opens all parameter and statvar files from each simulation of the given
    stage and archives the parameters that were modified and their modified values
    and the statistical variable (PRMS time series output) that is associated with
    the optimization stage. Other ``Optimizer`` simulation metadata is also gathered
    and new JSON metadata containing only this information is created and written
    within a newly created "archived" subdirectory within the same directory that
    the ``Optimizer`` routine managed simulations. The ``OptimizationResult.archive``
    method then recursively deletes the simulation data for each of the given stage.
    """

    def __init__(self, working_dir, stage):
        """
        Create an ``OptimizationResult`` instance to manage output and analyse parameter-
        output relationships as produced by the use of an ``Optimizer`` method of a user
        defined optimization stage.
        """
        self.working_dir = working_dir
        self.stage = stage
        # dict: stage -> list of metadata JSON file paths found in working_dir
        self.metadata_json_paths = self._get_optr_jsons(working_dir, stage)
        self.total_sims = self._count_total_sims()
        # name of the statvar output variable tracked for this stage
        self.statvar_name = self._get_statvar_name(stage)
        # measured time series (pandas Series) used for performance metrics
        self.measured = self._get_measured(stage)
        self.input_dir = self._get_input_dir(stage)
        self.input_params = self._get_input_params(stage)

        # if there are more than one input param for given stage
        if len(self.input_params) > 1:
            print(
"""Warning: there were more than one initial parameter sets used for the
 optimization for stage: {}. Make sure to compare the the correct input
 params with their corresponding output sims.""".format(stage))
            print('\nThis optimization stage used the\
 following input parameter files:\n{}'.format('\n'.join(self.input_params)))

    def _count_total_sims(self):
        # total number of simulations of given stage in working directory,
        # summed over every metadata JSON file's 'sim_dirs' list
        tracked_dirs = []
        for f in self.metadata_json_paths[self.stage]:
            with open(f) as fh:
                json_data = json.load(fh)
            tracked_dirs.extend(json_data.get('sim_dirs'))
        return len(tracked_dirs)

    def _get_optr_jsons(self, work_dir, stage):
        """
        Retrieve locations of optimization output jsons which contain
        metadata needed to understand optimization results.
        Create dictionary of each optimization with stage as key and lists
        of corresponding json file paths as values.

        Arguments:
            work_dir (str): path to directory with model results, i.e.
                location where simulation series outputs and optimization
                json files are located, aka Optimizer.working_dir
            stage (str): the stage ('ddsolrad', 'jhpet', 'flow', etc.) of
                the optimization in which to gather the jsons

        Returns:
            ret (dict): dictionary of stage (keys) and lists of
                json file paths for that stage (values).
        """
        ret = {}
        # NOTE(review): pattern is unanchored at the end (no '$'), so names
        # like '<x>_<stage>_opt.json.bak' would also match — confirm intended.
        optr_metafile_re = re.compile(r'^.*_{}_opt(\d*)\.json'.
                                      format(stage))
        ret[stage] = [OPJ(work_dir, f) for f in
                      os.listdir(work_dir) if
                      optr_metafile_re.match(f)]
        return ret

    def _get_input_dir(self, stage):
        # retrieves the input directory from the first json file of given
        # stage by dropping the file name from the 'original_params' path
        json_file = self.metadata_json_paths[stage][0]
        with open(json_file) as jf:
            meta_dic = json.load(jf)
        return '{}'.format(os.sep).join(meta_dic['original_params'].
                                        split(os.sep)[:-1])

    def _get_input_params(self, stage):
        # unique set of original parameter file paths used for this stage
        json_files = self.metadata_json_paths[stage]
        param_paths = []
        for json_file in json_files:
            with open(json_file) as jf:
                meta_dic = json.load(jf)
            param_paths.append(meta_dic['original_params'])
        return list(set(param_paths))

    def _get_sim_dirs(self, stage):
        # collect every simulation directory path tracked by every metadata
        # JSON file of the given stage
        jsons = self.metadata_json_paths[stage]
        json_files = []
        sim_dirs = []
        for inf in jsons:
            with open(inf) as json_file:
                json_files.append(json.load(json_file))
        for json_file in json_files:
            sim_dirs.extend(json_file['sim_dirs'])
        # list of all simulation directory paths for stage
        return sim_dirs

    def _get_measured(self, stage):
        # only need to open one json file to get this information
        if not self.metadata_json_paths.get(stage):
            return  # no optimization json files exist for given stage
        first_json = self.metadata_json_paths[stage][0]
        with open(first_json) as json_file:
            json_data = json.load(json_file)
        # NOTE(review): pd.Series.from_csv was deprecated in pandas 0.21 and
        # removed in 1.0 — replace with pd.read_csv(...).squeeze() when the
        # supported pandas version moves past 0.x.
        measured_series = pd.Series.from_csv(json_data.get('measured'),
                                             parse_dates=True)
        return measured_series

    def _get_statvar_name(self, stage):
        # only need to open one json file to get this information
        try:
            first_json = self.metadata_json_paths[stage][0]
        # NOTE(review): bare except — an IndexError/KeyError here means no
        # JSONs exist for the stage; narrowing the clause would avoid masking
        # unrelated errors.
        except:
            raise ValueError("""No optimization has been run for
            stage: {}""".format(stage))
        with open(first_json) as json_file:
            json_data = json.load(json_file)
            var_name = json_data.get('statvar_name')

        return var_name

    def result_table(self, freq='daily', top_n=5, latex=False):
        """
        Build a table of model performance metrics (NSE, RMSE, PBIAS,
        COEF_DET) for each simulation of this stage, sorted best-first,
        with the original (unmodified) parameter run prepended.

        Keyword Arguments:
            freq (str): 'daily' (default) to compare daily series or
                'monthly' to compare monthly means.
            top_n (int): number of best simulations to return.
            latex (bool): if True return the table as a LaTeX string.

        Returns:
            :obj:`pandas.DataFrame` (or str if ``latex``): top ``top_n``
            rows of the sorted metric table.
        """
        ##TODO: add stats for freq options annual (means or sum)

        sim_dirs = self._get_sim_dirs(self.stage)
        if top_n >= len(sim_dirs):
            top_n = len(sim_dirs) + 1  # for returning inclusive last sim
        sim_names = [path.split(os.sep)[-1] for path in sim_dirs]
        meas_var = self._get_measured(self.stage)
        statvar_name = self._get_statvar_name(self.stage)
        orig_statvar = load_statvar(OPJ(self.input_dir, 'statvar.dat'))\
                       [statvar_name]

        # ABS(PBIAS) is an extra sort key only; dropped before returning
        result_df = pd.DataFrame(columns=
                                 ['NSE', 'RMSE', 'PBIAS', 'COEF_DET', 'ABS(PBIAS)'])
        orig_results = pd.DataFrame(index=['orig_params'],
                                    columns=['NSE', 'RMSE', 'PBIAS', 'COEF_DET'])
        # get datetime indices that overlap from measured and simulated
        sim_out = load_statvar(OPJ(sim_dirs[0], 'outputs', 'statvar.dat'))\
                  [statvar_name]
        idx = meas_var.index.intersection(sim_out.index)
        meas_var = copy(meas_var[idx])
        #sim_out = sim_out[idx]
        orig_statvar = orig_statvar[idx]

        if freq == 'monthly':
            meas_mo = meas_var.groupby(meas_var.index.month).mean()
            orig_mo = orig_statvar.groupby(orig_statvar.index.month).mean()

        for i, sim in enumerate(sim_dirs):
            try:
                sim_out = load_statvar(OPJ(sim, 'outputs', 'statvar.dat'))\
                          ['{}'.format(statvar_name)]
            # NOTE(review): on failure the previous iteration's sim_out is
            # silently reused for this sim's metrics — confirm intended.
            except:  # simulation might have been removed or missing
                pass

            sim_out = sim_out[idx]
            if freq == 'daily':
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_var, sim_out),
                    rmse(meas_var, sim_out),
                    percent_bias(meas_var, sim_out),
                    meas_var.corr(sim_out)**2,
                    np.abs(percent_bias(meas_var, sim_out))]
                orig_results.loc['orig_params'] = [
                    nash_sutcliffe(orig_statvar, meas_var),
                    rmse(orig_statvar, meas_var),
                    percent_bias(orig_statvar, meas_var),
                    orig_statvar.corr(meas_var)**2]

            elif freq == 'monthly':
                sim_out = sim_out.groupby(sim_out.index.month).mean()
                result_df.loc[sim_names[i]] = [
                    nash_sutcliffe(meas_mo, sim_out),
                    rmse(meas_mo, sim_out),
                    percent_bias(meas_mo, sim_out),
                    meas_mo.corr(sim_out)**2,
                    np.abs(percent_bias(meas_mo, sim_out))]
                orig_results.loc['orig_params'] = [
                    nash_sutcliffe(orig_mo, meas_mo),
                    rmse(orig_mo, meas_mo),
                    percent_bias(orig_mo, meas_mo),
                    orig_mo.corr(meas_mo)**2]

        # rank: best NSE first, then lowest RMSE and |PBIAS|, highest R^2
        sorted_result = result_df.sort_values(by=['NSE', 'RMSE', 'ABS(PBIAS)',
                                                  'COEF_DET'], ascending=[False, True, True, False])
        sorted_result.columns.name = '{} parameters'.format(self.stage)
        sorted_result = sorted_result[['NSE', 'RMSE', 'PBIAS', 'COEF_DET']]
        sorted_result = pd.concat([orig_results, sorted_result])

        if latex: return sorted_result[:top_n].to_latex(escape=False)
        else: return sorted_result[:top_n]

    def get_top_ranked_sims(self, sorted_df):
        """
        Map the simulations in a ``result_table`` DataFrame to their
        directory names, parameter/statvar file paths, and the list of
        parameters that were adjusted in each simulation.

        Arguments:
            sorted_df (:obj:`pandas.DataFrame`): table returned by
                ``result_table`` (indexed by simulation directory name).

        Returns:
            dict: lists keyed by 'dir_name', 'param_path',
            'statvar_path', and 'params_adjusted', aligned by position.
        """
        # use result table to make dic with best param and statvar paths
        # index of table are simulation directory names
        ret = {
            'dir_name' : [],
            'param_path' : [],
            'statvar_path' : [],
            'params_adjusted' : []
        }

        json_paths = self.metadata_json_paths[self.stage]

        # drop the original-parameter row; it has no simulation directory
        for el in sorted_df.drop('orig_params').index:
            ret['dir_name'].append(el)
            ret['param_path'].append(OPJ(self.working_dir, el, 'inputs',
                                         'parameters'))
            ret['statvar_path'].append(OPJ(self.working_dir, el, 'outputs',
                                           'statvar.dat'))
            # find which metadata JSON tracked this sim to get its params
            for f in json_paths:
                with open(f) as fh:
                    json_data = json.load(fh)
                if OPJ(self.working_dir, el) in json_data.get('sim_dirs'):
                    ret['params_adjusted'].append(
                        json_data.get('params_adjusted'))

        return ret

    def archive(self, remove_sims=True, remove_meta=False, metric_freq='daily'):
        """
        Create archive directory to hold json files that contain
        information of adjusted parameters, model output, and performance
        metrics for each Optimizer simulation of the
        OptimizationResult.stage in the OptimizationResult.working_dir.

        Keyword Arguments:
            remove_sims (bool): If True recursively delete all folders
                and files associated with original simulations of the
                OptimizationResult.stage in the
                OptimizationResult.working_dir, if False do not delete
                simulations.
            remove_meta (bool): Whether to delete original Optimizer
                JSON metadata files, default is False.
            metric_freq (str): Frequency of output metric computation
                for recording of model performance. Can be 'daily'
                (default) or 'monthly'. Note, other results can be computed
                later with archived results.

        Returns:
            None
        """
        # create output archive directory
        archive_dir = OPJ(self.working_dir, "{}_archived".format(self.stage))
        if not os.path.isdir(archive_dir):
            os.mkdir(archive_dir)

        # create table and use to make mapping dic
        table = self.result_table(freq=metric_freq,
                                  top_n=self.total_sims, latex=False)
        map_dic = self.get_top_ranked_sims(table)

        metadata_json_paths = self.metadata_json_paths[self.stage]
        # get measured optimization variable path
        first_json = metadata_json_paths[0]

        with open(first_json) as json_file:
            json_data = json.load(json_file)
        measured_path = json_data.get('measured')

        # record info for each simulation and archive to JSONs
        # pandas series and numpy arrays are converted to Python lists
        # for JSON serialization
        for i, sim in enumerate(map_dic.get('dir_name')):
            json_path = OPJ(archive_dir, '{sim}.json'.format(sim=sim))
            try:
                output_series = load_statvar(OPJ(self.working_dir, sim,
                                                 'outputs', 'statvar.dat'))\
                                [self.statvar_name]
            except:  # simulation directory was already removed
                continue

            # look for resampling method info for the particular simulation
            for f in metadata_json_paths:
                with open(f) as tmp_file:
                    tmp = json.load(tmp_file)
                if OPJ(self.working_dir, sim) in tmp.get('sim_dirs'):
                    resample = tmp.get('resample')
                    noise_factor = tmp.get('noise_factor')
                    mu_factor = tmp.get('mu_factor')

            json_data = {
                'param_names' : [],
                'param_values' : [],
                'original_param_path' : self.input_params,
                'measured_path' : measured_path,
                'output_name' : self.statvar_name,
                'output_date_index' : output_series.
                                      index.astype(str).tolist(),
                'output_values' : output_series.values.tolist(),
                'metric_freq' : metric_freq,
                'resample' : resample,
                'stage' : self.stage,
                'mu_factor' : mu_factor,
                'noise_factor' : noise_factor,
                'NSE' : table.loc[sim, 'NSE'],
                'RMSE' : table.loc[sim, 'RMSE'],
                'PBIAS' : table.loc[sim, 'PBIAS'],
                'COEF_DET' : table.loc[sim, 'COEF_DET']
            }

            # store each adjusted parameter's name and (list-ified) values
            for param in map_dic.get('params_adjusted')[i]:
                json_data['param_names'].append(param)
                json_data['param_values'].append(Parameters(
                    map_dic.get('param_path')[i])[param]
                    .tolist())
            # save JSON file into archive directory
            with open(json_path, 'w') as outf:
                json.dump(json_data, outf, sort_keys = True, indent = 4,
                          ensure_ascii = False)

            # recursively delete all simulation directories after archiving
            if remove_sims:
                path = OPJ(self.working_dir, sim)
                # NOTE(review): shutil.rmtree already deletes recursively;
                # the os.walk loop looks redundant — confirm before simplifying.
                for dirpath, dirnames, filenames in os.walk(path,
                                                            topdown=False):
                    shutil.rmtree(dirpath, ignore_errors=True)
            else:
                continue

        # optional delete the original JSON metadata
        if remove_meta:
            for meta_file in self.metadata_json_paths[self.stage]:
                try:
                    os.remove(meta_file)
                except:
                    continue
+
+
+
+# -*- coding: utf-8 -*-
+'''
+parameters.py -- holds ``Parameter`` class with multiple functionality for
+the standard PRMS parameters input file.
+'''
+
+import datetime, calendar
+import io, os
+import itertools
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+
+from matplotlib.backends.backend_pdf import PdfPages
+from mpl_toolkits.axes_grid1 import make_axes_locatable
+from collections import OrderedDict
+
+OPJ = os.path.join
+
+[docs]class Parameters(object):
+ '''
+ Disk-based representation of a PRMS parameter file.
+
+ For the sake of memory efficiency, we only load parameters from
+ ``base_file`` that get modified through item assignment or accessed directly.
+ Internally, a reference is kept to only previously accessed parameter data,
+ so when ``write`` is called most copying is from ``base_file`` directly.
+ When parameters are accessed or modified using the dictionary-like syntax,
+ a ``np.ndarray`` representation of the parameter is returned. As a result
+ ``numpy`` mathematical rules including efficient vectorization of math applied
+ to arrays can be applied to modify parameters directly. The ``Parameter``
+ objects user methods allow for visualization of most PRMS parameters, function
+ based modification of parameters, and a write function that writes the data
+ back to PRMS text format.
+
+ Arguments:
+ base_file (str): path to PRMS parameters file
+
+ Attributes:
+ base_file (str): path to PRMS parameters file
+ base_file_reader (file): file handle of PRMS parameters file
+ dimensions (:obj:`collections.OrderedDict`): dictionary with
+ parameter dimensions as defined in parameters file loaded on
+ initialization
+ base_params (list of dicts): list of dictionaries of parameter
+ metadata loaded on initialization e.g. name, dimension(s), data
+ type, length of data array, and lines where data starts and ends
+ in file
+        param_arrays (dict): dictionary with parameter names as keys and
+            ``numpy.array`` and ``numpy.ndarray`` representations of parameter
+            values as values. Initially empty, uses getter and setter functions.
+
+ Example:
+ >>> p = Parameters('path/to/a/parameter/file')
+ >>> p['jh_coef'] = p['jh_coef']*1.1
+ >>> p.write('example_modified_params')
+
+ will read parameter information from the params file to check that
+ *jh_coef* is present in the parameter file, read the lines corresponding
+ to *jh_coef* data and assign the new value as requested. Calling
+ the ``write`` method next will copy all parameters except *jh_coef*
+ to the new parameter file and append the newly modified *jh_coef*
+ to the end of the new file from the modified values stored in the
+ parameter instance ``p``.
+ '''
+
    def __init__(self, base_file):
        """Read parameter/dimension metadata (not values) from ``base_file``."""
        # path kept so values can be read lazily and copied on write
        self.base_file = base_file
        # NOTE(review): this handle is opened but never closed here, and
        # ``write`` re-opens base_file itself — looks like a resource leak;
        # confirm nothing else reads from it before removing.
        self.base_file_reader = open(base_file)
        # dimensions: OrderedDict of dimension name -> size;
        # base_params: list of per-parameter metadata dicts (name, ndims,
        # dimnames, length, data-start/end lines)
        self.dimensions, self.base_params = self.__read_base(base_file)
        # cache of parameter arrays that were accessed or assigned via
        # dictionary-style item access; starts empty for memory efficiency
        self.param_arrays = dict()
+
    def write(self, out_name):
        """
        Writes current state of ``Parameters`` to disk in PRMS text format

        To reduce memory usage the ``write`` method copies parameters
        from the initial ``base_file`` parameter file for all parameters
        that were never modified.

        Arguments:
            out_name (str): path to write ``Parameters`` data to PRMS text
                format.

        Returns:
            None
        """
        data_type_dic = {'1': 'int',
                         '2': 'float'}  # retain PRMS data types

        with open(self.base_file, 'r') as base_file:
            with open(out_name, 'w') as out_file:
                # write metadata
                out_file.write('File Auto-generated by PRMS-Python\n')
                out_file.write(datetime.datetime.now().isoformat() + '\n')

                # # write dimensions
                out_file.write('** Dimensions **\n')

                # write parameters; pre-sorted by data start line on read.
                # State machine over base_file lines: '####' marks the start
                # of an entry, the next line carries its name; entries whose
                # name is NOT cached in param_arrays are copied through
                # verbatim, cached ones are skipped here and re-written from
                # memory below. NOTE(review): dimension entries also use the
                # '####' delimiter, so they appear to be copied by this same
                # machinery — confirm against the PRMS file format.
                name_is_next = False
                params_start = False
                write_params_lines = False
                for l in base_file:

                    if not params_start and l.strip() == '** Parameters **':
                        out_file.write('** Parameters **\n')
                        params_start = True

                    elif l.strip() == '####':
                        name_is_next = True

                    elif name_is_next:
                        name = l.strip().split()[0]
                        if name not in self.param_arrays:
                            # unmodified: copy delimiter + name, then stream
                            # the following data lines through
                            out_file.write('####\n')
                            out_file.write(name + '\n')
                            name_is_next = False
                            write_params_lines = True
                        else:
                            # modified: suppress copying until next '####'
                            write_params_lines = False
                            name_is_next = False

                    elif write_params_lines:
                        out_file.write(l.strip() + '\n')

                # write all parameters that had been accessed and/or modified
                for param, new_arr in self.param_arrays.items():

                    out_file.write('####\n')

                    # metadata dict for this parameter (name is unique)
                    param_info = [el for el in self.base_params
                                  if el['name'] == param].pop()

                    out_file.write(str(param_info['name']) + '\n')
                    out_file.write(str(param_info['ndims']) + '\n')
                    for dimname in param_info['dimnames']:
                        out_file.write(dimname + '\n')
                    out_file.write(str(param_info['length']) + '\n')
                    out_file.write(str(param_info['vartype']) + '\n')
                    # flatten to the original PRMS layout and cast back to
                    # the declared data type before serializing one-per-line
                    out_file.writelines([str(a) + '\n'
                                         for a in new_arr.flatten().
                                         astype(data_type_dic[param_info
                                                              ['vartype']])])
+
+[docs] def plot(self, nrows, which='all', out_dir=None, xlabel=None,\
+ ylabel=None, cbar_label=None, title=None, mpl_style=None):
+ """
+ Versatile method that plots most parameters in a standard PRMS parameter
+ file assuming the PRMS model was built on a uniform spatial grid.
+
+ Plots parameters as line plots for series or 2D spatial grid depending on
+ parameter dimension. The PRMS parameter file is assumed to hold parameters
+ for a model that was set up on a uniform rectangular grid with the spatial
+ index of HRUs starting in the upper left corner and moving left to
+ right across columns and down rows. Default function is to print four
+ files, each with plots of varying parameter dimensions as explained
+ under Kwargs ``which`` and more detailed explanation in the example
+ `Jupyter notebook <https://github.com/PRMS-Python/PRMS-Python/blob/master/notebooks/param_examples.ipynb>`_.
+
+ Arguments:
+ nrows (int): The number of rows in the PRMS model grid for plotting
+ spatial parameters. Will only work correctly for rectangular gridded models
+ with HRU indices starting in the upper left cell moving left to right
+ across columns and down across rows.
+
+ Keyword Arguments:
+ which (str): name of PRMS parameter to plot or 'all'. If 'all' then
+ the function will print 3 multipage pdfs, one for nhru
+ dimensional parameters, one for nhru by monthly parameters, one
+ for other parameters of length > 1, and one html file containing
+ single valued parameters.
+ out_dir (str): path to an output dir, default current directory
+ xlabel (str): x label for plot(s)
+ ylabel (str): y label for plot(s)
+ cbar_label (str): label for colorbar on spatial plot(s)
+ title (str): plot title
+ mpl_style (str, list): name or list of names of matplotlib style sheets to
+ use for plot(s).
+
+ Returns:
+ None
+
+ Examples:
+ If the plot method is called with the keyword argument ``which`` set
+ to a parameter that has length one, i.e. single valued it will simply
+ print out the value e.g.:
+
+ >>> p = Parameters('path/to/parameters')
+ >>> p.plot(nrows=10, which='radj_sppt')
+ radj_sppt is single valued with value: 0.4924942352224324
+
+ The default action is particularly useful which makes four multi-page
+ pdfs of most PRMS parameters where each file contains parameters
+ of different dimensions e.g.:
+
+ >>> p.plot(nrows=10, which='all', mpl_style='ggplot')
+
+ will produce the following four files named by parameters of certain
+ dimensions:
+
+ >>> import os
+ >>> os.listdir(os.getcwd()) # list files in current directory
+ nhru_param_maps.pdf
+ nhru_by_nmonths_param_maps.pdf
+ non_spatial_param_plots.pdf
+ single_valued_params.html
+
+ """
+ params = self
+
+ if not isinstance(params, Parameters):
+ raise TypeError('params must be instance of Parameters, not '\
+ + str(type(params)))
+ if not out_dir:
+ out_dir = os.getcwd()
+
+ if not os.path.isdir(out_dir):
+ os.mkdir(out_dir)
+
+ nhru = params.dimensions['nhru']
+ ncols = nhru // nrows
+
+ if not mpl_style:
+ mpl_style = 'classic'
+ plt.style.use(mpl_style)
+
+ # make pdfs and html of all parameters seperated in 4 files based on dimension
+ if which == 'all':
+ ## spatial parameters with dimension of length nhru
+ p_names = [param['name'] for param in params.base_params if\
+ param['length'] == nhru and len(param['dimnames'])==1]
+ with PdfPages(OPJ(out_dir,'nhru_param_maps.pdf')) as pdf:
+ for p in p_names:
+ try:
+ plt.figure()
+ ax = plt.gca()
+ im = ax.imshow(params['{}'.format(p)].reshape(nrows,ncols), origin='upper')
+ # origin upper- assumes indices of parameters starts in upper left
+ divider = make_axes_locatable(ax)
+ cax = divider.append_axes("right", size="5%", pad=0.05)
+ plt.colorbar(im, cax=cax)
+ ax.set_title('{}'.format(p))
+ ax.tick_params(left='off', bottom='off', labelleft='off',labelbottom='off')
+ pdf.savefig()
+ plt.close()
+ except:
+ print('{param} parameter failed to plot'.format(param=p))
+
+ ## monthly spatial parameters (on plot per month)
+ p_names = [param['name'] for param in params.base_params if\
+ param['dimnames'][0] == 'nhru' and len(param['dimnames'])==2 ]
+ with PdfPages(OPJ(out_dir,'nhru_by_nmonths_param_maps.pdf')) as pdf:
+ for p in p_names:
+ try:
+ for i in range(12): #month
+ plt.figure()
+ ax = plt.gca()
+ im = ax.imshow(params['{}'.format(p)][i].reshape(nrows, ncols), origin='upper')
+ divider = make_axes_locatable(ax)
+ cax = divider.append_axes("right", size="5%", pad=0.05)
+ plt.colorbar(im, cax=cax)
+ ax.set_title('{} {}'.format(p, calendar.month_name[i+1]))
+ ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
+ pdf.savefig()
+ plt.close()
+ except:
+ print('{param} for {month} failed to plot'.\
+ format(param=p, month=calendar.month_name[i+1]))
+
+ ## non spatial parameters with dimension length > 1 to be plotted as time series
+ p_names = [param['name'] for param in params.base_params if\
+ ( 1 < param['length'] <= 366 )\
+ and param['dimnames'][0] != 'nhru' ]
+ with PdfPages(OPJ(out_dir,'non_spatial_param_plots.pdf')) as pdf:
+ for p in p_names:
+ try:
+ param_dict = [param for param in params.base_params if param['name'] == p][0]
+ plt.plot(np.arange(1, param_dict['length']+1, 1), params[p])
+ plt.xlabel(param_dict['dimnames'][0])
+ plt.ylabel(p)
+ plt.xlim(0.5, param_dict['length']+0.5)
+ pdf.savefig()
+ plt.close()
+ except:
+ print('{param} parameter failed to plot'.format(param=p))
+
+ ## html table of parameters with dimension length = 1
+ p_names = [param['name'] for param in params.base_params if param['length'] == 1]
+ df = pd.DataFrame()
+ df.index.name = 'parameter'
+ for p in p_names:
+ df.set_value(p, 'value', params[p])
+ df.to_html(OPJ(out_dir,'single_valued_params.html'))
+ ################################################################
+ # plot single parameter, in case of nhru by monthly param,
+ # save multi-page pdf
+ else:
+ param_name = which
+ try:
+ params[which]
+ except:
+ print('{param} is not a valid PRMS parameter'.format(param=param_name))
+ return
+
+ param_dict = [param for param in params.base_params if param['name'] == param_name][0]
+
+ # labels for single plots
+ if not cbar_label:
+ cbar_label = param_name
+ if not title:
+ title = ''
+
+ # if parameter is not spatial, one dimensional, with length greater than one, plot as line
+ if param_dict['ndims'] == 1 and ( 1 < param_dict['length'] <= 366 )\
+ and param_dict['dimnames'][0] != 'nhru':
+ if not xlabel:
+ xlabel = param_dict['dimnames'][0]
+ if not ylabel:
+ ylabel = param_name
+ plt.plot(np.arange(1, param_dict['length']+1,1), params[param_name])
+ plt.xlim(0.5, param_dict['length']+0.5)
+ plt.xlabel(xlabel)
+ plt.ylabel(ylabel)
+ plt.title(title)
+ # if spatial and one dimensional, plot
+ elif param_dict['ndims'] == 1 and param_dict['length'] == params.dimensions['nhru']:
+ if not xlabel:
+ xlabel = ''
+ if not ylabel:
+ ylabel = ''
+ plt.figure()
+ ax = plt.gca()
+ im = ax.imshow(params[param_name].reshape(nrows,ncols), origin='upper')
+ divider = make_axes_locatable(ax)
+ cax = divider.append_axes("right", size="5%", pad=0.05)
+ plt.colorbar(im, cax=cax,label=cbar_label)
+ ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
+ ax.set_title(title)
+ ax.set_ylabel(ylabel)
+ ax.set_xlabel(xlabel)
+ # spatial monthly parameter
+ elif param_dict['dimnames'][0] == 'nhru' and param_dict['dimnames'][1] == 'nmonths'\
+ and param_dict['ndims'] == 2:
+ if not xlabel:
+ xlabel = ''
+ if not ylabel:
+ ylabel = ''
+ file_name = '{}.pdf'.format(param_name)
+ with PdfPages(OPJ(out_dir, file_name)) as pdf:
+ for i in range(12): #month
+ plt.figure()
+ ax = plt.gca()
+ im = ax.imshow(params['{}'.format(param_name)][i].reshape(nrows,ncols), origin='upper')
+ divider = make_axes_locatable(ax)
+ cax = divider.append_axes("right", size="5%", pad=0.05)
+ plt.colorbar(im, cax=cax)
+ ax.set_title('{} {}'.format(param_name, calendar.month_name[i+1]))
+ ax.set_ylabel(ylabel)
+ ax.set_xlabel(xlabel)
+ ax.tick_params(left='off', bottom='off', labelleft='off', labelbottom='off')
+ pdf.savefig()
+ plt.close()
+ else:
+ val = params[param_name]
+ print('{param} is single valued with value: {v}'.format(param=param_name, v=val))
+
+ def __read_base(self, base_file):
+ "Read base file returning 2-tuple of dimension and params dict"
+
+ params_startline, dimensions = self.__make_dimensions_dict(base_file)
+ base_params = self.__make_parameter_dict(base_file, params_startline)
+
+ return (dimensions, base_params)
+
+ def __make_dimensions_dict(self, base_file):
+ """
+ Extract dimensions and each dimension length. Runs before
+ __make_parameter_dict.
+ """
+ ret = OrderedDict()
+
+ dim_name = ''
+ dim_len = 0
+ # finished = False
+ found_dim_start = False
+ # while not finished:
+ for idx, l in enumerate(self.base_file_reader):
+
+ if l.strip() == '** Dimensions **': # start of dimensions
+ found_dim_start = True
+
+ elif '#' in l: # comments
+ pass
+
+ elif l.strip() == '** Parameters **': # start of parameters
+ dimlines = idx
+ # finished = True
+ break
+
+ elif found_dim_start:
+
+ if dim_name == '':
+ dim_name = l.strip()
+ else:
+ dim_len = int(l)
+ ret.update({dim_name: dim_len})
+ dim_name = ''
+
+ return (dimlines, ret)
+
+ def __make_parameter_dict(self, base_file, params_startline=0):
+ ret = []
+
+ name = ''
+ ndims = 0
+ dimnames = []
+ length = 0
+ vartype = ''
+
+ dimnames_read = 0
+ data_startline = 0
+
+ for idx, l in enumerate(self.base_file_reader):
+
+ if '#' in l:
+ # we have a comment; the next lines will be new
+ # parameter metadata. No data for the first time through, so
+ # we don't want to append an metadata blob with empty values
+ if name:
+ ret.append(
+ dict(
+ name=name,
+ ndims=ndims,
+ dimnames=dimnames,
+ length=length,
+ vartype=vartype,
+ data_startline=data_startline
+ )
+ )
+
+ name = ''
+ ndims = 0
+ dimnames = []
+ length = 0
+ vartype = ''
+ dimnames_read = 0
+
+ elif not name:
+ name = l.strip().split()[0] # in case old format with integer after name
+
+ elif not ndims:
+ ndims = int(l.strip())
+
+ elif not (dimnames_read == ndims):
+ dimnames.append(l.strip())
+ dimnames_read += 1
+
+ elif not length:
+ length = int(l.strip())
+
+ elif not vartype:
+ vartype = l.strip()
+ # advance one from current position and account for starting
+ # to count from zero
+ data_startline = params_startline + idx + 2
+
+ # need to append one more time since iteration will have stopped after
+ # last line
+ ret.append(
+ dict(
+ name=name,
+ ndims=ndims,
+ dimnames=dimnames,
+ length=length,
+ vartype=vartype,
+ data_startline=data_startline
+ )
+ )
+
+ return ret
+
+ def __getitem__(self, key):
+ """
+ Look up a parameter by its name.
+
+ Raises:
+ KeyError if parameter name is not valid
+ """
+ def load_parameter_array(param_metadata):
+
+ startline = param_metadata['data_startline']
+ endline = startline + param_metadata['length'] + 1
+
+ param_slice = itertools.islice(
+ io.open(self.base_file, 'rb'), startline, endline
+ )
+
+ arr = np.genfromtxt(param_slice)
+
+ if param_metadata['ndims'] > 1:
+ dimsizes = [
+ self.dimensions[d] for d in param_metadata['dimnames']
+ ]
+ dimsizes.reverse()
+ arr = arr.reshape(dimsizes)
+
+ return arr
+
+ if key in self.param_arrays:
+ return self.param_arrays[key]
+
+ else:
+ try:
+ param_metadata = [
+ el for el in self.base_params if el['name'] == key
+ ].pop()
+
+ except IndexError:
+ raise KeyError(key)
+
+ arr = load_parameter_array(param_metadata)
+
+ # cache the value for future access (but maybe shouldn't?)
+ self.param_arrays.update({key: arr})
+
+ return arr
+
+ def __setitem__(self, key, value):
+
+ if key in self.param_arrays:
+ cur_arr = self.param_arrays[key]
+ if not value.shape == cur_arr.shape:
+ raise ValueError('New array does not match existing')
+
+ self.param_arrays[key] = value
+
def modify_params(params_in, params_out, param_mods=None):
    '''
    Apply a set of parameter-modification functions to a base parameter
    file and write the result to a new file.

    Arguments:
        params_in (str): location on disk of the base parameter file
        params_out (str): location on disk where the modified parameters
            will be written

    Keyword Arguments:
        param_mods (dict): param name-keyed, param modification
            function-valued

    Returns:
        None

    Example:
        Below we modify the monthly *jh_coef* parameter by increasing it
        10% for every month,

        >>> params_in = 'models/lbcd/parameters'
        >>> params_out = 'scenarios/jh_coef_1.1/params'
        >>> scale_10pct = lambda x: x * 1.1
        >>> modify_params(params_in, params_out, {'jh_coef': scale_10pct})

        So param_mods is a dictionary with keys being parameter names and
        values a function that operates on a single value. Currently we
        only accept functions that operate without reference to any other
        parameters. The function will be applied to every cell, month, or
        cascade routing rule for which the parameter is defined.
    '''
    params = Parameters(params_in)

    # apply each modification function to its parameter's array in place
    for name in param_mods:
        params[name] = param_mods[name](params[name])

    params.write(params_out)
+
+# -*- coding: utf-8 -*-
+'''
+scenario.py -- holds ``Scenario`` and ``ScenarioSeries`` classes for PRMS
+managing parameter-based model scenarios that may be used for hypotheses
+testing.
+'''
+
+import inspect
+import json
+import multiprocessing as mp
+import os
+import shutil
+import uuid
+
+from datetime import datetime
+from .parameters import modify_params, Parameters
+from .data import Data
+from .util import load_statvar
+from .simulation import Simulation
+
+
class ScenarioSeries(object):
    """
    Create and manage a series of model runs where parameters are modified.

    First initialize the series with an optional title and description.
    Then to build the series the user provides a list of dictionaries with
    parameter-function key-value pairs, and optionally a title and
    description for each dictionary defining the individual scenario.

    The ScenarioSeries' ``build`` method creates a file structure under
    the series directory (``scenarios_dir``) where each subdirectory is
    named with a :mod:`uuid` which can be later matched to its title using
    the metadata in ``scenario_dir/series_metadata.json`` (see :mod:`json`).
    In the future we may add a way for the user to access the results of
    the scenario simulations directly through the ``ScenarioSeries``
    instance, but for now the results are written to disk. Therefore each
    scenario's title metadata can be used to refer to which parameters were
    modified and how for post-processing and analysis. One could also use
    the description metadata for this purpose.

    Arguments:
        base_dir (str): path to base inputs; 'control', 'parameters',
            and 'data' must be present there
        scenarios_dir (str): directory where scenario data will be written
            to; will be overwritten or created if it does not exist

    Keyword Arguments:
        title (str, optional): title of the ScenarioSeries instance
        description (str, optional): description of the ScenarioSeries
            instance

    Attributes:
        metadata (dict): dictionary with title, description, and UUID map
            dictionary for individual ``Scenario`` output directories,
            the UUID dictionary (``uuid_title_map``) is left empty until
            calling :meth:`ScenarioSeries.build`.
        scenarios (list): empty list that will be filled with ``Scenario``s
            after defining them by calling :meth:`ScenarioSeries.build`.

    Example:
        There are three steps to both ``Scenario`` and ScenarioSeries,
        first we initialize the object

        >>> series = ScenarioSeries(
        >>>     base_dir = 'dir_with_input_files',
        >>>     scenarios_dir = 'dir_to_run_series',
        >>>     title = 'title_for_group_of_scenarios',
        >>>     description = 'description_for_scenarios'
        >>> )

        The next step is to "build" the ``ScenarioSeries`` by calling the
        :meth:`ScenarioSeries.build` method which defines which parameters
        to modify, how to modify them, and then performs the modification
        which readies the series to be "run" (the last step). See the
        :meth:`ScenarioSeries.build` method for the next step example.

        Also see :ref:`scenario_and_scenarioseries_tutorial` for full example
    """

    def __init__(self, base_dir, scenarios_dir, title=None, description=None):
        self.base_dir = base_dir
        self.scenarios_dir = scenarios_dir

        # start from a clean series directory; previous contents are lost
        if os.path.exists(scenarios_dir):
            shutil.rmtree(scenarios_dir)

        os.mkdir(scenarios_dir)

        # keep a pristine copy of the inputs alongside the scenario dirs
        shutil.copytree(base_dir, os.path.join(scenarios_dir, 'base_inputs'))

        self.metadata = dict(title=title,
                             description=description,
                             uuid_title_map={})
        self.scenarios = []

        self.outputs = None

    @classmethod
    def from_parameters_iter(cls, base_directory, parameters_iter,
                             title=None, description=None):
        '''
        Alternative way to initialize and build a ``ScenarioSeries`` in one
        step.

        Create and build a ``ScenarioSeries`` by including the param-keyed-
        function-valued dictionary (``parameters_iter``) that is otherwise
        passed in :meth:`ScenarioSeries.build`.

        Arguments:
            base_directory (str): directory that contains model input files
            parameters_iter (list of dicts): list of dictionaries for each
                ``Scenario`` as described in :class:`Scenario` and
                :meth:`ScenarioSeries.build`.
            title (str): title for group of scenarios
            description (str): description for group of scenarios

        Returns:
            None
        '''
        # NOTE(review): base_directory is used as both base_dir and
        # scenarios_dir, so __init__ removes and recreates it before the
        # base inputs are copied -- confirm this layout is intended.
        series = cls(base_directory, base_directory,
                     title=title, description=description)

        for parameters in parameters_iter:

            # strip the metadata keys so that only param-name -> function
            # entries remain to be applied to the parameter file
            s_title = parameters.pop('title', None)
            s_description = parameters.pop('description', None)

            uu = str(uuid.uuid4())

            series.metadata['uuid_title_map'].update({uu: s_title})

            scenario_dir = os.path.join(series.scenarios_dir, uu)

            scenario = Scenario(series.base_dir, scenario_dir,
                                title=s_title, description=s_description)

            # pass the modification functions through; previously they were
            # never applied so every scenario was built unmodified
            scenario.build(parameters)

            series.scenarios.append(scenario)

        with open(
            os.path.join(series.scenarios_dir, 'series_metadata.json'), 'w'
        ) as f:
            f.write(json.dumps(series.metadata, indent=2))

    def __len__(self):
        return len(self.scenarios)

    def build(self, scenarios_list):
        """
        Build the scenarios from a list of scenario definitions in
        dictionary form.

        Each element of ``scenarios_list`` can have any number of parameters
        as keys with a function for each value. The other two acceptable keys
        are ``title`` and ``description`` which will be passed on to each
        individual Scenario's metadata in ``series_metadata.json`` for future
        lookups. The ``build`` method also creates a file structure that uses
        UUID values as individual ``Scenario`` subdirectories as shown below.

        Arguments:
            scenarios_list (list): list of dictionaries with key-value
                pairs being parameter-function definition pairs or
                title-title string or description-description string.
        Returns:
            None

        Examples:
            Following the initialization of a ``ScenarioSeries`` instance as
            shown the example docstring there, we "build" the series by
            defining a list of param named-keyed function-valued
            dictionaries. This example uses arbitrary functions on two
            PRMS parameters *snowinfil_max* and *snow_adj*,

            >>> def _function1(x): #Note, function must start with underscore
                    return x * 0.5
            >>> def _function2(x):
                    return x + 5
            >>> dic1 = {'snowinfil_max': _function1, 'title': 'scenario1'}
            >>> dic2 = {'snowinfil_max': _function2,
                        'snow_adj': _function1,
                        'title': 'scenario2',
                        'description': 'we adjusted two snow parameters'
                       }
            >>> example_scenario_list = [dic1, dic2]
            >>> # now we can build the series
            >>> series.build(example_scenario_list)

        In this example that follows from :class:`ScenarioSeries` example the
        file structure that is created by the ``build`` method is as follows::

            dir_to_run_series
            ├── 670d6352-2852-400a-997e-7b12ba34f0b0
            │   ├── control
            │   ├── data
            │   └── parameters
            ├── base_inputs
            │   ├── control
            │   ├── data
            │   └── parameters
            ├── ee9526a9-8fe6-4e88-b357-7dfd7111208a
            │   ├── control
            │   ├── data
            │   └── parameters
            └── series_metadata.json

        As shown the build method has copied the original inputs from the
        ``base_dir`` given on initialization of ``ScenarioSeries`` to a new
        subdirectory of the ``scenarios_dir``, it also applied the
        modifications to the parameters for both scenarios above and moved
        the input files to their respective directories. At this stage the
        ``metadata`` will not have updated the UUID map dictionary to each
        scenario's subdirectory because they have not yet been run. See the
        :meth:`ScenarioSeries.run` method for further explanation including
        the final file structure and metadata file contents.
        """

        title = None
        description = None
        for s in scenarios_list:

            if 'title' in s:
                title = s['title']
                del s['title']

            if 'description' in s:
                description = s['description']
                del s['description']

            uu = str(uuid.uuid4())

            self.metadata['uuid_title_map'].update({uu: title})

            scenario_path = os.path.join(self.scenarios_dir, uu)

            # create Scenario
            scenario = Scenario(
                self.base_dir, scenario_path, title=title,
                description=description
            )

            # s now only contains parameter keys and function references vals
            scenario.build(s)

            self.scenarios.append(scenario)

        with open(
            os.path.join(self.scenarios_dir, 'series_metadata.json'), 'w'
        ) as f:

            f.write(json.dumps(self.metadata, indent=2))

    def run(self, prms_exec='prms', nproc=None):
        """
        Run a "built" ``ScenarioSeries`` and make final updates
        to file structure and metadata.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable. Default = 'prms'
            nproc (int or None): number of processors available to
                parallelize PRMS simulations, if None (default) then use
                half of what the :mod:`multiprocessing` module detects on
                the machine.

        Returns:
            None

        Examples:
            This example starts where the example ends in
            :meth:`ScenarioSeries.build`, calling ``run`` will run the
            models for all scenarios and then update the file structure
            as well as create individual ``Scenario`` metadata files as such::

                dir_to_run_series
                ├── 5498c21d-d064-45f4-9912-044734fd230e
                │   ├── inputs
                │   │   ├── control
                │   │   ├── data
                │   │   └── parameters
                │   ├── metadata.json
                │   └── outputs
                │       ├── prms_ic.out
                │       ├── prms.out
                │       └── statvar.dat
                ├── 9d28ec5a-b570-4abb-8000-8dac113cbed3
                │   ├── inputs
                │   │   ├── control
                │   │   ├── data
                │   │   └── parameters
                │   ├── metadata.json
                │   └── outputs
                │       ├── prms_ic.out
                │       ├── prms.out
                │       └── statvar.dat
                ├── base_inputs
                │   ├── control
                │   ├── data
                │   └── parameters
                └── series_metadata.json

            As we can see the file structure follows the combined structures
            as defined by :class:`Simulation` and :class:`Scenario`. The
            content of the top-level metadata file ``series_metadata.json``
            is as such::

                {
                  "title": "title_for_group_of_scenarios",
                  "description": "description_for_scenarios",
                  "uuid_title_map": {
                    "5498c21d-d064-45f4-9912-044734fd230e": "scenario1",
                    "9d28ec5a-b570-4abb-8000-8dac113cbed3": "scenario2"
                  }
                }

            Therefore one can use the :mod:`json` file to track between
            UUID's and individual scenario titles. The json files are read as
            a Python dictionary which makes them particularly convenient.
            The contents of an individual scenario's ``metadata.json`` file
            include a string representation of the function(s) that were
            applied to the parameter(s)::

                {
                  "description": null,
                  "end_datetime": "2018-09-03T00:00:40.793817",
                  "mod_funs_dict": {
                    "snowinfil_max": "def _function1(x):
                        return x * 0.5"
                  },
                  "start_datetime": "2018-09-03T00:00:30.421353",
                  "title": "scenario1"
                }

        Note:
            As shown, it is important to give appropriate scenario titles
            when building a ``ScenarioSeries`` dictionary in order to later
            understand how parameters were modified in each scenario. If not
            one would have to rely on the individual ``metadata.json`` files
            in each scenario directory which may be more cumbersome.
        """
        from functools import partial

        if not nproc:
            nproc = mp.cpu_count() // 2

        pool = mp.Pool(processes=nproc)
        try:
            # forward prms_exec to the runner; previously it was silently
            # ignored and the default executable was always used
            pool.map(
                partial(_scenario_runner, prms_exec=prms_exec),
                self.scenarios
            )
        finally:
            pool.close()
            pool.join()
+
+
# multiprocessing req the function be def'd at root scope so it's picklable
def _scenario_runner(scenario, prms_exec='prms'):
    # Thin Pool.map target: runs one Scenario with the given PRMS
    # executable (name on $PATH or path to the binary).
    scenario.run(prms_exec=prms_exec)
+
+
class Scenario:
    """
    Container for the process in which one modifies input parameters then
    runs a simulation while tracking metadata.

    Metadata includes a title and description, if provided, plus start/end
    datetime, and parameter names of parameters that were modified including
    string representations of the Python modification functions that were
    applied to each parameter. The metadata file is in :mod:`json` format
    making it conveniently read as a Python dictionary.

    Arguments:
        base_dir (str): path to directory that contains initial *control*,
            *parameter*, and *data* files to be used for ``Scenario``.
            The *parameters* file in ``base_dir`` will not be modified,
            instead it will be copied to ``scenario_dir`` and then modified.
        scenario_dir (str): directory path to bundle inputs and outputs
        title (str, optional): title of ``Scenario``, if given will be
            added to ``Scenario.metadata`` attribute as well as the
            ``metadata.json`` file in ``scenario_dir`` written after
            calling the :func:`Scenario.build()` and :func:`Scenario.run()`
            methods.
        description (str, optional): description of ``Scenario``, also
            is added to ``Scenario.metadata``.

    Attributes:
        metadata (:class:`scenario.ScenarioMetadata`): a dictionary-like
            class in :mod:`prms_python.scenario` that tracks ``Scenario`` and
            ``ScenarioSeries`` information including user-defined parameter
            modifications and descriptions, and file structure.

    Examples:
        This example is kept simple for clarity, here we adjust a
        single PRMS parameter *tmin_lapse* by using a single arbitrary
        mathematical function. We use the example PRMS model included
        with PRMS-Python for this example,

        >>> input_dir = 'PRMS-Python/test/data/models/lbcd'
        >>> scenario_directory = 'scenario_testing'
        >>> title = 'Scenario example'
        >>> desc = 'adjust tmin_lapse using sine wave function'
        >>> # create Scenario instance
        >>> scenario_obj = Scenario(
                base_dir=input_dir,
                scenario_dir=scenario_directory,
                title=title,
                description=desc
            )

        Next we need to build a dictionary to modify, in this case
        *tmin_lapse*, here we use a vectorized sine function

        >>> # build the modification function and dictionary
        >>> def a_func(arr):
                return 4 + np.sin(np.linspace(0,2*np.pi,num=len(arr)))
        >>> # make dictionary with parameter names as keys and modification
        >>> # function as values
        >>> param_mod_dic = dict(tmin_lapse=a_func)
        >>> scenario_obj.build(param_mod_funs=param_mod_dic)

        After building a ``Scenario`` instance the input files are
        copied to ``scenario_dir`` which was assigned 'scenario_testing'::

            scenario_testing
            ├── control
            ├── data
            └── parameters

        After calling ``build`` the input files from ``input_dir`` were
        first copied to ``scenario_dir`` and then the functions in
        ``param_mod_dic`` are applied to the parameter names (keys)
        in ``param_mod_dic``. To run the ``Scenario`` use the ``run``
        method

        >>> scenario_obj.run()

        Now the simulation is run and the ``metadata.json`` file is created,
        the final file structure will be similar to this::

            scenario_testing
            ├── inputs
            │   ├── control
            │   ├── data
            │   └── parameters
            ├── metadata.json
            └── outputs
                ├── prms_ic.out
                ├── prms.out
                └── statvar.dat

        Finally, here is what is contained in ``metadata.json`` for this
        example which is also updated in the :attr:`Scenario.metadata`

        >>> scenario_obj.metadata
        {
         'title': 'Scenario example',
         'description': 'adjust tmin_lapse using sine wave function',
         'start_datetime': '2018-09-01T19:20:21.723003',
         'end_datetime': '2018-09-01T19:20:31.117004',
         'mod_funs_dict': {
            'tmin_lapse': 'def parab(arr):
                return 4 + np.sin(np.linspace(0,2*np.pi,num=len(arr)))'
          }
        }

        As shown the metadata retrieved the parameter modification function
        as a string representation of the exact Python function(s) used for
        modifying the user-defined parameter(s).

    Note:
        The main differentiator between :class:`Scenario` and
        :class:`ScenarioSeries` is that ``Scenario`` is designed for
        modifying one or more parameters of a **single** *parameters* file
        whereas ``ScenarioSeries`` is designed for modifying and tracking
        the modification of one or more parameters in **multiple** PRMS
        *parameters* files, therefore resulting in multiple PRMS
        simulations.
    """

    def __init__(self, base_dir, scenario_dir,
                 title=None, description=None):

        self.title = title
        self.description = description

        self.base_dir = base_dir
        self.scenario_dir = scenario_dir

        self.metadata = ScenarioMetadata(title=title, description=description)

        # run() refuses to execute until build() has been called
        self.__simulation_ready = False

    def build(self, param_mod_funs=None):
        """
        Take a user-defined dictionary with param names as keys and Python
        functions as values, copy the original input files as given when
        initializing a :class:`Scenario` instance to the ``simulation_dir``
        then apply the functions in the user-defined dictionary to the
        parameters there. The ``build`` method must be called before running
        the ``Scenario`` (calling :func:`Scenario.run()` ).

        Keyword Arguments:
            param_mod_funs (dict): dictionary with parameter names as keys
                and Python functions as values to apply to the names (keys)

        Returns:
            None

        Example:
            see :class:`Scenario` for a full example.

        Note:
            If the ``scenario_dir`` that was assigned for the current
            instance already exists, it will be overwritten when ``build``
            is invoked.
        """

        if isinstance(param_mod_funs, dict):

            # create scenario_dir that will be used as Simulation input dir
            if os.path.isdir(self.scenario_dir):
                shutil.rmtree(self.scenario_dir)

            os.makedirs(self.scenario_dir)
            shutil.copy(
                os.path.join(self.base_dir, 'control'), self.scenario_dir
            )
            shutil.copy(
                os.path.join(self.base_dir, 'data'), self.scenario_dir
            )

            old_params_path = os.path.join(self.base_dir, 'parameters')
            new_params_path = os.path.join(self.scenario_dir, 'parameters')
            if not param_mod_funs:
                # empty dict: nothing to modify, just copy the parameters
                shutil.copy(old_params_path, self.scenario_dir)
            else:
                modify_params(old_params_path, new_params_path, param_mod_funs)

            # record the source text of each modification function so the
            # metadata file shows exactly how each parameter was changed
            param_mod_funs_metadata = {
                param_name: inspect.getsource(param_mod_fun)
                for param_name, param_mod_fun in param_mod_funs.items()
            }

            self.metadata['mod_funs_dict'] = param_mod_funs_metadata

            self.simulation = Simulation(self.scenario_dir, self.scenario_dir)

        else:
            # NOTE(review): with no param_mod_funs dict the scenario_dir is
            # used as the Simulation input dir without copying inputs into
            # it first -- confirm callers prepare the directory themselves
            self.simulation = Simulation(self.scenario_dir)

        self.__simulation_ready = True

    def run(self, prms_exec='prms'):
        """
        Run the PRMS simulation for a *built* ``Scenario`` instance.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable

        Returns:
            None

        Examples:
            see :class:`Scenario` for full example

        Raises:
            RuntimeError: if the :func:`Scenario.build` method has not yet
                been called.
        """
        if not self.__simulation_ready:
            # message previously referenced a nonexistent "build_scenario"
            raise RuntimeError(
                'Scenario has not yet been prepared: call build() first'
            )

        self.metadata['start_datetime'] = datetime.now().isoformat()
        self.simulation.run(prms_exec=prms_exec)
        self.metadata['end_datetime'] = datetime.now().isoformat()

        self.metadata.write(os.path.join(self.scenario_dir, 'metadata.json'))
+
+
class ScenarioMetadata:
    """Dict-like container for Scenario run metadata.

    Tracks title, description, start/end datetimes and the string
    representations of parameter-modification functions; serialized to
    JSON via :meth:`write`.
    """

    def __init__(self, title=None, description=None, start_datetime=None,
                 end_datetime=None, mod_funs_dict=None):
        # keep everything in one plain dict so json.dumps can serialize it
        self.metadata_dict = {
            'title': title,
            'description': description,
            'start_datetime': start_datetime,
            'end_datetime': end_datetime,
            'mod_funs_dict': mod_funs_dict,
        }

    def __getitem__(self, key):
        return self.metadata_dict[key]

    def __setitem__(self, key, value):
        self.metadata_dict[key] = value

    def __repr__(self):
        return repr(self.metadata_dict)

    def write(self, output_path):
        """Serialize metadata as pretty-printed, key-sorted JSON."""
        serialized = json.dumps(self.metadata_dict, ensure_ascii=False,
                                indent=4, sort_keys=True)
        with open(output_path, 'w') as f:
            f.write(serialized)
+
+
+
+# -*- coding: utf-8 -*-
+"""
+simulation.py -- Contains ``Simulation`` and ``SimulationSeries`` classes and
+associated functions for managing PRMS simulations at a low level.
+"""
+
+from __future__ import print_function
+import glob
+import multiprocessing as mp
+import os
+import shutil
+import subprocess
+import time
+
+from .data import Data
+from .parameters import Parameters
+from .util import load_statvar
+
+OPJ = os.path.join
+
class SimulationSeries(object):
    '''
    Series of simulations all to be run through a common interface.

    Utilizes :class:`multiprocessing.Pool` class to parallelize the
    execution of series of PRMS simulations. SimulationSeries also
    allows the user to define the PRMS executable command which is
    set to "prms" as default. It is best to add the prms executable to
    your $PATH environment variable. Each simulation that is run through
    ``SimulationSeries`` will follow the strict file structure as defined
    by :func:`Simulation.run()`. This class is useful particularly for
    creating new programmatic workflows not provided yet by PRMS-Python.

    Arguments:
        simulations (list or tuple): list of :class:`Simulation` objects
            to be run.

    Example:
        Lets say you have already created a series of PRMS models by
        modifying the input climatic forcing data, e.g. you have 100 *data*
        files and you want to run each using the same *control* and
        *parameters* file. For simplicity lets say there is a directory
        that contains all 100 *data* files e.g. data1, data2, ... or
        whatever they are named and nothing else. This example also assumes
        that you want each simulation to be run and stored in directories
        named after the *data* files as shown.

        >>> data_dir = 'dir_that_contains_all_data_files'
        >>> params = Parameters('path_to_parameter_file')
        >>> control_path = 'path_to_control'
        >>> # a list comprehension to make multiple simulations with
        >>> # different data files, alternatively you could use a for loop
        >>> sims = [
                Simulation.from_data(
                    Data(data_file),
                    params,
                    control_path,
                    simulation_dir='sim_{}'.format(data_file)
                )
                for data_file in os.listdir(data_dir)
            ]

        Next we can use ``SimulationSeries`` to run all of these
        simulations in parallel. For example we may use 8 logical cores
        on a common desktop computer.

        >>> sim_series = SimulationSeries(sims)
        >>> sim_series.run(nproc=8)

        The ``SimulationSeries.run()`` method will run all 100 simulations
        where chunks of 8 at a time will be run in parallel. Inputs and
        outputs of each simulation will be sent to each simulation's
        ``simulation_dir`` following the file structure of
        :func:`Simulation.run()`.

    Note:
        The :class:`Simulation` and :class:`SimulationSeries` classes
        are low-level in that they alone do not create metadata for
        PRMS simulation scenarios. In other words they do not produce
        any additional files that may help the user know what differs
        between individual simulations.
    '''

    def __init__(self, simulations):
        # materialize so the series can be mapped and measured repeatedly
        self.series = list(simulations)

    def run(self, prms_exec='prms', nproc=None):
        """
        Method to run multiple :class:`Simulation` objects in parallel.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable
            nproc (int or None): number of logical or physical processors
                for parallel execution of PRMS simulations.

        Example:
            see :class:`SimulationSeries`

        Note:
            If ``nproc`` is not assigned the default action is to use half
            of the available processors on the machine using the Python
            :mod:`multiprocessing` module.
        """
        if not nproc:
            nproc = mp.cpu_count() // 2

        # NOTE(review): prms_exec is not forwarded to _simulation_runner,
        # which uses its own default executable -- confirm intended
        pool = mp.Pool(processes=nproc)
        try:
            pool.map(_simulation_runner, self.series)
        finally:
            pool.close()
            pool.join()

        return self

    def outputs_iter(self):
        '''
        Return a :class:`generator` of dictionaries with the path to the
        ``simulation_dir`` as well as paths to the *statvar.dat* output
        file, and *data* and *parameters* input files used in the
        simulation.

        Yields:
            :obj:`dict`: dictionary of paths to simulation directory,
            input, and output files.

        Example:
            >>> ser = SimulationSeries(simulations)
            >>> ser.run()
            >>> g = ser.outputs_iter()

            Would return something like

            >>> print(g.next())
            {
             'simulation_dir': 'path/to/sim/',
             'statvar': 'path/to/statvar',
             'data': 'path/to/data',
             'parameters': 'path/to/parameters'
            }
        '''
        # (a leftover debug print of the directory list was removed here)
        return (
            {
                'simulation_dir': d,
                'statvar': os.path.join(d, 'outputs', 'statvar.dat'),
                'data': os.path.join(d, 'inputs', 'data'),
                'parameters': os.path.join(d, 'inputs', 'parameters')
            }
            for d in (s.simulation_dir for s in self.series)
        )

    def __len__(self):
        # outputs_iter yields exactly one dict per simulation, so the
        # series length is the same without materializing the generator
        return len(self.series)
+
+
def _simulation_runner(sim, prms_exec='prms'):
    # Module-level worker so multiprocessing can pickle it. The executable
    # name is now a parameter (it used to be hard-coded to 'prms', which
    # made SimulationSeries.run's prms_exec argument a no-op); the default
    # keeps existing one-argument calls working unchanged.
    sim.run(prms_exec=prms_exec)
+
+
class Simulation(object):
    """
    Class that runs and manages file structure for a single PRMS simulation.

    The ``Simulation`` class provides low-level management of a PRMS
    simulation by copying model input files from the ``input_dir`` argument
    to an output directory ``simulation_dir``. After calling :meth:`run`,
    two subdirectories "inputs" and "outputs" are created under
    ``simulation_dir`` and the input and output files of the PRMS run are
    moved there (see :meth:`Simulation.run`).

    A ``Simulation`` instance checks that all required PRMS inputs (control,
    parameters, data) exist in the expected locations. If ``simulation_dir``
    is provided and does not exist it will be created; if it does exist it
    will be overwritten.

    Keyword Arguments:
        input_dir (str): path to directory that contains control, parameter,
            and data files for the simulation
        simulation_dir (str): directory path to bundle inputs and outputs

    Example:
        see :func:`Simulation.run`

    Raises:
        RuntimeError: if the input directory does not contain a PRMS *data*,
            *parameters*, and *control* file.
    """

    def __init__(self, input_dir=None, simulation_dir=None):
        self.input_dir = input_dir
        self.simulation_dir = simulation_dir

        if input_dir is not None:
            self.control_path = os.path.join(input_dir, 'control')
            self.parameters_path = os.path.join(input_dir, 'parameters')
            self.data_path = os.path.join(input_dir, 'data')

            # fail fast when any of the three required inputs is missing
            if not os.path.exists(self.control_path):
                raise RuntimeError('Control file missing from ' + input_dir)

            if not os.path.exists(self.parameters_path):
                raise RuntimeError('Parameter file missing from ' + input_dir)

            if not os.path.exists(self.data_path):
                raise RuntimeError('Data file missing from ' + input_dir)

            # build simulation_dir from scratch and stage the inputs there;
            # skipped when it is falsy or identical to input_dir
            if simulation_dir and simulation_dir != input_dir:
                if os.path.exists(simulation_dir):
                    shutil.rmtree(simulation_dir)

                os.mkdir(simulation_dir)

                shutil.copy(self.control_path, simulation_dir)
                shutil.copy(self.data_path, simulation_dir)
                shutil.copy(self.parameters_path, simulation_dir)

                self.control_path = os.path.join(simulation_dir, 'control')
                self.parameters_path = os.path.join(
                    simulation_dir, 'parameters'
                )
                self.data_path = os.path.join(simulation_dir, 'data')
        else:
            # no input_dir given: paths must be provided later, e.g. by
            # the from_data alternate constructor
            self.control_path = None
            self.parameters_path = None
            self.data_path = None
            self.simulation_dir = None

        self.has_run = False

    @classmethod
    def from_data(cls, data, parameters, control_path, simulation_dir):
        '''
        Create a ``Simulation`` from a :class:`Data` and :class:`Parameters`
        object, plus a path to the *control* file, writing everything to
        ``simulation_dir`` where the simulation should be run.

        Arguments:
            data (:class:`Data`): ``Data`` object for simulation
            parameters (:class:`Parameters`): ``Parameters`` object for
                simulation
            control_path (str): path to control file
            simulation_dir (str): path to directory where the simulation
                will be run and where input and output will be stored. If
                it exists it will be overwritten.

        Returns:
            :class:`Simulation` ready to be run using ``simulation_dir``
            for inputs and outputs

        Example:

            >>> d = Data('path_to_data_file')
            >>> p = Parameters('path_to_parameters_file')
            >>> c = 'path_to_control_file'
            >>> sim = Simulation.from_data(d, p, c, 'sim_dir')
            >>> sim.run()

        Raises:
            TypeError: if ``data`` and ``parameters`` arguments are not of
                type :class:`Data` and :class:`Parameters`
        '''
        if not isinstance(data, Data):
            raise TypeError('data must be instance of Data')

        if not isinstance(parameters, Parameters):
            raise TypeError(
                'parameters must be instance of Parameters, not '
                + str(type(parameters))
            )

        # always start from a clean simulation directory
        if os.path.exists(simulation_dir):
            shutil.rmtree(simulation_dir)
        os.makedirs(simulation_dir)

        sim = cls()
        sim.simulation_dir = simulation_dir

        # serialize the in-memory inputs and copy the control file alongside
        data.write(OPJ(simulation_dir, 'data'))
        parameters.write(OPJ(simulation_dir, 'parameters'))
        shutil.copy(control_path, OPJ(simulation_dir, 'control'))

        return sim

    def run(self, prms_exec='prms'):
        """
        Execute PRMS for this simulation and organize its files.

        Runs PRMS in ``simulation_dir`` when one was given, creating
        ``inputs/`` and ``outputs/`` subdirectories there afterwards;
        otherwise runs in ``input_dir`` and leaves all PRMS input and
        output files there unstructured.

        Keyword Arguments:
            prms_exec (str): name of PRMS executable on $PATH or path to
                executable

        Raises:
            RuntimeError: if neither ``simulation_dir`` nor ``input_dir``
                was assigned, so there is nowhere to run PRMS.

        Note:
            Only the *data*, *parameters*, and *control* files are treated
            as PRMS inputs; every other file found in the run directory
            after PRMS completes is moved to ``simulation_dir/outputs/``.
        """
        run_dir = self.simulation_dir or self.input_dir
        if run_dir is None:
            raise RuntimeError(
                'cannot run: no simulation_dir or input_dir assigned'
            )

        cwd = os.getcwd()
        os.chdir(run_dir)
        try:
            # NOTE(review): shell=True is kept for backward compatibility so
            # prms_exec may be a shell command; it should come from a
            # trusted source, never untrusted input
            proc = subprocess.Popen(
                prms_exec + ' control', shell=True,
                stdout=subprocess.PIPE, stderr=subprocess.PIPE
            )
            # communicate() blocks until PRMS exits and closes both pipes;
            # this replaces the former busy-wait poll loop, whose
            # ``poll() >= 0`` test would raise TypeError on Python 3 if
            # poll() ever returned None
            proc.communicate()
            self.has_run = True

            if self.simulation_dir:
                os.mkdir('inputs')
                os.mkdir('outputs')
                shutil.move('data', 'inputs')
                shutil.move('parameters', 'inputs')
                shutil.move('control', 'inputs')

                # everything else left in the directory is model output
                for entry in glob.glob('*'):
                    if not os.path.isdir(entry):
                        shutil.move(entry, 'outputs')
        finally:
            # always restore the caller's working directory, even on error
            os.chdir(cwd)
+
+# -*- coding: utf-8 -*-
+"""
+util.py -- Utilities for working with PRMS data or other functionality that aren't
+appropriate to put elsewhere at this time.
+"""
+import warnings
+import os, shutil, json
+import numpy as np
+import pandas as pd
+
def calc_emp_CDF(data):
    """Deprecated alias for :func:`calc_emp_cdf`, kept for backward
    compatibility after the PEP 8 rename."""
    warnings.warn(
        "calc_emp_CDF is deprecated, please use util.calc_emp_cdf instead",
        DeprecationWarning
    )
    return calc_emp_cdf(data)
+
def calc_emp_cdf(data):
    """
    Create an empirical CDF of arbitrary data.

    Arguments:
        data (array_like) : array to calculate CDF on

    Returns:
        X (numpy.ndarray) : array of x values of CDF (sorted data)

        F (numpy.ndarray) : array of CDF values for each X value, i.e. the
        cumulative exceedence probability, in [0,1].
    """
    sorted_data = np.sort(data)
    n = len(sorted_data)
    # each sorted value's empirical non-exceedence probability: k/n
    probs = np.arange(n) / float(n)
    return sorted_data, probs
+
def Kolmogorov_Smirnov(uncond, cond, n_bins=10000):
    """Deprecated alias for :func:`kolmogorov_smirnov`, kept for backward
    compatibility after the PEP 8 rename.

    Forwards all arguments unchanged; previously the caller's ``n_bins``
    was ignored and the literal 10000 was always passed through, and the
    warning message misspelled the replacement function's name.
    """
    warnings.warn(
        "Kolmogorov_Smirnov is deprecated, please use "
        "util.kolmogorov_smirnov instead", DeprecationWarning
    )
    return kolmogorov_smirnov(uncond, cond, n_bins=n_bins)
+
def kolmogorov_smirnov(uncond, cond, n_bins=10000):
    """
    Calculate the Kolmogorov-Smirnov statistic between two datasets by first
    computing their empirical CDFs.

    Arguments:
        uncond (array_like) : data for creating the unconditional CDF.
        cond (array_like) : data for creating the conditional CDF
        n_bins (int) : number of bins for both CDFs, note if n_bins > length
            of either dataset then CDF values are interpolated by numpy

    Returns:
        KS (float) : Kolmogorov-Smirnov statistic, i.e. absolute max distance
            between uncond and cond CDFs

    Note:
        NOTE(review): the two CDFs are compared bin-by-bin where each
        histogram spans its own data range -- confirm this is the intended
        comparison rather than a shared x-axis.
    """
    # build each empirical CDF by integrating a normalized histogram;
    # density=True replaces the 'normed' keyword removed from numpy >= 1.24
    H, X = np.histogram(uncond, bins=n_bins, density=True)
    dx = X[1] - X[0]
    F_Uc = np.cumsum(H) * dx

    H, X = np.histogram(cond, bins=n_bins, density=True)
    dx = X[1] - X[0]
    F_C = np.cumsum(H) * dx

    # maximum absolute divergence between the two CDFs
    KS = np.max(np.abs(F_Uc - F_C))
    return KS
+
def remove_all_optimization_sims_of_other_stage(work_directory, stage):
    """
    Find simulation directories not tracked by a specific stage and
    recursively delete them and their contents. This was created to avoid
    having untracked simulations in an optimizer working directory, for
    example when an optimization method was interrupted before data was
    saved to a metadata file.

    Arguments:
        work_directory (str) : Directory to look for Optimization metadata
            json files and simulation directories to keep or remove.
        stage (str) : Optimization stage that will not have its simulation
            data deleted. All other stages if any are found in metadata files
            will have their associated simulation directories deleted.
    Returns:
        None
    """
    from .optimizer import OptimizationResult  # avoid circular import

    count = 0
    try:
        result = OptimizationResult(work_directory, stage=stage)
        tracked_dirs = []
        for json_path in result.metadata_json_paths[stage]:
            with open(json_path) as fh:
                json_data = json.load(fh)
            # default to [] so a metadata file without 'sim_dirs' no longer
            # raises (previously extend(None) aborted into the
            # delete-everything fallback below)
            tracked_dirs.extend(json_data.get('sim_dirs', []))
        for entry in os.listdir(result.working_dir):
            path = os.path.join(result.working_dir, entry)
            if path in tracked_dirs:
                continue
            if os.path.isdir(path) and '_archived' not in path:
                count += 1
                # single recursive delete; equivalent to the former
                # bottom-up walk with per-directory rmtree
                shutil.rmtree(path, ignore_errors=True)

    # if no json file in working dir for given stage, delete any other sim
    # dirs; narrowed from a bare except so Ctrl-C and SystemExit propagate
    except Exception:
        count = 0
        for entry in os.listdir(work_directory):
            path = os.path.join(work_directory, entry)
            if os.path.isdir(path) and '_archived' not in path:
                count += 1
                shutil.rmtree(path, ignore_errors=True)

    print('deleted {} simulations that were either not tracked by a JSON file'
          .format(count) + ' or were not part of {} optimization stage'
          .format(stage))
+
+
def delete_files(work_directory, file_name=''):
    """
    Recursively delete all files of a certain name from multiple PRMS
    simulations that are within a given directory. Can be useful to remove
    large files that are no longer needed. For example initial condition
    output files are often large and not always used; similarly animation,
    data, control, ... files may no longer be needed.

    Arguments:
        work_directory (str) : path to directory with simulations.
        file_name (str) : Name of the PRMS input or output file(s) to be
            removed, default = '' empty string- nothing will be deleted.

    Example:
        e.g. if you have several simulation directories:

        >>> "test/results/intcp:-26.50_slope:0.49",
            "test/results/intcp:-11.68_slope:0.54",
            "test/results/intcp:-4.70_slope:0.51"

        each of these contains an '/inputs' folder with a duplicate data
        file that you would like to delete. In this case, delete all
        data files like so:

        >>> work_dir = 'test/results/'
        >>> delete_files(work_dir, file_name='data')

    Returns:
        None
    """
    # walk bottom-up and remove every file whose basename matches exactly;
    # the empty-string default matches nothing, so nothing is deleted
    for dirpath, _dirnames, filenames in os.walk(work_directory,
                                                 topdown=False):
        for fname in filenames:
            if fname == file_name:
                os.remove(os.path.join(dirpath, fname))
+
def load_statvar(statvar_file):
    """
    Read the statvar file and load into a datetime indexed
    Pandas dataframe object

    Arguments:
        statvar_file (str): statvar file path
    Returns:
        (pandas.DataFrame) Pandas DataFrame of PRMS variables date indexed
        from statvar file
    """
    # fixed leading columns present in every statvar file
    column_list = ['index', 'year', 'month', 'day', 'hh', 'mm', 'sec']

    # append the statistical variable names listed in the file header;
    # the first line is always the number of variables that follow
    with open(statvar_file, 'r') as inf:
        for idx, line in enumerate(inf):
            if idx == 0:
                n_statvars = int(line)
            elif idx <= n_statvars:
                column_list.append(line.rstrip().replace(' ', '_'))
            else:
                break

    missing_value = -999  # PRMS missing-data sentinel
    # header=None replaces the header=-1 form removed from pandas, and
    # sep=r'\s+' replaces the deprecated delim_whitespace keyword
    df = pd.read_csv(
        statvar_file, sep=r'\s+', skiprows=n_statvars + 1,
        header=None, na_values=[missing_value]
    )

    # apply correct header names using metadata retrieved from the file
    df.columns = column_list

    # make the df index the datetime built from the y/m/d columns
    df.index = pd.to_datetime(
        df.year * 10000 + df.month * 100 + df.day, format='%Y%m%d'
    )

    # drop the now-redundant date-part columns
    df.drop(['index', 'year', 'month', 'day', 'hh', 'mm', 'sec'],
            axis=1, inplace=True)

    # name dataframe axes (index, columns)
    df.columns.name = 'statistical_variables'
    df.index.name = 'date'

    return df
+
+
def load_data_file(data_file):
    """Deprecated alias for :func:`load_data`, kept for backward
    compatibility after the rename for consistency with
    :func:`load_statvar`."""
    warnings.warn(
        "load_data_file is deprecated, please use util.load_data instead",
        DeprecationWarning
    )
    return load_data(data_file)
+
def load_data(data_file):
    """
    Read the data file and load into a datetime indexed Pandas dataframe
    object.

    Arguments:
        data_file (str): data file path
    Returns:
        df (pandas.DataFrame): Pandas dataframe of input time series data
            from data file with datetime index
    """
    # valid input time series that can be put into a data file
    valid_input_variables = ('gate_ht',
                             'humidity',
                             'lake_elev',
                             'pan_evap',
                             'precip',
                             'rain_day',
                             'runoff',
                             'snowdepth',
                             'solrad',
                             'tmax',
                             'tmin',
                             'wind_speed')
    # fixed date columns at the start of every data row
    column_list = ['year', 'month', 'day', 'hh', 'mm', 'sec']

    # scan the header to collect variable names and find where data begins
    with open(data_file, 'r') as inf:
        for idx, l in enumerate(inf):
            # first line is a free-text file description (currently unused)
            if idx == 0:
                data_head = l.rstrip()
            elif l.startswith('/'):  # comment lines
                continue

            # header lines: variable name followed by its column count
            if l.startswith(valid_input_variables):
                h = l.split()
                n_cols = int(h[1])
                if n_cols > 1:
                    # multiple time series of one variable: name_1, name_2..
                    for el in range(n_cols):
                        column_list.append(
                            '{var_name}_{var_ind}'.format(
                                var_name=h[0], var_ind=el + 1
                            )
                        )
                elif n_cols == 1:
                    column_list.append(h[0])
            # a line of '#' marks the end of header info and the beginning
            # of the time series input data
            if l.startswith('#'):
                skip_line = idx + 1
                break

    missing_value = -999  # PRMS missing-data sentinel
    # header=None replaces the header=-1 form removed from pandas, and
    # sep=r'\s+' replaces the deprecated delim_whitespace keyword
    df = pd.read_csv(data_file, header=None, skiprows=skip_line,
                     sep=r'\s+', na_values=[missing_value])

    # apply correct header names using metadata retrieved from the file
    df.columns = column_list

    # make the df index the datetime built from the y/m/d columns
    df.index = pd.to_datetime(
        df.year * 10000 + df.month * 100 + df.day, format='%Y%m%d'
    )

    # drop the now-redundant date-part columns
    df.drop(['year', 'month', 'day', 'hh', 'mm', 'sec'], axis=1,
            inplace=True)
    df.columns.name = 'input variables'
    df.index.name = 'date'  # name dataframe axes (index, columns)
    return df
+
+
def nash_sutcliffe(observed, modeled):
    """
    Calculate the Nash-Sutcliffe Goodness-of-fit between two series.

    Arguments:
        observed (numpy.ndarray): historic observational data

        modeled (numpy.ndarray): model output with matching time index

    Returns:
        (float): Nash-Sutcliffe efficiency; 1 indicates a perfect match.
    """
    residual_ss = np.sum((observed - modeled) ** 2)
    anomaly_ss = np.sum((observed - np.mean(observed)) ** 2)
    return 1 - residual_ss / anomaly_ss
+
def percent_bias(observed, modeled):
    """
    Calculate percent bias between modeled and observed series.

    Arguments:
        observed (numpy.ndarray): historic observational data

        modeled (numpy.ndarray): model output with matching time index

    Returns:
        (float): total model bias as a percentage of total observed.
    """
    bias_fraction = np.sum(modeled - observed) / np.sum(observed)
    return 100 * bias_fraction
+
+
def rmse(observed, modeled):
    """
    Calculate root mean squared error between two series.

    Arguments:
        observed (numpy.ndarray): historic observational data

        modeled (numpy.ndarray): model output with matching time index

    Returns:
        (float): root mean squared error of the residuals.
    """
    mean_sq_error = np.sum((observed - modeled) ** 2) / len(observed)
    return np.sqrt(mean_sq_error)
+
+
+
+