diff --git a/.travis.yml b/.travis.yml index f3e7ad95..a6f9e0cb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -28,7 +28,8 @@ before_install: - "pip install --global-option=build_ext --global-option='-I$GDALINST/include' --global-option='-L$GDALINST/lib' --global-option='-R$GDALINST/lib' GDAL==$GDAL_MINOR_VERSION" install: - pip install -I pytest>=3.0.0 - - pip install -r requirements.txt . + - pip install -r requirements.txt - pip install . script: + - python setup.py build_sphinx - py.test -v ce/tests diff --git a/README.md b/README.md index 5fd1195c..21667498 100644 --- a/README.md +++ b/README.md @@ -33,6 +33,28 @@ $ source venv/bin/activate (venv)$ pip install -e . ``` +### Building the Documentation + +Building the docs requires the package to be installed first, as docstrings from installed modules are used to generate code documentation. + +``` +pip install -e . +pyenv/bin/python setup.py build_sphinx +``` + +HTML documentation will be in the `doc/build/html` directory. + +Sphinx can also generate a PDF by invoking the TeXlive toolchain, if it is installed on your machine. On Ubuntu, you will need to install the following packages before sphinx can successfully create a PDF: +* latexmk +* texlive-latex-recommended +* texlive-fonts-recommended +* texlive-latex-extra + +You can then create a PDF from the `doc` directory with this command: +``` +make latexpdf +``` + ### Running the dev server A development server can be run locally by using the Flask command line interface documented [here](http://flask.pocoo.org/docs/0.12/cli/). In general, you need to set one environment variable FLASK_APP=ce.wsgi:app and can optionally set FLASK_DEBUG=1 for live code reloading.
diff --git a/ce/api/data.py b/ce/api/data.py index a1173aa2..408fced1 100644 --- a/ce/api/data.py +++ b/ce/api/data.py @@ -20,22 +20,28 @@ def data(sesh, model, emission, time, area, variable, timescale='other', Args: sesh (sqlalchemy.orm.session.Session): A database Session object + model (str): Short name for some climate model (e.g "CGCM3") - emission (str): Short name for some emission scenario (e.g. - "historical+rcp85") + + emission (str): Short name for some emission scenario + (e.g."historical+rcp85") + time (int): Timestep index (0-based) representing the time of year; 0-11 for monthly, 0-3 for seasonal, 0 for annual datasets. + area (str): WKT polygon of selected area + variable (str): Short name of the variable to be returned + timescale (str): Description of the resolution of time to be returned (e.g. "monthly" or "yearly") + ensemble_name (str): Name of ensemble Returns: dict: - - Empty dictionary if there exist no files matching the provided - model and emissions scenario. + Empty dictionary if there exist no files matching the provided + model and emissions scenario. Otherwise returns a single dict keyed on the run id for all runs that match the model and emissions scenario. values are a diff --git a/ce/api/grid.py b/ce/api/grid.py index fc6b860c..1c8b119e 100644 --- a/ce/api/grid.py +++ b/ce/api/grid.py @@ -25,10 +25,11 @@ def grid(sesh, id_): Args: sesh (sqlalchemy.orm.session.Session): A database Session object + id_ (str): Unique id which is a key to the data file requested Returns: - dict: Empty dictionary if model_id is not found in the database. + dict: Empty dictionary if id_ is not found in the database. 
Otherwise, returns a single dict with the key of the file's unique_id and the value consisting of a nested dictionary with diff --git a/ce/api/lister.py b/ce/api/lister.py index 4d0e2119..b1d14c1c 100644 --- a/ce/api/lister.py +++ b/ce/api/lister.py @@ -8,14 +8,17 @@ def lister(sesh, ensemble_name='ce_files', model=None): ''' Args sesh (sqlalchemy.orm.session.Session): A database Session object + ensemble (str): Some named ensemble + model (str): Short name for some climate model (e.g "CGCM3") Returns: list of all unique_ids within that ensemble and/or model. - For example: - ensemble = default, model = PRISM (assuming PRISM group is in 'ce' ensemble) + For example, given ensemble = default, model = PRISM + (assuming PRISM files are in 'ce_files' ensemble):: + [ tmax_monClim_PRISM_historical_run1_198101-201012, tmin_monClim_PRISM_historical_run1_198101-201012, diff --git a/ce/api/metadata.py b/ce/api/metadata.py index 08f2017b..79926094 100644 --- a/ce/api/metadata.py +++ b/ce/api/metadata.py @@ -18,6 +18,7 @@ def metadata(sesh, model_id): Args: sesh (sqlalchemy.orm.session.Session): A database Session object + model_id (str): Unique id which is a key to the data file requested Returns: diff --git a/ce/api/models.py b/ce/api/models.py index 7e6194ca..12bb158a 100644 --- a/ce/api/models.py +++ b/ce/api/models.py @@ -9,13 +9,12 @@ def models(sesh, ensemble_name='ce_files'): Args sesh (sqlalchemy.orm.session.Session): A database Session object - Returns list of all models available: + Returns list of all models available:: - [ - model_short_name1, - model_short_name2, - ... 
- ] + [ + model_short_name1, + model_short_name2, + ] ''' ensemble = sesh.query(Ensemble).filter(Ensemble.name == ensemble_name).first() diff --git a/ce/api/multimeta.py b/ce/api/multimeta.py index 75ed7a43..531d3950 100644 --- a/ce/api/multimeta.py +++ b/ce/api/multimeta.py @@ -23,7 +23,9 @@ def multimeta(sesh, ensemble_name='ce_files', model=''): Args: sesh (sqlalchemy.orm.session.Session): A database Session object + ensemble (str): Some named ensemble + model (str): Short name for some climate model (e.g "CGCM3") Returns: diff --git a/ce/api/multistats.py b/ce/api/multistats.py index b32d9d20..7314d56c 100644 --- a/ce/api/multistats.py +++ b/ce/api/multistats.py @@ -19,18 +19,26 @@ def multistats(sesh, ensemble_name='ce_files', model='', emission='', time=0, Args: sesh (sqlalchemy.orm.session.Session): A database Session object + ensemble_name (str): The name of the application-level ensemble (e.g. "ce_files") + model (str): Short name for some climate model (e.g "CGCM3") to be used as a filter + emission (str): Short name for some emission scenario (e.g. "historical+rcp85") to be used as a filter + time (int): Timestep index (0-based) representing the time of year; 0-11 for monthly, 0-3 for seasonal, 0 for annual datasets. + area (str): WKT polygon of selected area + variable (str): Short name of the variable to be returned + timescale (str): Description of the resolution of time to be returned (e.g. "monthly" or "yearly") + cell_method (str): Statistical operation applied to variable in a climatological dataset (e.g "mean" or "standard_deviation"). Defaulted to "mean". 
diff --git a/ce/api/stats.py b/ce/api/stats.py index 8380347b..35df23f4 100644 --- a/ce/api/stats.py +++ b/ce/api/stats.py @@ -31,10 +31,14 @@ def stats(sesh, id_, time, area, variable): Args: sesh (sqlalchemy.orm.session.Session): A database Session object + id_ (str): Unique id which is a key to the data file requested + time (int): Timestep index (0-based) representing the time of year; 0-11 for monthly, 0-3 for seasonal, 0 for annual datasets. + area (str): WKT polygon of selected area + variable (str): Short name of the variable to be returned Returns: diff --git a/ce/api/streamflow/watershed.py b/ce/api/streamflow/watershed.py index 99649bb8..314c9667 100644 --- a/ce/api/streamflow/watershed.py +++ b/ce/api/streamflow/watershed.py @@ -41,8 +41,16 @@ def watershed(sesh, station, ensemble_name): :param station: (string) Location of drainage point, WKT POINT format :param ensemble_name: (string) Name of the ensemble containing data files backing providing data for this request. - :return: (dict) representation for JSON response object. See function - `worker` for details. + :return: dict representation for JSON response object with the following + attributes: + area: Area of the watershed + + elevation: Minimum and maximum elevations + + shape: A GeoJSON object representing the outline of the watershed; + a concave hull of the cell rectangles. 
+ + hypsometric_curve: Elevation-area histogram of the watershed This function is primarily responsible for finding the relevant data files and converting their contents to `VicDataGrid` objects for consumption by @@ -95,13 +103,7 @@ def worker(station_lonlat, flow_direction, elevation, area, hypso_params=None): :param flow_direction: (VicDataGrid) Flow direction grid :param elevation: (VicDataGrid) Elevation grid :param area: (VicDataGrid) Area grid - :return: dict representation for JSON response object with the following - attributes: - area: Area of the watershed - elevation: Minimum and maximum elevations - shape: A GeoJSON object representing the outline of the watershed; - a concave hull of the cell rectangles. - hypsometric_curve: Elevation-area histogram of the watershed + :return: (dict) representation for JSON response object; see watershed() for details """ if hypso_params is None: # Default parameters cover total range of BC elevations from diff --git a/ce/api/timeseries.py b/ce/api/timeseries.py index 30273a5d..a758510a 100644 --- a/ce/api/timeseries.py +++ b/ce/api/timeseries.py @@ -16,8 +16,11 @@ def timeseries(sesh, id_, area, variable): Args: sesh (sqlalchemy.orm.session.Session): A database Session object + id_ (str): Unique id which is a key to the data file requested + area (str): WKT polygon of selected area + variable (str): Short name of the variable to be returned Returns: diff --git a/doc/Makefile b/doc/Makefile new file mode 100644 index 00000000..d0c3cbf1 --- /dev/null +++ b/doc/Makefile @@ -0,0 +1,20 @@ +# Minimal makefile for Sphinx documentation +# + +# You can set these variables from the command line, and also +# from the environment for the first two. +SPHINXOPTS ?= +SPHINXBUILD ?= sphinx-build +SOURCEDIR = source +BUILDDIR = build + +# Put it first so that "make" without argument is like "make help". 
+help: + @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + +.PHONY: help Makefile + +# Catch-all target: route all unknown targets to Sphinx using the new +# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). +%: Makefile + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) diff --git a/doc/make.bat b/doc/make.bat new file mode 100644 index 00000000..6247f7e2 --- /dev/null +++ b/doc/make.bat @@ -0,0 +1,35 @@ +@ECHO OFF + +pushd %~dp0 + +REM Command file for Sphinx documentation + +if "%SPHINXBUILD%" == "" ( + set SPHINXBUILD=sphinx-build +) +set SOURCEDIR=source +set BUILDDIR=build + +if "%1" == "" goto help + +%SPHINXBUILD% >NUL 2>NUL +if errorlevel 9009 ( + echo. + echo.The 'sphinx-build' command was not found. Make sure you have Sphinx + echo.installed, then set the SPHINXBUILD environment variable to point + echo.to the full path of the 'sphinx-build' executable. Alternatively you + echo.may add the Sphinx directory to PATH. + echo. + echo.If you don't have Sphinx installed, grab it from + echo.http://sphinx-doc.org/ + exit /b 1 +) + +%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% +goto end + +:help +%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% + +:end +popd diff --git a/doc/source/api/api-index.rst b/doc/source/api/api-index.rst new file mode 100644 index 00000000..ddfb6f90 --- /dev/null +++ b/doc/source/api/api-index.rst @@ -0,0 +1,19 @@ +API Documentation +================= + +.. mdinclude:: api-overview.md + +.. 
toctree:: +   :maxdepth: 1 +   :caption: The backend exposes the following API endpoints: + +   data +   grid +   lister +   metadata +   models +   multimeta +   multistats +   stats +   timeseries +   watershed \ No newline at end of file diff --git a/doc/source/api/api-overview.md b/doc/source/api/api-overview.md new file mode 100644 index 00000000..da4f6e6c --- /dev/null +++ b/doc/source/api/api-overview.md @@ -0,0 +1,11 @@ +Documentation for each API endpoint is automatically generated from the code and docstring for that API's main function and may not be entirely user-friendly. There are some minor differences between the internal workings of the API function and the process of querying them over the web. + +The query URL is constructed from a base url ending in a slash, followed by the name of the endpoint, a question mark, and then one or more parameters of the form `attribute=value`, separated by ampersands. Parameters supplied via query URL should be web-encoded so that they will be correctly parsed. + +The automatically generated API documentation describes a `sesh` (database session) argument to each API function. Database sessions are supplied by the query parser and do not need to be given in the query URL. + +For example, the `multimeta` function has a signature of `ce.api.multimeta(sesh, ensemble_name='ce_files', model='')` + +The query URL `https://base_url/multimeta?ensemble_name=ce_files&model=CanESM2` calls the `multimeta` endpoint and supplies two arguments for the `multimeta` function: `ensemble_name` is "ce_files" and `model` is CanESM2. `sesh` is not supplied in the query URL. + +The API function return values are converted to JSON for the endpoint response.
\ No newline at end of file diff --git a/doc/source/api/data-api-usage.md b/doc/source/api/data-api-usage.md new file mode 100644 index 00000000..379d3c8a --- /dev/null +++ b/doc/source/api/data-api-usage.md @@ -0,0 +1,5 @@ +This endpoint accepts parameters describing a collection of datasets and a time of year and extracts data for the requested time of year from all datasets that match the parameters and contain it as one or more timeseries. + +It shows how a variable changes over the long term. For example, with six datasets representing different climatologies it would return mean daily August precipitation from 1961-1990, 1971-2000, 1981-2010, 2010-2039, 2040-2069, and 2070-2099 as a single timeseries. + +This slices the data along a different axis than the `timeseries` endpoint, which shows values of a variable within a given dataset, and would return a timeseries consisting of mean daily precipitation for January 1961-1990, February 1961-1990, etc. instead. \ No newline at end of file diff --git a/doc/source/api/data-api.rst b/doc/source/api/data-api.rst new file mode 100644 index 00000000..253a7a89 --- /dev/null +++ b/doc/source/api/data-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the data endpoint, update the docstring in the + code or data-api-usage.md. + + +data +==== +.. mdinclude:: data-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.data \ No newline at end of file diff --git a/doc/source/api/grid-api-usage.md b/doc/source/api/grid-api-usage.md new file mode 100644 index 00000000..1d013902 --- /dev/null +++ b/doc/source/api/grid-api-usage.md @@ -0,0 +1,3 @@ +This endpoint is not currently used by the PCEX frontend. It was originally intended to provide information needed for a user to select a polygon of interest on a map; this functionality is now handled by queries to the ncWMS map server. 
+ +This endpoint may be of use for systems that want to designate spatial areas of especial interest but are not using ncWMS. \ No newline at end of file diff --git a/doc/source/api/grid-api.rst b/doc/source/api/grid-api.rst new file mode 100644 index 00000000..8673ecfe --- /dev/null +++ b/doc/source/api/grid-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the grid endpoint, update the docstring in the +   code, or grid-api-usage.md. + + +grid +==== +.. mdinclude:: grid-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.grid \ No newline at end of file diff --git a/doc/source/api/lister-api-usage.md b/doc/source/api/lister-api-usage.md new file mode 100644 index 00000000..8ce9308b --- /dev/null +++ b/doc/source/api/lister-api-usage.md @@ -0,0 +1,3 @@ +This endpoint returns a list of unique identification strings corresponding to all available datafiles. + +It is not currently used by the PCEX frontend; it has been superseded by the `multimeta` endpoint. Like `lister`, `multimeta` returns a list of `unique_id` strings associated with all available files in an ensemble, but `multimeta` additionally returns metadata attributes describing the contents of each file. For most applications, it is probably more efficient to call `multimeta` than to call `lister` and then make a separate query to get metadata describing each individual dataset to determine which ones are of interest. \ No newline at end of file diff --git a/doc/source/api/lister-api.rst b/doc/source/api/lister-api.rst new file mode 100644 index 00000000..58b37853 --- /dev/null +++ b/doc/source/api/lister-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the lister endpoint, update the docstring in the +   code or lister-api-usage.md. + + +lister +====== +.. mdinclude:: lister-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +..
autofunction:: ce.api.lister \ No newline at end of file diff --git a/doc/source/api/metadata-api-usage.md b/doc/source/api/metadata-api-usage.md new file mode 100644 index 00000000..1f99ec26 --- /dev/null +++ b/doc/source/api/metadata-api-usage.md @@ -0,0 +1 @@ +This endpoint returns detailed metadata on a single file. In addition to returning attributes describing the data in the file, it returns a list of all timestamps available within the file. This allows a user to request a map image from the map server corresponding to a specific timestamp. \ No newline at end of file diff --git a/doc/source/api/metadata-api.rst b/doc/source/api/metadata-api.rst new file mode 100644 index 00000000..8198a8c1 --- /dev/null +++ b/doc/source/api/metadata-api.rst @@ -0,0 +1,25 @@ +.. Documentation on metadata endpoint is split over three locations: the function's docstring, +   metadata-api-usage.md, which explains the general usage of the endpoint, and this file, which +   takes advantage of the sphinx RST "warning" functionality to post a warning about parameter names. + +metadata +======== +.. mdinclude:: metadata-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +.. warning:: +   Parameter names for this endpoint are not consistent with parameter names for the other +   endpoints. Every other endpoint uses the word "model" to refer to the General Circulation +   Model (GCM) or Regional Climate Model (RCM) that produced a particular dataset. + +   This endpoint uses the "model_id" parameter to refer to a dataset's unique identification +   string, which is called "id_" in every other endpoint. + +   This is a holdover from a much older data design when all data from each model was +   in a single file. + + +------ + +..
autofunction:: ce.api.metadata \ No newline at end of file diff --git a/doc/source/api/models-api-usage.md b/doc/source/api/models-api-usage.md new file mode 100644 index 00000000..ee6bec77 --- /dev/null +++ b/doc/source/api/models-api-usage.md @@ -0,0 +1,3 @@ +This endpoint returns a list of all models in a given ensemble. "Model" in this case usually means the Global Climate Model or Regional Climate Model that simulated the data in the dataset, though in some circumstances it may refer to a different type of model, such as an interpolation algorithm used to grid observational data or elevation data. + +In PCEX, it has been superseded by the `multimeta` query, which returns a list of all datasets, with attributes describing the contents of each dataset, including the model. It's usually more convenient to just collect all the available models from the list of all available datasets. \ No newline at end of file diff --git a/doc/source/api/models-api.rst b/doc/source/api/models-api.rst new file mode 100644 index 00000000..cec227d2 --- /dev/null +++ b/doc/source/api/models-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the models endpoint, update the docstring in the +   code, or models-api-usage.md. + + +models +====== +.. mdinclude:: models-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.models \ No newline at end of file diff --git a/doc/source/api/multimeta-api-usage.md b/doc/source/api/multimeta-api-usage.md new file mode 100644 index 00000000..2a6ffb42 --- /dev/null +++ b/doc/source/api/multimeta-api-usage.md @@ -0,0 +1,16 @@ +This API endpoint provides a list of all the datasets available in a given ensemble. Datasets are identified with a unique identification string. Additional metadata describing the contents of each dataset is provided.
+ +This endpoint is intended to provide an overview of all available datasets to enable a caller to decide which datasets are of further interest for numerical data or mapping. It does not return detailed temporal metadata or any spatial metadata; see the `grid` and `metadata` endpoints for more detailed metadata about temporal or spatial extent of a dataset. + +## Metadata attributes +* `institution`: The research institution that created this dataset +* `model_id`: A short abbreviation for the General Circulation Model, Regional Climate Model, or interpolation algorithm that output this dataset +* `model_name`: The full name of the model that created this dataset +* `experiment`: The emissions scenario used to model this data. Emissions scenarios represent a range of possible future projections for greenhouse gas concentration in the atmosphere, typically one of the Representative Concentration Pathways (RCP). May be "historical" for datasets based on historical data +* `variables`: A list of variables in this dataset, with name and a short description. Variables are the numerical quantities being measured or projected, such as maximum temperature, precipitation, or derived indices. +* `ensemble_member`: A model may be run multiple times with different initialization conditions; data from these runs is distinguished by the ensemble_member attribute +* `timescale`: The temporal resolution of the data. `monthly`, `seasonal`, or `yearly` +* `multi_year_mean`: Whether or not this datafile is a climatological aggregate. In a climatological aggregate dataset, the value at each timestamp represents a combination of values at that timestamp across multiple years. For example, a 1961-1990 climatological mean would have one value for January that represented the mean value of January 1961, January 1962, etc. 
+* `start_date`: The start of the temporal interval described by this dataset +* `end_date`: The end of the temporal interval described by this dataset +* `modtime`: The most recent date this dataset was updated. Useful for determining whether to cache data. \ No newline at end of file diff --git a/doc/source/api/multimeta-api.rst b/doc/source/api/multimeta-api.rst new file mode 100644 index 00000000..840885cb --- /dev/null +++ b/doc/source/api/multimeta-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the multimeta endpoint, update the docstring in the +   code, or multimeta-api-usage.md. + + +multimeta +========= +.. mdinclude:: multimeta-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.multimeta \ No newline at end of file diff --git a/doc/source/api/multistats-api-usage.md b/doc/source/api/multistats-api-usage.md new file mode 100644 index 00000000..b6c16534 --- /dev/null +++ b/doc/source/api/multistats-api-usage.md @@ -0,0 +1 @@ +This endpoint is similar to the `stats` endpoint, but instead of accepting a unique identification string and returning statistics for a timestamp in a single file, it accepts metadata parameters and returns statistics for the selected timestamp from all files that meet those parameters and contain the selected timestamp, organized by the unique ID string. This supports applications that display how statistical measures of a variable change over time or between simulated runs. \ No newline at end of file diff --git a/doc/source/api/multistats-api.rst b/doc/source/api/multistats-api.rst new file mode 100644 index 00000000..df0c19f4 --- /dev/null +++ b/doc/source/api/multistats-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the multistats endpoint, update the docstring in the +   code, or multistats-api-usage.md. + + +multistats +========== +.. mdinclude:: multistats-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +..
autofunction:: ce.api.multistats \ No newline at end of file diff --git a/doc/source/api/sesh-not-needed.md b/doc/source/api/sesh-not-needed.md new file mode 100644 index 00000000..9538387f --- /dev/null +++ b/doc/source/api/sesh-not-needed.md @@ -0,0 +1 @@ +Web queries made to the API do not supply a `sesh` parameter; it will be automatically generated by the query parser upon query receipt. \ No newline at end of file diff --git a/doc/source/api/stats-api-usage.md b/doc/source/api/stats-api-usage.md new file mode 100644 index 00000000..81f2cfd2 --- /dev/null +++ b/doc/source/api/stats-api-usage.md @@ -0,0 +1 @@ +This API endpoint returns statistical measures for the selected timestamp in the selected file. To access identical statistical measures for several datasets at once, see the multistat endpoint. \ No newline at end of file diff --git a/doc/source/api/stats-api.rst b/doc/source/api/stats-api.rst new file mode 100644 index 00000000..b8a5251e --- /dev/null +++ b/doc/source/api/stats-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the stats endpoint, update the docstring in the + code, or stats-api-usage.md. + + +stats +===== +.. mdinclude:: stats-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.stats \ No newline at end of file diff --git a/doc/source/api/timeseries-api-usage.md b/doc/source/api/timeseries-api-usage.md new file mode 100644 index 00000000..3f414404 --- /dev/null +++ b/doc/source/api/timeseries-api-usage.md @@ -0,0 +1,5 @@ +This API returns the mean value of each timestamp in the selected datafile, as indicated by that file's unique identification string. Almost all files available to this endpoint represent climatologies; in these cases, this endpoint returns an annual cycle, with 12 entries (monthly), 4 entries (seasonal), or 1 entry (annual), depending on the resolution of the requested dataset. 
+ +While generating annual cycle data from climatologies is the expected use case of this endpoint, it can also return timestamped data from non-climatological files. One value per timestamp will be returned. + +To see how the value of a variable at a particular timestamp changes over the long term, instead of over the course of a year, use the `data` endpoint. \ No newline at end of file diff --git a/doc/source/api/timeseries-api.rst b/doc/source/api/timeseries-api.rst new file mode 100644 index 00000000..d0db209d --- /dev/null +++ b/doc/source/api/timeseries-api.rst @@ -0,0 +1,13 @@ +.. To update documentation on the timeseries endpoint, update the docstring in the +   code, or timeseries-api-usage.md. + + +timeseries +========== +.. mdinclude:: timeseries-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.timeseries \ No newline at end of file diff --git a/doc/source/api/watershed-api-usage.md b/doc/source/api/watershed-api-usage.md new file mode 100644 index 00000000..2f4a5a10 --- /dev/null +++ b/doc/source/api/watershed-api-usage.md @@ -0,0 +1,21 @@ +This API endpoint provides contextual information about the watershed that drains to the point specified by the parameters. It is intended to clarify and provide context for data about streamflow in the watershed. + +Every grid cell is defined as flowing into a single other grid cell, so this data is most reliable for larger watersheds representing at least ten grid cells, and completely inappropriate for creeks or culverts smaller than a single grid cell. At small scales, streamflow variations within grid cells, not capturable by a gridded dataset, play too large a role. + +## Hypsometric curve +The `hypsometric_curve` object defines a histogram of area by elevation. + +* Elevation bins are of equal width, `w = elevation_bin_width`.
+* Elevation bin `k` is bounded by elevations `ek` and `ek+1`, where `ek = e0 + (k * w)` for `0 <= k < n` +* where `e0 = elevation_bin_start` and `n = elevation_num_bins` +* The sum of areas of all cells with elevation falling into elevation bin `k` is given by `ak = cumulative_areas[k]`. +* Units of elevation and area are specified by the properties `elevation_units` and `area_units`. + +### Gaps in the hypsometric curve, or "empty" bins +For large areas of the earth and reasonably large elevation bins, we expect to see non-zero cumulative areas for each elevation bin between the minimum and maximum elevation over that area. In other words, there should be at least some area at each elevation in the histogram. + +However, for small areas with steep topography, it is common to see some of the elevation bins between min and max elevation with zero area. This is not an error in either the computation or the data that feeds it. It is instead a product of the fact that `n` surface grid cells can represent at most `n` elevations. + +Consider the most extreme case of `n = 2` cells that happen to be positioned at the edge of a steep sided valley. One cell is in the valley bottom with an average elevation of 100 m. The other cell, just adjacent to it, mostly covers the highland above with an average elevation of 500 m. In a histogram with 100 m bin width, we'd see non-zero areas for the 100 m bin and the 500 m bin, but zero areas for the 200 m, 300 m, and 400 m elevation bins, and in the graph these would look like gaps. + +We can see a similar effect for other small values of `n > 2` in steep terrain too. Once `n` becomes large enough, then the likelihood of an elevation bin not having some cells is quite low and these gaps do not appear. \ No newline at end of file diff --git a/doc/source/api/watershed-api.rst b/doc/source/api/watershed-api.rst new file mode 100644 index 00000000..db63d548 --- /dev/null +++ b/doc/source/api/watershed-api.rst @@ -0,0 +1,13 @@ +..
To update documentation on the watershed endpoint, update the docstring in the + code, or watershed-api-usage.md. + + +streamflow/watershed +==================== +.. mdinclude:: watershed-api-usage.md + +.. mdinclude:: sesh-not-needed.md + +------ + +.. autofunction:: ce.api.watershed \ No newline at end of file diff --git a/doc/source/conf.py b/doc/source/conf.py new file mode 100644 index 00000000..21a2ed16 --- /dev/null +++ b/doc/source/conf.py @@ -0,0 +1,59 @@ +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +# import os +# import sys +# sys.path.insert(0, os.path.abspath('.')) +import sys +import os +sys.path.insert(0, os.path.abspath('../../ce')) + +# -- Project information ----------------------------------------------------- + +project = 'climate-explorer-backend' +copyright = '2019, James Hiebert' +author = 'James Hiebert' + +# The full version, including alpha/beta/rc tags +release = '1.1.1' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = ['sphinx.ext.autodoc', 'm2r'] + +# Add any paths that contain templates here, relative to this directory. +templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. 
+# This pattern also affects html_static_path and html_extra_path. + +# We're using it to suppress warnings on *.md files inlined with the include +# command, which for some reason, sphinx doesn't count as source files. +exclude_patterns = ['api/*-usage.md', 'api/api-overview.md', 'api/sesh-not-needed.md'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'alabaster' + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +html_static_path = ['_static'] \ No newline at end of file diff --git a/doc/source/index.rst b/doc/source/index.rst new file mode 100644 index 00000000..df9a5757 --- /dev/null +++ b/doc/source/index.rst @@ -0,0 +1,19 @@ +.. climate-explorer-backend documentation master file, created by +   sphinx-quickstart on Thu Dec 12 14:30:23 2019. +   You can adapt this file completely to your liking, but it should at least +   contain the root `toctree` directive. + +PCIC Climate Explorer Backend Documentation +============================================ + +This backend serves climate data and metadata to the +`PCIC Climate Explorer (PCEX) `_ +and other applications. + +.. toctree:: +   :maxdepth: 2 +   :caption: Contents: + +   overview +   workflow +   api/api-index diff --git a/doc/source/overview.md b/doc/source/overview.md new file mode 100644 index 00000000..0cfb5e1e --- /dev/null +++ b/doc/source/overview.md @@ -0,0 +1,51 @@ +# Overview + +## Data Matrix +This set of APIs provides queries for retrieving statistical and summary data from a multidimensional data matrix containing climate data. + +The data is presented as a regular matrix.
Each dataset has a list of latitudes, a list of longitudes, a list of timestamps, and a list of variables such as temperature, precipitation, or derived values like growing degree days. A value may be accessed for every combination of latitude, longitude, timestamp, and variable, which represents the value of that variable at that particular point in time and space. + +These data are interpolated from real-world observations or generated by General Circulation Models or Regional Climate Models, which are able to provide complete data coverage. Individual points may be masked off as missing, but this is typically only done to mark the boundaries of irregular geographic extents, like coastlines. The data should be assumed to be dense within the spatial and temporal boundaries. + + +## Data Access +This system is not designed to provide individual access to every point in the data matrix, but calculates and makes available summary data over user-defined spatial and temporal scales to support analysis and visualization. All numerical API endpoints accept a spatial area of interest (specified in URL-encoded WKT as a point or polygon) and return the mean (or other specified statistical measure) of all cells within the designated area. If no area is designated, the mean across the entire spatial extent of the dataset is returned. + +Datafiles are organized by temporal range and resolution. + +## Modeled Data + +Most data accessible by the API is climate data from a General Circulation Model (GCM) or Regional Climate Model (RCM). These numerical models represent physical processes in the atmosphere, ocean, cryosphere, and land surface of the earth. They simulate the response of the global climate system. Model outputs are meteorological variables such as temperature and precipitation. Temporally, they typically cover the period 1950 to 2100. Data for any dates in the future are termed "projections." 
Other variables represent statistical indices calculated from the meteorological variables output by models, and are associated with the same model that output the base data. + +A small minority of the datasets accessible by this API are not GCM or RCM outputs, but are intended to help provide additional context to the GCM and RCM outputs. These contextual data were instead created by models that interpolate observed data to fill spatial and temporal gaps. These non-GCM interpolated datasets have no projection data associated with dates in the future. + +### Emissions Scenarios + +Each dataset has an associated emissions scenario. Emissions scenarios represent a range of possible future projections for greenhouse gas concentrations. GCMs and RCMs are typically driven by historical data on greenhouse gas concentrations when modeling the past and present, before using an emissions scenario to provide input on greenhouse gas concentrations to drive future projections, typically a Representative Concentration Pathway scenario defined by the IPCC. + +Datasets created by interpolative models feature only historical emissions scenarios. + +The parameter name `experiment` is used to designate an emissions scenario in the API and code. + +### Runs + +Each dataset has an associated run string, which represents the initialization settings of the simulating model. A GCM or RCM may be run multiple times with different initialization conditions. A collection of related runs with different initializations of the same model comprise a statistical ensemble, which can be used to give some idea of the range of possible outcomes of the model system. 
+ +GCMs and RCMs follow a standard encoding for the members of a statistical ensemble, `rXiXpX`, which is provided to the API as the parameter `ensemble_member`: +* rX where X is an integer representing the realization of the run +* iX where X is an integer representing the initialization method of this run +* pX where X is an integer representing the physics version used for this run. + +Interpolative models can't typically be run multiple times to create a statistical ensemble, and the concept of an ensemble_member code doesn't apply. Nevertheless, for uniformity, we have generalized from the common GCM case, and datasets output by interpolative models have an `ensemble_member` string that does not follow the `rXiXpX` encoding, such as "n/a" or "nominal". + +## Climatological Aggregates + +While GCM and RCM models output daily or sub-daily projections, most data accessible via this API is normalized as multi-year climatological statistics with monthly, seasonal, or annual resolution. + +The statistical methods used to generate the monthly, seasonal, or annual values vary with the nature of the data. For example, a monthly mean precipitation value (`pr`) may be calculated by taking the mean daily precipitation for every day in a month. A maximum one-day monthly precipitation value (`rx1day`) may be calculated by taking the single largest total precipitation that falls on one day in the month. + +Most of the data is then further aggregated between years over a specified amount of time, typically 30 years, the standard in climate science. For example, the January value of a climatological aggregate dataset for 1961-1990 represents the statistically aggregated values for January 1961, January 1962, and so on up to January 1990. The February value represents February 1961, February 1962, and so on. + +A series of these overlapping 30-year aggregated climatologies is generated from the entire timespan of the model output data. 
The aggregating function used to create multi-year climatological datasets is either `mean`, to show long term trends in the numerical quantity being measured, or `stdev`, to show long term variability of the numerical quantity being measured. + +A small number of non-aggregated datasets are available, but they are not the primary purpose of this system, and many of the queries are not available for these datasets. These datasets are removed from circulation and replaced with aggregated datasets when possible. \ No newline at end of file diff --git a/doc/source/workflow.md b/doc/source/workflow.md new file mode 100644 index 00000000..0905a5b4 --- /dev/null +++ b/doc/source/workflow.md @@ -0,0 +1,31 @@ +# Typical Workflows + +## List Available Datasets + +Datafiles are organized into ensembles containing all data needed for a specific purpose. (The term "ensemble" is a misnomer in this usage; a more appropriate term would be "collection." For historical reasons the term "ensemble" is embedded in the code and it is not easily changed at this point.) + +To view a list of all datasets in a particular ensemble, query the `multimeta` API. The `multimeta` API gives each datafile as a unique identifier string and a collection of attributes describing the data contained within that file. After a datafile of interest has been determined from its metadata attributes, its unique identifier string may be used to request the data. + +The `multi_year_mean` attribute is an important attribute of datafiles. Datafiles with `multi_year_mean` equal to `true` represent climatological aggregates. Each individual value in these files represents a mean or standard deviation calculated across multiple years, typically thirty years, which is standard in climate science. For example, a monthly climatological mean might cover 1961-1990, but feature only twelve timestamps. The January timestamp is the mean of the value for January 1961, January 1962, and so on up to January 1990.
The February timestamp is the mean of the values for February 1961, February 1962, and so on. Climatological means may be monthly, seasonal, or annual. This API primarily supports analysis of climatological datasets, and more analysis options are available for them. + +Datasets with `multi_year_mean` equal to `false` represent nominal time datasets; no aggregation has been done between years. A monthly dataset covering 1961-1990 would feature 360 timestamps. + +## Request Numerical Data From A Climatological Aggregate Datafile + +The `timeseries` endpoint returns a timeseries with the mean value for each timestamp in the datafile. It requires a datafile's unique identification string, and optionally a spatial area of interest defined as a Well Known Text (WKT) Polygon or Point. For a climatological aggregate datafile, the resulting timeseries represents an average annual cycle over the period described by the dataset. The annual cycle may have twelve monthly values, four seasonal values, or a single annual value. + +The `stats` endpoint returns statistical measures (`mean`, `stdev`, `min`, `max`, and `median`) of a single dataset identified by its unique identification string. The timestep of interest is defined by a temporal index. An optional spatial area of interest may be defined as a WKT Polygon or Point. The statistical measures will be calculated over the time and space extent within the dataset. + +You may wish to compare the statistical measures of a related set of climatological aggregate datafiles. The `multistats` query functions similarly to the `stats` query, but on several files that share common parameters at once. The `multistats` query may be called with parameters that describe a set of datasets by specifying all parameters except the start and end dates, as well as a time index and optional spatial area of interest. It responds with the same information as the `stats` query, but for every datafile that matches the parameters. 
+ +Similarly, the `data` API is also queried by submitting parameters that describe a set of datafiles by specifying all parameters except the start and end dates, as well as a time index and optional spatial area of interest. It returns a timeseries constructed from all climatological aggregate files that meet the parameters. For example, it would return the January value for the 1961-1990 climatology, the January value for the 1971-2000 climatology, etc, to make a timeseries showing projected long-term change over time of the mean value for January. + +## Request A Non-Climatological Timeseries + +Fewer options are available for datafiles with the `multi_year_mean` attribute of `false`, which have no guaranteed temporal structure or organization. The `timeseries` API endpoint requires a datafile's unique identification string and optionally a spatial area of interest. It responds with a timeseries consisting of the average values over the area of interest, one for each timestamp in the datafile. The timestamps need not be evenly spaced. + +## Request a Map + +In order to request a data map image from PCIC's ncWMS server, two pieces of information are required. Firstly, the identification string of the file is required. The identification string of the file of interest can be determined from the `multimeta` query, which lists all files available in a collection. + +A timestamp is also needed for ncWMS. The `metadata` API endpoint can be accessed with the datafile's unique identification string, and provides a list of timestamps available in the file, which would be usable by ncWMS.
\ No newline at end of file diff --git a/requirements.txt b/requirements.txt index e951d623..7149f0d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -9,6 +9,11 @@ GDAL~=2.3 rasterio~=1.0 sqlalchemy==1.2.18 contexttimer==0.3.3 + +# For documentation +sphinx +m2r + # For testing pytest python-dateutil diff --git a/setup.py b/setup.py index 3ce64d16..4dbfc9f2 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,11 @@ import os from setuptools import setup, find_packages +from warnings import warn +try: + from sphinx.setup_command import BuildDoc +except ImportError: + warn("Could not import sphinx. You won't be able to build the docs") def recursive_list(pkg_dir, basedir): def find(): @@ -40,5 +45,15 @@ def find(): package_data={ 'ce': ['tests/data/*.nc', 'templates/*.html'] + recursive_list('ce/', 'ce/static'), }, + cmdclass = { + 'build_sphinx': BuildDoc + }, + command_options={ + 'build_sphinx': { + 'project': ('setup.py', "ce"), + 'version': ('setup.py', __version__), + 'source_dir': ('setup.py', 'doc/source') + } + }, zip_safe=False )