Merge pull request #114 from pacificclimate/sphinx-doc

Add sphinx documentation of API functionality
pacificclimate · Jan 2, 2020 · ab35507 · ab35507
2 parents b3b1562 + 064752a
commit ab35507
Show file tree

Hide file tree

Showing 43 changed files with 543 additions and 24 deletions.
diff --git a/.travis.yml b/.travis.yml
@@ -28,7 +28,8 @@ before_install:
   - "pip install --global-option=build_ext --global-option='-I$GDALINST/include' --global-option='-L$GDALINST/lib' --global-option='-R$GDALINST/lib' GDAL==$GDAL_MINOR_VERSION"
 install:
   - pip install -I pytest>=3.0.0
-  - pip install -r requirements.txt .
+  - pip install -r requirements.txt
   - pip install .
 script:
+  - python setup.py build_sphinx 
   - py.test -v ce/tests
diff --git a/README.md b/README.md
@@ -33,6 +33,28 @@ $ source venv/bin/activate
 (venv)$ pip install -e .
 ```
 
+### Building the Documentation
+
+Building the docs requires the package to be installed first, as docstrings from installed modules are used to generate code documentation. 
+
+```
+pip install -e .
+pyenv/bin/python setup.py build_sphinx
+```
+
+HTML documentation will be in the `doc/build/html` directory. 
+
+Sphinx can also generate a PDF by invoking the TeXlive toolchain, if it is installed on your machine. On Ubuntu, you will need to install the following packages before Sphinx can successfully create a PDF:
+* latexmk
+* texlive-latex-recommended
+* texlive-fonts-recommended
+* texlive-latex-extra
+
+You can then create a PDF from the `doc` directory with this command:
+```
+make latexpdf
+```
+
 ### Running the dev server
 
 A development server can be run locally by using the Flask command line interface documented [here](http://flask.pocoo.org/docs/0.12/cli/). In general, you need to set one environment variable FLASK_APP=ce.wsgi:app and can optionally set FLASK_DEBUG=1 for live code reloading.

diff --git a/ce/api/data.py b/ce/api/data.py
@@ -20,22 +20,28 @@ def data(sesh, model, emission, time, area, variable, timescale='other',
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         model (str): Short name for some climate model (e.g "CGCM3")
-        emission (str): Short name for some emission scenario (e.g.
-            "historical+rcp85")
+        
+        emission (str): Short name for some emission scenario
+            (e.g. "historical+rcp85")
+        
         time (int): Timestep index (0-based) representing the time of year;
             0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
+        
         area (str): WKT polygon of selected area
+        
         variable (str): Short name of the variable to be returned
+        
         timescale (str): Description of the resolution of time to be
             returned (e.g. "monthly" or "yearly")
+        
         ensemble_name (str): Name of ensemble
 
     Returns:
         dict:
-
-        Empty dictionary if there exist no files matching the provided
-        model and emissions scenario.
+            Empty dictionary if there exist no files matching the provided
+            model and emissions scenario.
 
         Otherwise returns a single dict keyed on the run id for all
         runs that match the model and emissions scenario. values are a

diff --git a/ce/api/grid.py b/ce/api/grid.py
@@ -25,10 +25,11 @@ def grid(sesh, id_):
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         id_ (str): Unique id which is a key to the data file requested
      
     Returns:
-        dict: Empty dictionary if model_id is not found in the database.
+        dict: Empty dictionary if id_ is not found in the database.
 
         Otherwise, returns a single dict with the key of the file's
         unique_id and the value consisting of a nested dictionary with

diff --git a/ce/api/lister.py b/ce/api/lister.py
@@ -8,14 +8,17 @@ def lister(sesh, ensemble_name='ce_files', model=None):
     '''
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         ensemble_name (str): Some named ensemble
+        
         model (str): Short name for some climate model (e.g "CGCM3")
 
     Returns:
         list of all unique_ids within that ensemble and/or model.
 
-        For example:
-            ensemble = default, model = PRISM (assuming PRISM group is in 'ce' ensemble)
+        For example, given ensemble = default, model = PRISM
+        (assuming PRISM files are in 'ce_files' ensemble)::
+
             [
                 tmax_monClim_PRISM_historical_run1_198101-201012,
                 tmin_monClim_PRISM_historical_run1_198101-201012,

diff --git a/ce/api/metadata.py b/ce/api/metadata.py
@@ -18,6 +18,7 @@ def metadata(sesh, model_id):
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         model_id (str): Unique id which is a key to the data file requested
 
     Returns:

diff --git a/ce/api/models.py b/ce/api/models.py
@@ -9,13 +9,12 @@ def models(sesh, ensemble_name='ce_files'):
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
 
-    Returns list of all models available:
+    Returns list of all models available::
 
-    [
-    model_short_name1,
-    model_short_name2,
-    ...
-    ]
+        [
+        model_short_name1,
+        model_short_name2,
+        ]
     '''
 
     ensemble = sesh.query(Ensemble).filter(Ensemble.name == ensemble_name).first()

diff --git a/ce/api/multimeta.py b/ce/api/multimeta.py
@@ -23,7 +23,9 @@ def multimeta(sesh, ensemble_name='ce_files', model=''):
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         ensemble_name (str): Some named ensemble
+        
         model (str): Short name for some climate model (e.g "CGCM3")
 
     Returns:

diff --git a/ce/api/multistats.py b/ce/api/multistats.py
@@ -19,18 +19,26 @@ def multistats(sesh, ensemble_name='ce_files', model='', emission='', time=0,
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         ensemble_name (str): The name of the application-level ensemble
             (e.g. "ce_files")
+        
         model (str): Short name for some climate model (e.g "CGCM3") to
             be used as a filter
+        
         emission (str): Short name for some emission scenario (e.g.
             "historical+rcp85") to be used as a filter
+        
         time (int): Timestep index (0-based) representing the time of year;
             0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
+        
         area (str): WKT polygon of selected area
+        
         variable (str): Short name of the variable to be returned
+        
         timescale (str): Description of the resolution of time to be
             returned (e.g. "monthly" or "yearly")
+        
         cell_method (str): Statistical operation applied to variable in a
             climatological dataset (e.g "mean" or "standard_deviation").
             Defaulted to "mean".

diff --git a/ce/api/stats.py b/ce/api/stats.py
@@ -31,10 +31,14 @@ def stats(sesh, id_, time, area, variable):
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         id_ (str): Unique id which is a key to the data file requested
+        
         time (int): Timestep index (0-based) representing the time of year;
             0-11 for monthly, 0-3 for seasonal, 0 for annual datasets.
+        
         area (str): WKT polygon of selected area
+        
         variable (str): Short name of the variable to be returned
 
     Returns:

diff --git a/ce/api/streamflow/watershed.py b/ce/api/streamflow/watershed.py
@@ -41,8 +41,16 @@ def watershed(sesh, station, ensemble_name):
     :param station: (string) Location of drainage point, WKT POINT format
     :param ensemble_name: (string) Name of the ensemble containing the data files
         that provide data for this request.
-    :return: (dict) representation for JSON response object. See function
-        `worker` for details.
+    :return: dict representation for JSON response object with the following
+        attributes:
+            area: Area of the watershed
+            
+            elevation: Minimum and maximum elevations
+            
+            shape: A GeoJSON object representing the outline of the watershed;
+                a concave hull of the cell rectangles.
+            
+            hypsometric_curve: Elevation-area histogram of the watershed
 
     This function is primarily responsible for finding the relevant data files
     and converting their contents to `VicDataGrid` objects for consumption by
@@ -95,13 +103,7 @@ def worker(station_lonlat, flow_direction, elevation, area, hypso_params=None):
     :param flow_direction: (VicDataGrid) Flow direction grid
     :param elevation: (VicDataGrid) Elevation grid
     :param area: (VicDataGrid) Area grid
-    :return: dict representation for JSON response object with the following
-        attributes:
-            area: Area of the watershed
-            elevation: Minimum and maximum elevations
-            shape: A GeoJSON object representing the outline of the watershed;
-                a concave hull of the cell rectangles.
-            hypsometric_curve: Elevation-area histogram of the watershed
+    :return: (dict) representation for JSON response object; see watershed() for details
     """
     if hypso_params is None:
         # Default parameters cover total range of BC elevations from

diff --git a/ce/api/timeseries.py b/ce/api/timeseries.py
@@ -16,8 +16,11 @@ def timeseries(sesh, id_, area, variable):
 
     Args:
         sesh (sqlalchemy.orm.session.Session): A database Session object
+        
         id_ (str): Unique id which is a key to the data file requested
+        
         area (str): WKT polygon of selected area
+        
         variable (str): Short name of the variable to be returned
 
     Returns:

diff --git a/doc/Makefile b/doc/Makefile
@@ -0,0 +1,20 @@
+# Minimal makefile for Sphinx documentation
+#
+
+# You can set these variables from the command line, and also
+# from the environment for the first two.
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SOURCEDIR     = source
+BUILDDIR      = build
+
+# Put it first so that "make" without argument is like "make help".
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+
+.PHONY: help Makefile
+
+# Catch-all target: route all unknown targets to Sphinx using the new
+# "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
diff --git a/doc/make.bat b/doc/make.bat
@@ -0,0 +1,35 @@
+@ECHO OFF
+
+pushd %~dp0
+
+REM Command file for Sphinx documentation
+
+if "%SPHINXBUILD%" == "" (
+	set SPHINXBUILD=sphinx-build
+)
+set SOURCEDIR=source
+set BUILDDIR=build
+
+if "%1" == "" goto help
+
+%SPHINXBUILD% >NUL 2>NUL
+if errorlevel 9009 (
+	echo.
+	echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
+	echo.installed, then set the SPHINXBUILD environment variable to point
+	echo.to the full path of the 'sphinx-build' executable. Alternatively you
+	echo.may add the Sphinx directory to PATH.
+	echo.
+	echo.If you don't have Sphinx installed, grab it from
+	echo.http://sphinx-doc.org/
+	exit /b 1
+)
+
+%SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+goto end
+
+:help
+%SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O%
+
+:end
+popd
diff --git a/doc/source/api/api-index.rst b/doc/source/api/api-index.rst
@@ -0,0 +1,19 @@
+API Documentation
+=================
+
+.. mdinclude:: api-overview.md
+
+.. toctree::
+   :maxdepth: 1
+   :caption: The backend exposes the following API endpoints:
+
+   data <data-api>
+   grid <grid-api>
+   lister <lister-api>
+   metadata <metadata-api>
+   models <models-api>
+   multimeta <multimeta-api>
+   multistats <multistats-api>
+   stats <stats-api>
+   timeseries <timeseries-api>
+   watershed <watershed-api>
diff --git a/doc/source/api/api-overview.md b/doc/source/api/api-overview.md
@@ -0,0 +1,11 @@
+Documentation for each API endpoint is automatically generated from the code and docstring for that API's main function and may not be entirely user-friendly. There are some minor differences between the internal workings of the API functions and the process of querying them over the web.
+
+The query URL is constructed from a base URL ending in a slash, followed by the name of the endpoint, a question mark, and then one or more parameters of the form `attribute=value`, separated by ampersands. Parameters supplied via query URL should be web-encoded so that they will be correctly parsed.
+
+The automatically generated API documentation describes a `sesh` (database session) argument to each API function. Database sessions are supplied by the query parser and do not need to be given in the query URL.
+
+For example, the `multimeta` function has a signature of `ce.api.multimeta(sesh, ensemble_name='ce_files', model='')`
+
+The query URL `https://base_url/multimeta?ensemble_name=ce_files&model=CanESM2` calls the `multimeta` endpoint and supplies two arguments for the `multimeta` function: `ensemble_name` is "ce_files" and `model` is CanESM2. `sesh` is not supplied in the query URL.
+
+The API function return values are converted to JSON for the endpoint response.
diff --git a/doc/source/api/data-api-usage.md b/doc/source/api/data-api-usage.md
@@ -0,0 +1,5 @@
+This endpoint accepts parameters describing a collection of datasets and a time of year and extracts data for the requested time of year from all datasets that match the parameters and contain it as one or more timeseries.
+
+It shows how a variable changes over the long term. For example, with six datasets representing different climatologies it would return mean daily August precipitation from 1961-1990, 1971-2000, 1981-2010, 2010-2039, 2040-2069, and 2070-2099 as a single timeseries.
+
+This slices the data along a different axis than the `timeseries` endpoint, which shows values of a variable within a given dataset, and would return a timeseries consisting of mean daily precipitation for January 1961-1990, February 1961-1990, etc. instead.
diff --git a/doc/source/api/data-api.rst b/doc/source/api/data-api.rst
@@ -0,0 +1,13 @@
+.. To update documentation on the data endpoint, update the docstring in the
+   code or data-api-usage.md.
+
+
+data
+====
+.. mdinclude:: data-api-usage.md
+
+.. mdinclude:: sesh-not-needed.md
+
+------
+
+.. autofunction:: ce.api.data
diff --git a/doc/source/api/grid-api-usage.md b/doc/source/api/grid-api-usage.md
@@ -0,0 +1,3 @@
+This endpoint is not currently used by the PCEX frontend. It was originally intended to provide information needed for a user to select a polygon of interest on a map; this functionality is now handled by queries to the ncWMS map server. 
+
+This endpoint may be of use for systems that want to designate spatial areas of especial interest but are not using ncWMS.
diff --git a/doc/source/api/grid-api.rst b/doc/source/api/grid-api.rst
@@ -0,0 +1,13 @@
+.. To update documentation on the grid endpoint, update the docstring in the
+   code, or grid-api-usage.md.
+
+
+grid
+====
+.. mdinclude:: grid-api-usage.md
+
+.. mdinclude:: sesh-not-needed.md
+
+------
+
+.. autofunction:: ce.api.grid
diff --git a/doc/source/api/lister-api-usage.md b/doc/source/api/lister-api-usage.md
@@ -0,0 +1,3 @@
+This endpoint returns a list of unique identification strings corresponding to all available datafiles. 
+
+It is not currently used by the PCEX frontend; it has been superseded by the `multimeta` endpoint. Like `lister`, `multimeta` returns a list of `unique_id` strings associated with all available files in an ensemble, but `multimeta` additionally returns metadata attributes describing the contents of each file. For most applications, it is probably more efficient to call `multimeta` than to call `lister` and then make a separate query to get metadata describing each individual dataset to determine which ones are of interest.
diff --git a/doc/source/api/lister-api.rst b/doc/source/api/lister-api.rst
@@ -0,0 +1,13 @@
+.. To update documentation on the lister endpoint, update the docstring in the
+   code or lister-api-usage.md.
+
+
+lister
+======
+.. mdinclude:: lister-api-usage.md
+
+.. mdinclude:: sesh-not-needed.md
+
+------
+
+.. autofunction:: ce.api.lister
diff --git a/doc/source/api/metadata-api-usage.md b/doc/source/api/metadata-api-usage.md
@@ -0,0 +1 @@
+This endpoint returns detailed metadata on a single file. In addition to returning attributes describing the data in the file, it returns a list of all timestamps available within the file. This allows a user to request a map image from the map server corresponding to a specific timestamp.
-Original file line number
+Diff line change
@@ Expand Up / @@ -18,6 +18,7 @@ def metadata(sesh, model_id): @@
         Args:
             sesh (sqlalchemy.orm.session.Session): A database Session object
             model_id (str): Unique id which is a key to the data file requested
         Returns:
@@ Expand Down @@
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		This endpoint is not currently used by the PCEX frontend. It was originally intended to provide information needed for a user to select a polygon of interest on a map; this functionality is now handled by queries to the ncWMS map server.

		This endpoint may be of use for systems that want to designate spatial areas of especial interest but are not using ncWMS.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		This endpoint returns a list of unique identification strings corresponding to all available datafiles.

		It is not currently used by the PCEX frontend; it has been superseded by the `multimeta` endpoint. Like `lister`, `multimeta` returns a list of `unique_id` strings associated with all available files in an ensemble, but `multimeta` additionally returns metadata attributes describing the contents of each file. For most applications, it is probably more efficient to call `multimeta` than to call `lister` and then make a separate query to get metadata describing each individual dataset to determine which ones are of interest.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		This endpoint returns detailed metadata on a single file. In addition to returning attributes describing the data in the file, it returns a list of all timestamps available within the file. This allows a user to request a map image from the map server corresponding to a specific timestamp.