diff --git a/README.rst b/README.rst index 8efc168..5f3154d 100644 --- a/README.rst +++ b/README.rst @@ -4,9 +4,10 @@ xbatcher: Batch Generation from Xarray Datasets |Build Status| |codecov| |docs| |pypi| |conda-forge| |license| -Xbatcher is a small library for iterating xarray DataArrays in batches. The -goal is to make it easy to feed xarray datasets to machine learning libraries -such as PyTorch_ or TensorFlow_. View the |docs| for more info. +Xbatcher is a small library for iterating Xarray DataArrays and Datasets in +batches. The goal is to make it easy to feed Xarray objects to machine +learning libraries such as PyTorch_ or TensorFlow_. View the |docs| for more +info. .. _TensorFlow: https://www.tensorflow.org/ diff --git a/doc/_static/logo.svg b/doc/_static/logo.svg new file mode 100644 index 0000000..1d14a7c --- /dev/null +++ b/doc/_static/logo.svg @@ -0,0 +1,58 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/doc/_static/switcher.json b/doc/_static/switcher.json new file mode 100644 index 0000000..6418298 --- /dev/null +++ b/doc/_static/switcher.json @@ -0,0 +1,19 @@ +[ + { + "name": "dev", + "version": "latest", + "url": "https://xbatcher.readthedocs.io/en/latest/" + }, + { + "version": "0.3.0", + "url": "https://xbatcher.readthedocs.io/en/v0.3.0/" + }, + { + "version": "0.2.0", + "url": "https://xbatcher.readthedocs.io/en/v0.2.0/" + }, + { + "version": "0.1.0", + "url": "https://xbatcher.readthedocs.io/en/0.1.0/" + } +] diff --git a/doc/api.rst b/doc/api.rst index 6475e88..9cb79e3 100644 --- a/doc/api.rst +++ b/doc/api.rst @@ -16,7 +16,7 @@ Core BatchGenerator BatchSchema -Xbatcher xarray accessors +Xbatcher Xarray accessors ========================= .. 
currentmodule:: xarray diff --git a/doc/conf.py b/doc/conf.py index 4fc2d4a..0dd73fc 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -15,6 +15,7 @@ # type: ignore import datetime +import os import sys import sphinx_autosummary_accessors @@ -141,6 +142,30 @@ # a list of builtin themes. # tml_theme = 'default' html_theme = "pydata_sphinx_theme" +html_logo = "_static/logo.svg" +html_favicon = "_static/logo.svg" + +# The following is from the pydata-sphinx-theme settings (https://github.com/pydata/pydata-sphinx-theme/blob/main/docs/conf.py) +# Define the json_url for our version switcher. +json_url = "https://xbatcher.readthedocs.io/en/latest/_static/switcher.json" + +# Define the version we use for matching in the version switcher. +version_match = os.environ.get("READTHEDOCS_VERSION") +# If READTHEDOCS_VERSION doesn't exist, we're not on RTD +# If it is an integer, we're in a PR build and the version isn't correct. +if not version_match or version_match.isdigit(): + # For local development, infer the version to match from the package. + release = xbatcher.__version__ + if "dev" in release or "post" in release or "rc" in release: + version_match = "latest" + # We want to keep the relative reference if we are in dev mode + # but we want the whole url if we are effectively in a released version + json_url = "_static/switcher.json" + else: + version_match = "v" + release + +print(f"release: {release}") + # Theme options are theme-specific and customize the look and feel of a theme # further. 
For a list of options available for each theme, see the @@ -148,8 +173,22 @@ html_theme_options = { "search_bar_position": "sidebar", "github_url": "https://github.com/xarray-contrib/xbatcher", + "switcher": { + "json_url": json_url, + "version_match": version_match, + }, + "logo": { + "text": "Xbatcher", + "alt_text": "Xbatcher", + }, + "navbar_align": "left", # [left, content, right] For testing that the navbar items align properly + "navbar_center": ["version-switcher", "navbar-nav"], } +# Define the json_url for our version switcher. + +json_url = "https://xbatcher.readthedocs.io/en/latest/_static/switcher.json" + # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] @@ -162,7 +201,7 @@ # The name of an image file (relative to this directory) to place at the top # of the sidebar. -html_logo = None +# html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 diff --git a/doc/contributing.rst b/doc/contributing.rst index 3bc4bfd..b42e421 100644 --- a/doc/contributing.rst +++ b/doc/contributing.rst @@ -1,8 +1,8 @@ .. _contributing: -************************ -Contributing to xbatcher -************************ +****************** +Contributing guide +****************** .. note:: diff --git a/doc/demo.ipynb b/doc/demo.ipynb index b0ec97d..3139be9 100644 --- a/doc/demo.ipynb +++ b/doc/demo.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "sticky-exhibit", "metadata": {}, @@ -10,7 +11,7 @@ "Author: Cindy Chiao\n", "\n", "## What is xbatcher? \n", - "Xbatcher is a small library for iterating through xarray objects (DataArrays and Datasets) in batches. The goal is to make it easy to feed xarray objects to machine learning libraries such as Keras and PyTorch. \n", + "Xbatcher is a small library for iterating through Xarray objects (DataArrays and Datasets) in batches. 
The goal is to make it easy to feed Xarray objects to machine learning libraries such as Keras and PyTorch. \n", "\n", "## What is included in this notebook?\n", "* showcase current abilities with example data \n", @@ -197,13 +198,14 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "spectacular-reading", "metadata": {}, "source": [ "## Last batch behavior\n", "\n", - "If the input ds is not divisible by the specified `input_dims`, the remainder will be discarded instead of having a fractional batch. See https://github.com/xarray-contrib/xbatcher/issues/5 for more on this topic." + "If the input ds is not divisible by the specified `input_dims`, the remainder will be discarded instead of having a fractional batch. See https://github.com/xarray-contrib/xbatcher/discussions/82 for more on this topic." ] }, { @@ -260,11 +262,12 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "harmful-benefit", "metadata": {}, "source": [ - "We can inspect the samples in a batch for a lat/lon pixel, noting that the overlap only applies within a batch and not across. Thus, within the 20 time points in a batch, we can get 11 samples each with 10 time points and 9 time points allowed to overlap." + "We can inspect the samples in a batch for a lat/lon pixel, noting that the overlap applies across batches." ] }, { @@ -365,6 +368,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "persistent-culture", "metadata": {}, @@ -375,16 +379,14 @@ "\n", "Additional features of interest can include: \n", "\n", - "1. Handling overlaps across batches. The common use case of batching in machine learning training involves generating all samples, then group them into batches. When overlap is enabled, this yields different results compared to first generating batches then creating possible samples within each batch. \n", + "1. Shuffling/randomization of samples across batches. 
It is often desirable for each batch to be grouped randomly instead of along a specific dimension. \n", "\n", - "2. Shuffling/randomization of samples across batches. It is often desirable for each batch to be grouped randomly instead of along a specific dimension. \n", + "2. Be efficient in terms of memory usage. In the case where overlap is enabled, each sample would be composed of mostly repetitive values compared to adjacent samples. It would be beneficial if each batch/sample is generated lazily to avoid storing these extra duplicative values. \n", "\n", - "3. Be efficient in terms of memory usage. In the case where overlap is enabled, each sample would comprised of mostly repetitive values compared to adjacent samples. It would be beneficial if each batch/sample is generated lazily to avoid storing these extra duplicative values. \n", + "3. Handling preprocessing steps. For example, data augmentation, scaling/normalization, outlier detection, etc. \n", "\n", - "4. Handling preprocessing steps. For example, data augmentation, scaling/normalization, outlier detection, etc. \n", "\n", - "\n", - "More thoughts on 1. and 2. can be found in [this issue](https://github.com/xarray-contrib/xbatcher/issues/30). Interested users are welcomed to comment or submit other issues in GitHub. " + "More thoughts on 1. can be found in [this discussion](https://github.com/xarray-contrib/xbatcher/discussions/78). Interested users are welcome to comment or submit other issues on GitHub. 
" ] } ], @@ -404,7 +406,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.13" + "version": "3.8.13 | packaged by conda-forge | (default, Mar 25 2022, 06:04:14) \n[Clang 12.0.1 ]" }, "vscode": { "interpreter": { diff --git a/doc/index.rst b/doc/index.rst index db25016..97e5d1f 100644 --- a/doc/index.rst +++ b/doc/index.rst @@ -1,9 +1,9 @@ xbatcher: Batch Generation from Xarray Datasets =============================================== -Xbatcher is a small library for iterating xarray DataArrays in batches. The -goal is to make it easy to feed xarray datasets to machine learning libraries -such as Keras_. +Xbatcher is a small library for iterating Xarray DataArrays and Datasets in +batches. The goal is to make it easy to feed Xarray objects to machine learning +libraries such as Keras_. .. _Keras: https://keras.io/ @@ -51,7 +51,7 @@ Or via PyPI:: Basic Usage ----------- -Let's say we have an xarray dataset +Let's say we have an Xarray Dataset .. ipython:: python diff --git a/doc/roadmap.rst b/doc/roadmap.rst index 58a13d3..bfb7d8c 100644 --- a/doc/roadmap.rst +++ b/doc/roadmap.rst @@ -1,7 +1,7 @@ .. _roadmap: -Development Roadmap -=================== +Roadmap +======= Authors: Joe Hamman and Ryan Abernathey Date: February 7, 2019 diff --git a/xbatcher/generators.py b/xbatcher/generators.py index d0fcfaf..9f43d16 100644 --- a/xbatcher/generators.py +++ b/xbatcher/generators.py @@ -16,7 +16,7 @@ class BatchSchema: """ A representation of the indices and stacking/transposing parameters needed - to generator batches from Xarray Datasets and DataArrays using + to generate batches from Xarray DataArrays and Datasets using xbatcher.BatchGenerator. Parameters @@ -94,7 +94,7 @@ def _gen_batch_selectors( ) -> BatchSelectorSet: """ Create batch selectors dict, which can be used to create a batch - from an xarray data object. + from an Xarray data object. 
""" # Create an iterator that returns an object usable for .isel in xarray patch_selectors = self._gen_patch_selectors(ds) @@ -336,7 +336,7 @@ def _maybe_stack_batch_dims( class BatchGenerator: - """Create generator for iterating through xarray datarrays / datasets in + """Create generator for iterating through Xarray DataArrays / Datasets in batches. Parameters