From a4d831377a5843cc32d4cdb4005d66cbd2f3e0e8 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Wed, 14 Dec 2022 10:41:38 +0000 Subject: [PATCH 1/3] Add YAML linter (Prettier) and re-format YAML Files --- .github/release-drafter.yml | 44 ++-- .github/workflows/blossom-ci.yml | 230 +++++++++--------- .github/workflows/conda-env-create.yml | 30 +-- .github/workflows/cpu-ci.yml | 144 +++++------ .github/workflows/docs-preview-pr.yaml | 2 +- .gitlab-ci.yml | 23 +- .pre-commit-config.yaml | 37 +-- .prettierignore | 2 + .../environments/nvtabular_aws_sagemaker.yml | 2 +- docs/source/toc.yaml | 12 +- 10 files changed, 266 insertions(+), 260 deletions(-) create mode 100644 .prettierignore diff --git a/.github/release-drafter.yml b/.github/release-drafter.yml index 128781c7054..82e4c7e4d29 100644 --- a/.github/release-drafter.yml +++ b/.github/release-drafter.yml @@ -1,27 +1,27 @@ categories: -- title: '⚠ Breaking Changes' - labels: - - 'breaking' -- title: '🐜 Bug Fixes' - labels: - - 'bug' -- title: '🚀 Features' - labels: - - 'feature' - - 'enhancement' -- title: '📄 Documentation' - labels: - - 'documentation' - - 'examples' -- title: '🔧 Maintenance' - labels: - - 'build' - - 'dependencies' - - 'chore' - - 'ci' -change-template: '- $TITLE @$AUTHOR (#$NUMBER)' + - title: "⚠ Breaking Changes" + labels: + - "breaking" + - title: "🐜 Bug Fixes" + labels: + - "bug" + - title: "🚀 Features" + labels: + - "feature" + - "enhancement" + - title: "📄 Documentation" + labels: + - "documentation" + - "examples" + - title: "🔧 Maintenance" + labels: + - "build" + - "dependencies" + - "chore" + - "ci" +change-template: "- $TITLE @$AUTHOR (#$NUMBER)" exclude-labels: - - 'skip-changelog' + - "skip-changelog" template: | ## What’s Changed diff --git a/.github/workflows/blossom-ci.yml b/.github/workflows/blossom-ci.yml index 44ba751f375..30013868509 100644 --- a/.github/workflows/blossom-ci.yml +++ b/.github/workflows/blossom-ci.yml @@ -1,115 +1,115 @@ -# Copyright (c) 2020-2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# A workflow to trigger ci on hybrid infra (github + self hosted runner) -name: Blossom-CI -on: - issue_comment: - types: [created] - workflow_dispatch: - inputs: - platform: - description: 'runs-on argument' - required: false - args: - description: 'argument' - required: false -jobs: - Authorization: - name: Authorization - runs-on: blossom - outputs: - args: ${{ env.args }} - - # This job only runs for pull request comments - if: contains( '\ - albert17,\ - benfred,\ - bschifferer,\ - EvenOldridge,\ - gabrielspmoreira,\ - jperez999,\ - karlhighley,\ - marcromeyn,\ - benfred,\ - rjzamora,\ - rnyak,\ - sararb,\ - ', format('{0},', github.actor)) && github.event.comment.body == '/blossom-ci' - steps: - - name: Check if comment is issued by authorized person - run: blossom-ci - env: - OPERATION: 'AUTH' - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} - - Vulnerability-scan: - name: Vulnerability scan - needs: [Authorization] - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v3 - with: - repository: ${{ fromJson(needs.Authorization.outputs.args).repo }} - ref: ${{ fromJson(needs.Authorization.outputs.args).ref }} - lfs: 'true' - - # repo specific steps - #- name: Setup java - # uses: actions/setup-java@v1 - # with: - # java-version: 1.8 - - # add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file - #- name: Setup blackduck properties - # run: | - # PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g') - # echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties - # echo detect.maven.included.scopes=compile >> application.properties - - - name: Run blossom action - uses: NVIDIA/blossom-action@main - env: - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} - with: - args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }} - args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }} - args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }} - - Job-trigger: - name: Start ci job - needs: [Vulnerability-scan] - runs-on: blossom - steps: - - name: Start ci job - run: blossom-ci - env: - OPERATION: 'START-CI-JOB' - CI_SERVER: ${{ secrets.CI_SERVER }} - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - Post-processing: - name: Post processing - runs-on: blossom - if : github.event_name == 'workflow_dispatch' - steps: - - name: Start post processing - run: blossom-ci - env: - OPERATION: 'POST-PROCESSING' - CI_SERVER: ${{ secrets.CI_SERVER }} - REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# A workflow to trigger ci on hybrid infra (github + self hosted runner) +name: Blossom-CI +on: + issue_comment: + types: [created] + workflow_dispatch: + inputs: + platform: + description: "runs-on argument" + required: false + args: + description: "argument" + required: false +jobs: + Authorization: + name: Authorization + runs-on: blossom + outputs: + args: ${{ env.args }} + + # This job only runs for pull request comments + if: contains( '\ + albert17,\ + benfred,\ + bschifferer,\ + EvenOldridge,\ + gabrielspmoreira,\ + jperez999,\ + karlhighley,\ + marcromeyn,\ + benfred,\ + rjzamora,\ + rnyak,\ + sararb,\ + ', format('{0},', github.actor)) && github.event.comment.body == '/blossom-ci' + steps: + - name: Check if comment is issued by authorized person + run: blossom-ci + env: + OPERATION: "AUTH" + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} + + Vulnerability-scan: + name: Vulnerability scan + needs: [Authorization] + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v3 + with: + repository: ${{ fromJson(needs.Authorization.outputs.args).repo }} + ref: ${{ fromJson(needs.Authorization.outputs.args).ref }} + lfs: "true" + + # repo specific steps + #- name: Setup java + # uses: actions/setup-java@v1 + # with: + # java-version: 1.8 + + # add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file + #- name: Setup blackduck properties + # run: | + # PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g') + # echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties + # echo detect.maven.included.scopes=compile >> application.properties + + - name: Run blossom action + uses: NVIDIA/blossom-action@main + env: + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} + with: + args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }} + args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }} + args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }} + + Job-trigger: + name: Start ci job + needs: [Vulnerability-scan] + runs-on: blossom + steps: + - name: Start ci job + run: blossom-ci + env: + OPERATION: "START-CI-JOB" + CI_SERVER: ${{ secrets.CI_SERVER }} + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + Post-processing: + name: Post processing + runs-on: blossom + if: github.event_name == 'workflow_dispatch' + steps: + - name: Start post processing + run: blossom-ci + env: + OPERATION: "POST-PROCESSING" + CI_SERVER: ${{ secrets.CI_SERVER }} + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/conda-env-create.yml b/.github/workflows/conda-env-create.yml index ae3781c739e..518683a4c2b 100644 --- a/.github/workflows/conda-env-create.yml +++ b/.github/workflows/conda-env-create.yml @@ -3,12 +3,12 @@ name: Test conda env creation on: schedule: # * is a special character in YAML so you have to quote this string - - cron: '30 14 * * *' + - cron: "30 14 * * *" workflow_dispatch: inputs: logLevel: - description: 'Log level' - default: 'warning' + description: "Log level" + default: "warning" jobs: build-linux: @@ -19,15 +19,15 @@ jobs: target: ["11.0", "11.2"] steps: - - uses: actions/checkout@v3 - - name: Set up Python 3.7 - uses: actions/setup-python@v4 - with: - python-version: 3.7 - - name: Add conda to system path - run: | - # $CONDA is an environment 
variable pointing to the root of the miniconda directory - echo $CONDA/bin >> $GITHUB_PATH - - name: Create Conda environment with CUDA ${{ matrix.target }} - run: | - conda env create --file conda/environments/nvtabular_dev_cuda${{ matrix.target }}.yml --name nvtabular_dev_cuda${{ matrix.target }} + - uses: actions/checkout@v3 + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: 3.7 + - name: Add conda to system path + run: | + # $CONDA is an environment variable pointing to the root of the miniconda directory + echo $CONDA/bin >> $GITHUB_PATH + - name: Create Conda environment with CUDA ${{ matrix.target }} + run: | + conda env create --file conda/environments/nvtabular_dev_cuda${{ matrix.target }}.yml --name nvtabular_dev_cuda${{ matrix.target }} diff --git a/.github/workflows/cpu-ci.yml b/.github/workflows/cpu-ci.yml index 41a5689ebaa..8fef8258bbb 100644 --- a/.github/workflows/cpu-ci.yml +++ b/.github/workflows/cpu-ci.yml @@ -3,11 +3,11 @@ name: CPU CI on: workflow_dispatch: push: - branches: [ main ] + branches: [main] tags: - v* pull_request: - branches: [ main ] + branches: [main] jobs: build: @@ -18,78 +18,78 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Ubuntu packages - run: | - sudo apt-get update -y - sudo apt-get install -y protobuf-compiler - - name: Install and upgrade python packages - run: | - python -m pip install --upgrade pip setuptools==59.4.0 wheel tox pybind11 - python -m pip uninstall protobuf -y - python -m pip install --no-binary=protobuf protobuf - - name: Run tests - run: | - ref_type=${{ github.ref_type }} - branch=main - if [[ $ref_type == "tag"* ]] - then - raw=$(git branch -r --contains ${{ github.ref_name }}) - branch=${raw/origin\/} - fi - tox -e test-cpu -- $branch - - name: Generate package for pypi - run: | - python setup.py sdist bdist_wheel - - name: Upload pypi artifacts to github - uses: actions/upload-artifact@v3 - with: - name: dist - path: dist - - name: Generate package for conda - id: conda_build - run: | - conda update conda - conda install conda-build pybind11 - conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages - export CONDA_PACKAGE=$(conda build --python ${{ matrix.python-version }} . 
-c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages --output) - echo "conda_package : $CONDA_PACKAGE" - echo "conda_package=$CONDA_PACKAGE" >> $GITHUB_OUTPUT + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Ubuntu packages + run: | + sudo apt-get update -y + sudo apt-get install -y protobuf-compiler + - name: Install and upgrade python packages + run: | + python -m pip install --upgrade pip setuptools==59.4.0 wheel tox pybind11 + python -m pip uninstall protobuf -y + python -m pip install --no-binary=protobuf protobuf + - name: Run tests + run: | + ref_type=${{ github.ref_type }} + branch=main + if [[ $ref_type == "tag"* ]] + then + raw=$(git branch -r --contains ${{ github.ref_name }}) + branch=${raw/origin\/} + fi + tox -e test-cpu -- $branch + - name: Generate package for pypi + run: | + python setup.py sdist bdist_wheel + - name: Upload pypi artifacts to github + uses: actions/upload-artifact@v3 + with: + name: dist + path: dist + - name: Generate package for conda + id: conda_build + run: | + conda update conda + conda install conda-build pybind11 + conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages + export CONDA_PACKAGE=$(conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages --output) + echo "conda_package : $CONDA_PACKAGE" + echo "conda_package=$CONDA_PACKAGE" >> $GITHUB_OUTPUT - - name: Upload conda artifacts to github - uses: actions/upload-artifact@v3 - with: - name: conda - path: ${{ steps.conda_build.outputs.conda_package }} + - name: Upload conda artifacts to github + uses: actions/upload-artifact@v3 + with: + name: conda + path: ${{ steps.conda_build.outputs.conda_package }} - # Build docs, treat warnings as errors - - name: Building docs - run: | - tox -e docs - - name: Upload HTML - uses: actions/upload-artifact@v3 - with: - name: html-build-artifact - path: docs/build/html - if-no-files-found: error - retention-days: 1 - - name: Store PR information - run: | - mkdir ./pr - echo ${{ github.event.number }} > ./pr/pr.txt - echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt - echo ${{ github.event.action }} > ./pr/action.txt - - name: Upload PR information - uses: actions/upload-artifact@v3 - with: - name: pr - path: pr/ + # Build docs, treat warnings as errors + - name: Building docs + run: | + tox -e docs + - name: Upload HTML + uses: actions/upload-artifact@v3 + with: + name: html-build-artifact + path: docs/build/html + if-no-files-found: error + retention-days: 1 + - name: Store PR information + run: | + mkdir ./pr + echo ${{ github.event.number }} > ./pr/pr.txt + echo ${{ github.event.pull_request.merged }} > ./pr/merged.txt + echo ${{ github.event.action }} > ./pr/action.txt + - name: Upload PR information + uses: actions/upload-artifact@v3 + with: + name: pr + path: pr/ release: name: Release diff --git a/.github/workflows/docs-preview-pr.yaml b/.github/workflows/docs-preview-pr.yaml index 09c45d1eede..408e8b8cedb 100644 --- a/.github/workflows/docs-preview-pr.yaml +++ b/.github/workflows/docs-preview-pr.yaml @@ -10,4 +10,4 @@ env: jobs: preview: - uses: nvidia-merlin/.github/.github/workflows/docs-preview-pr-common.yaml@main \ No newline at end of file + uses: 
nvidia-merlin/.github/.github/workflows/docs-preview-pr-common.yaml@main diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 6a380ae170a..e026a9b0e64 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -1,6 +1,5 @@ before_script: - image: docker:latest variables: @@ -8,33 +7,33 @@ variables: GIT_SUBMODULE_STRATEGY: normal services: -- docker:dind + - docker:dind stages: -- docs -- test + - docs + - test test_build: cache: - key: "$CI_COMMIT_REF_SLUG" + key: "$CI_COMMIT_REF_SLUG" stage: test - tags: - - RapidsDL + tags: + - RapidsDL script: - - pwd - - docker run --runtime=nvidia --rm -p 48888:8888 -p 48787:8787 -p 48786:8786 -p 43000:3000 --ipc=host --name dev_tip_test -v $(pwd):/rapidsdl/ gitlab-master.nvidia.com:5005/rapidsdl/docker/rapidsdl_nite:latest /bin/bash -c "source activate rapids && cd /rapidsdl/nvtabular && black --check . && flake8 && isort -c && py.test --cov-config tests/unit/.coveragerc --cov-report term-missing --cov-fail-under 70 --cov=. tests && chmod -R 777 /rapidsdl/" - - sudo chmod -R 777 $(pwd) + - pwd + - docker run --runtime=nvidia --rm -p 48888:8888 -p 48787:8787 -p 48786:8786 -p 43000:3000 --ipc=host --name dev_tip_test -v $(pwd):/rapidsdl/ gitlab-master.nvidia.com:5005/rapidsdl/docker/rapidsdl_nite:latest /bin/bash -c "source activate rapids && cd /rapidsdl/nvtabular && black --check . && flake8 && isort -c && py.test --cov-config tests/unit/.coveragerc --cov-report term-missing --cov-fail-under 70 --cov=. tests && chmod -R 777 /rapidsdl/" + - sudo chmod -R 777 $(pwd) pages: stage: docs tags: - - RapidsDL + - RapidsDL script: - mkdir public - docker run --runtime=nvidia --rm -p 48888:8888 -p 48787:8787 -p 48786:8786 -p 43000:3000 --ipc=host --name dev_tip_test -v $(pwd):/rapidsdl/ gitlab-master.nvidia.com:5005/rapidsdl/docker/rapidsdl_nite:latest /bin/bash -c "source activate rapids && cd /rapidsdl/nvtabular/docs/ && make html; chmod -R 777 /rapidsdl/" - ls nvtabular/docs/build/html/ - cp -r nvtabular/docs/build/html/* public - - ls public + - ls public artifacts: paths: - public diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aef31365943..f98772e34a1 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,44 +7,49 @@ repos: - repo: https://github.com/timothycrosley/isort rev: 5.10.1 hooks: - - id: isort - additional_dependencies: [toml] - exclude: examples/.* + - id: isort + additional_dependencies: [toml] + exclude: examples/.* # code style - repo: https://github.com/python/black rev: 22.6.0 hooks: - - id: black + - id: black - repo: https://github.com/pycqa/pylint rev: v2.14.1 hooks: - - id: pylint + - id: pylint - repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - - id: flake8 + - id: flake8 + - repo: https://github.com/pre-commit/mirrors-prettier + rev: v2.7.1 + hooks: + - id: prettier + types_or: [yaml] # notebooks - repo: https://github.com/s-weigand/flake8-nb rev: v0.5.2 hooks: - - id: flake8-nb - files: \.ipynb$ - exclude: bench/.* + - id: flake8-nb + files: \.ipynb$ + exclude: bench/.* # documentation - repo: https://github.com/econchick/interrogate rev: 1.5.0 hooks: - - id: interrogate - exclude: ^(docs|bench|examples|tests|setup.py|versioneer.py) - args: [--config=pyproject.toml] + - id: interrogate + exclude: ^(docs|bench|examples|tests|setup.py|versioneer.py) + args: [--config=pyproject.toml] - repo: https://github.com/codespell-project/codespell rev: v2.2.1 hooks: - - id: codespell - exclude: .github/.* + - id: codespell + exclude: .github/.* # security - repo: https://github.com/PyCQA/bandit rev: 1.7.4 
hooks: - - id: bandit - args: [--verbose, -ll, -x, tests,examples,bench] + - id: bandit + args: [--verbose, -ll, -x, tests, examples, bench] diff --git a/.prettierignore b/.prettierignore new file mode 100644 index 00000000000..6af02b58e72 --- /dev/null +++ b/.prettierignore @@ -0,0 +1,2 @@ +build/ +conda/ diff --git a/conda/environments/nvtabular_aws_sagemaker.yml b/conda/environments/nvtabular_aws_sagemaker.yml index 160e9266942..f1d0380eeb3 100644 --- a/conda/environments/nvtabular_aws_sagemaker.yml +++ b/conda/environments/nvtabular_aws_sagemaker.yml @@ -19,4 +19,4 @@ dependencies: - numba>=0.53.0 - dlpack - scikit-learn - - asvdb \ No newline at end of file + - asvdb diff --git a/docs/source/toc.yaml b/docs/source/toc.yaml index 74ac6a7279a..823ac46c517 100644 --- a/docs/source/toc.yaml +++ b/docs/source/toc.yaml @@ -9,12 +9,12 @@ subtrees: - file: examples/index.md title: Example Notebooks entries: - - file: examples/01-Getting-started.ipynb - title: Getting Started with NVTabular - - file: examples/02-Advanced-NVTabular-workflow.ipynb - title: Advanced NVTabular Workflow - - file: examples/03-Running-on-multiple-GPUs-or-on-CPU.ipynb - title: Run on multi-GPU or CPU-only + - file: examples/01-Getting-started.ipynb + title: Getting Started with NVTabular + - file: examples/02-Advanced-NVTabular-workflow.ipynb + title: Advanced NVTabular Workflow + - file: examples/03-Running-on-multiple-GPUs-or-on-CPU.ipynb + title: Run on multi-GPU or CPU-only - file: api title: API Documentation - file: resources/index From ed0dfc845a9422b47c73095dfa8e48b3b13d21a1 Mon Sep 17 00:00:00 2001 From: Oliver Holworthy Date: Wed, 14 Dec 2022 10:44:27 +0000 Subject: [PATCH 2/3] Enable linter/formatter (Prettier) for Markdown files --- .github/ISSUE_TEMPLATE/bug_report.md | 11 +- .../ISSUE_TEMPLATE/documentation-request.md | 3 +- .github/ISSUE_TEMPLATE/feature_request.md | 3 +- .github/ISSUE_TEMPLATE/operator_request.md | 14 +- .github/ISSUE_TEMPLATE/research_question.md | 3 +- .github/ISSUE_TEMPLATE/submit-question.md | 3 +- .github/ISSUE_TEMPLATE/task.md | 4 +- .pre-commit-config.yaml | 2 +- CHANGELOG.md | 187 +++++++++--------- CONTRIBUTING.md | 30 +-- README.md | 48 ++--- bench/examples/MultiGPUBench.md | 67 ++++--- docs/README.md | 17 +- docs/source/core_features.md | 48 ++--- docs/source/resources/architecture.md | 17 +- docs/source/resources/cloud_integration.md | 24 ++- docs/source/resources/links.md | 40 ++-- examples/README.md | 1 + 18 files changed, 266 insertions(+), 256 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md index 474a8e3a4ec..080188c0bf1 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -3,8 +3,7 @@ name: Bug report about: Create a bug report to help us improve NVTabular title: "[BUG]" labels: "? - Needs Triage, bug" -assignees: '' - +assignees: "" --- **Describe the bug** @@ -17,10 +16,10 @@ Follow this guide http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-rep A clear and concise description of what you expected to happen. 
**Environment details (please complete the following information):** - - Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)] - - Method of NVTabular install: [conda, Docker, or from source] - - If method of install is [Docker], provide `docker pull` & `docker run` commands used - + +- Environment location: [Bare-metal, Docker, Cloud(specify cloud provider)] +- Method of NVTabular install: [conda, Docker, or from source] + - If method of install is [Docker], provide `docker pull` & `docker run` commands used **Additional context** Add any other context about the problem here. diff --git a/.github/ISSUE_TEMPLATE/documentation-request.md b/.github/ISSUE_TEMPLATE/documentation-request.md index 89a026f343c..0a5b2a2a59b 100644 --- a/.github/ISSUE_TEMPLATE/documentation-request.md +++ b/.github/ISSUE_TEMPLATE/documentation-request.md @@ -3,8 +3,7 @@ name: Documentation request about: Report incorrect or needed documentation title: "[DOC]" labels: "? - Needs Triage, doc" -assignees: '' - +assignees: "" --- ## Report incorrect documentation diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md index 9ccebb4db5f..d487e456b8f 100644 --- a/.github/ISSUE_TEMPLATE/feature_request.md +++ b/.github/ISSUE_TEMPLATE/feature_request.md @@ -3,8 +3,7 @@ name: Feature request about: Suggest an idea for NVTabular title: "[FEA]" labels: "? - Needs Triage, feature request" -assignees: '' - +assignees: "" --- **Is your feature request related to a problem? Please describe.** diff --git a/.github/ISSUE_TEMPLATE/operator_request.md b/.github/ISSUE_TEMPLATE/operator_request.md index 5d2f0632c9a..7df84eb632c 100644 --- a/.github/ISSUE_TEMPLATE/operator_request.md +++ b/.github/ISSUE_TEMPLATE/operator_request.md @@ -1,4 +1,5 @@ --- + name: Operator request about: Suggest an operator for NVTabular to do [...] @@ -6,13 +7,14 @@ about: Suggest an operator for NVTabular to do [...] A clear and concise description of any alternative solutions or features you've considered. **Describe the solution you'd like** -A clear and concise description of the operation you'd like to perform on the column. Please include: - - Type (Feature Engineering or Preprocessing) - - input column type(s) - - output column type(s) - - Expected transformation of the data after application +A clear and concise description of the operation you'd like to perform on the column. Please include: + +- Type (Feature Engineering or Preprocessing) +- input column type(s) +- output column type(s) +- Expected transformation of the data after application -**Optional: Describe operation stages in detail*** +**Optional: Describe operation stages in detail\*** Statistics per chunk: Ex. compute the mean, stdev and count of the column Statistics combine: Ex. combine means & stdevs after normalizing by total count Apply: (value-mean)/stdev diff --git a/.github/ISSUE_TEMPLATE/research_question.md b/.github/ISSUE_TEMPLATE/research_question.md index fff685e8161..f48c1f37c9f 100644 --- a/.github/ISSUE_TEMPLATE/research_question.md +++ b/.github/ISSUE_TEMPLATE/research_question.md @@ -3,8 +3,7 @@ name: Research Question about: Longer term research related to NVTabular title: "[REA]" labels: "? - Proposed research topic" -assignees: '' - +assignees: "" --- **What questions are you trying to answer? 
Please describe.** diff --git a/.github/ISSUE_TEMPLATE/submit-question.md b/.github/ISSUE_TEMPLATE/submit-question.md index 46d47942e56..66136d0326f 100644 --- a/.github/ISSUE_TEMPLATE/submit-question.md +++ b/.github/ISSUE_TEMPLATE/submit-question.md @@ -3,8 +3,7 @@ name: Submit question about: Ask a general question about NVTabular title: "[QST]" labels: "? - Needs Triage, question" -assignees: '' - +assignees: "" --- **What is your question?** diff --git a/.github/ISSUE_TEMPLATE/task.md b/.github/ISSUE_TEMPLATE/task.md index 83695c9fdf3..3bbd71358e6 100644 --- a/.github/ISSUE_TEMPLATE/task.md +++ b/.github/ISSUE_TEMPLATE/task.md @@ -3,12 +3,10 @@ name: Task about: A general task that we're tracking in Github title: "[Task]" labels: "" -assignees: '' - +assignees: "" --- **What needs doing** - **Additional context** Add any other context, code examples, or references to existing implementations about the task here. diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f98772e34a1..296226c385d 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -27,7 +27,7 @@ repos: rev: v2.7.1 hooks: - id: prettier - types_or: [yaml] + types_or: [yaml, markdown] # notebooks - repo: https://github.com/s-weigand/flake8-nb rev: v0.5.2 diff --git a/CHANGELOG.md b/CHANGELOG.md index f58c9b42e43..5601398ae24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,167 +3,170 @@ # NVTabular v0.7.1 (2 November 2021) ## Improvements -* Add LogOp support for list features [#1153](https://github.com/NVIDIA-Merlin/NVTabular/issues/1153) -* Add Normalize operator support for list features [#1154](https://github.com/NVIDIA-Merlin/NVTabular/issues/1154) -* Add DataLoader.epochs() method and Dataset.to_iter(epochs=) argument [#1147](https://github.com/NVIDIA-Merlin/NVTabular/pull/1147) -* Add ValueCount operator for recording of multihot min and max list lengths [#1171](https://github.com/NVIDIA-Merlin/NVTabular/pull/1171) + +- Add LogOp support for list features [#1153](https://github.com/NVIDIA-Merlin/NVTabular/issues/1153) +- Add Normalize operator support for list features [#1154](https://github.com/NVIDIA-Merlin/NVTabular/issues/1154) +- Add DataLoader.epochs() method and Dataset.to_iter(epochs=) argument [#1147](https://github.com/NVIDIA-Merlin/NVTabular/pull/1147) +- Add ValueCount operator for recording of multihot min and max list lengths [#1171](https://github.com/NVIDIA-Merlin/NVTabular/pull/1171) ## Bug Fixes -* Fix Criteo inference [#1198](https://github.com/NVIDIA-Merlin/NVTabular/issues/1198) -* Fix performance regressions in Criteo benchmark [#1222](https://github.com/NVIDIA-Merlin/NVTabular/issues/1222) -* Fix error in JoinGroupby op [#1167](https://github.com/NVIDIA-Merlin/NVTabular/issues/1222) -* Fix Filter/JoinExternal key error [#1143](https://github.com/NVIDIA-Merlin/NVTabular/issues/1143) -* Fix LambdaOp transforming dependency values [#1185](https://github.com/NVIDIA-Merlin/NVTabular/issues/) -* Fix reading parquet files with list columns from GCS [#1155](https://github.com/NVIDIA-Merlin/NVTabular/issues/1155) -* Fix TargetEncoding with dependencies as the target [#1165](https://github.com/NVIDIA-Merlin/NVTabular/issues/1165) -* Fix Categorify op to calculate unique count stats for Nulls [#1159](https://github.com/NVIDIA-Merlin/NVTabular/issues/1159) +- Fix Criteo inference [#1198](https://github.com/NVIDIA-Merlin/NVTabular/issues/1198) +- Fix performance regressions in Criteo benchmark [#1222](https://github.com/NVIDIA-Merlin/NVTabular/issues/1222) +- Fix error in 
JoinGroupby op [#1167](https://github.com/NVIDIA-Merlin/NVTabular/issues/1222) +- Fix Filter/JoinExternal key error [#1143](https://github.com/NVIDIA-Merlin/NVTabular/issues/1143) +- Fix LambdaOp transforming dependency values [#1185](https://github.com/NVIDIA-Merlin/NVTabular/issues/) +- Fix reading parquet files with list columns from GCS [#1155](https://github.com/NVIDIA-Merlin/NVTabular/issues/1155) +- Fix TargetEncoding with dependencies as the target [#1165](https://github.com/NVIDIA-Merlin/NVTabular/issues/1165) +- Fix Categorify op to calculate unique count stats for Nulls [#1159](https://github.com/NVIDIA-Merlin/NVTabular/issues/1159) # NVTabular v0.7.0 (23 September 2021) ## Improvements -* Add column tagging API [#943](https://github.com/NVIDIA/NVTabular/issues/943) -* Export dataset schema when writing out datasets [#948](https://github.com/NVIDIA/NVTabular/issues/948) -* Make dataloaders aware of schema [#947](https://github.com/NVIDIA/NVTabular/issues/947) -* Standardize a Workflows representation of its output columns [#372](https://github.com/NVIDIA/NVTabular/issues/372) -* Add multi-gpu training example using PyTorch Distributed [#775](https://github.com/NVIDIA/NVTabular/issues/775) -* Speed up reading Parquet files from remote storage like GCS or S3 [#1119](https://github.com/NVIDIA/NVTabular/pull/1119) -* Add utility to convert TFRecord datasets to Parquet [#1085](https://github.com/NVIDIA/NVTabular/pull/1085) -* Add multi-gpu training example using PyTorch Distributed [#775](https://github.com/NVIDIA/NVTabular/issues/775) -* Add multihot support for PyTorch inference [#719](https://github.com/NVIDIA/NVTabular/issues/719) -* Add options to reserve categorical indices in the Categorify() op [#1074](https://github.com/NVIDIA/NVTabular/issues/1074) -* Update notebooks to work with CPU only systems [#960](https://github.com/NVIDIA/NVTabular/issues/960) -* Save output from Categorify op in a single table for HugeCTR [#946](https://github.com/NVIDIA/NVTabular/issues/946) -* Add a keyset file for HugeCTR integration [#1049](https://github.com/NVIDIA/NVTabular/issues/1049) +- Add column tagging API [#943](https://github.com/NVIDIA/NVTabular/issues/943) +- Export dataset schema when writing out datasets [#948](https://github.com/NVIDIA/NVTabular/issues/948) +- Make dataloaders aware of schema [#947](https://github.com/NVIDIA/NVTabular/issues/947) +- Standardize a Workflows representation of its output columns [#372](https://github.com/NVIDIA/NVTabular/issues/372) +- Add multi-gpu training example using PyTorch Distributed [#775](https://github.com/NVIDIA/NVTabular/issues/775) +- Speed up reading Parquet files from remote storage like GCS or S3 [#1119](https://github.com/NVIDIA/NVTabular/pull/1119) +- Add utility to convert TFRecord datasets to Parquet [#1085](https://github.com/NVIDIA/NVTabular/pull/1085) +- Add multi-gpu training example using PyTorch Distributed [#775](https://github.com/NVIDIA/NVTabular/issues/775) +- Add multihot support for PyTorch inference [#719](https://github.com/NVIDIA/NVTabular/issues/719) +- Add options to reserve categorical indices in the Categorify() op [#1074](https://github.com/NVIDIA/NVTabular/issues/1074) +- Update notebooks to work with CPU only systems [#960](https://github.com/NVIDIA/NVTabular/issues/960) +- Save output from Categorify op in a single table for HugeCTR [#946](https://github.com/NVIDIA/NVTabular/issues/946) +- Add a keyset file for HugeCTR integration [#1049](https://github.com/NVIDIA/NVTabular/issues/1049) ## Bug Fixes -* Fix 
category counts written out by the Categorify op [#1128](https://github.com/NVIDIA/NVTabular/issues/1128) -* Fix HugeCTR inference example [#1130](https://github.com/NVIDIA/NVTabular/pull/1130) -* Fix make_feature_column_workflow bug in Categorify if features have vocabularies of varying size. [#1062](https://github.com/NVIDIA/NVTabular/issues/1062) -* Fix TargetEncoding op on CPU only systems [#976](https://github.com/NVIDIA/NVTabular/issues/976) -* Fix writing empty partitions to Parquet files [#1097](https://github.com/NVIDIA/NVTabular/issues/1097) +- Fix category counts written out by the Categorify op [#1128](https://github.com/NVIDIA/NVTabular/issues/1128) +- Fix HugeCTR inference example [#1130](https://github.com/NVIDIA/NVTabular/pull/1130) +- Fix make_feature_column_workflow bug in Categorify if features have vocabularies of varying size. [#1062](https://github.com/NVIDIA/NVTabular/issues/1062) +- Fix TargetEncoding op on CPU only systems [#976](https://github.com/NVIDIA/NVTabular/issues/976) +- Fix writing empty partitions to Parquet files [#1097](https://github.com/NVIDIA/NVTabular/issues/1097) # NVTabular v0.6.1 (11 August 2021) ## Bug Fixes -* Fix installing package via pip [#1030](https://github.com/NVIDIA/NVTabular/pull/1030) -* Fix inference with groupby operator [#1019](https://github.com/NVIDIA/NVTabular/issues/1019) -* Install tqdm with conda package [#1030](https://github.com/NVIDIA/NVTabular/pull/1030) -* Fix workflow output_dtypes with empty partitions [#1028](https://github.com/NVIDIA/NVTabular/pull/1028) +- Fix installing package via pip [#1030](https://github.com/NVIDIA/NVTabular/pull/1030) +- Fix inference with groupby operator [#1019](https://github.com/NVIDIA/NVTabular/issues/1019) +- Install tqdm with conda package [#1030](https://github.com/NVIDIA/NVTabular/pull/1030) +- Fix workflow output_dtypes with empty partitions [#1028](https://github.com/NVIDIA/NVTabular/pull/1028) # NVTabular v0.6.0 (5 August 2021) ## Improvements -* Add CPU support [#534](https://github.com/NVIDIA/NVTabular/issues/534) -* Speed up inference on Triton Inference Server [#744](https://github.com/NVIDIA/NVTabular/issues/744) -* Add support for session based recommenders [#355](https://github.com/NVIDIA/NVTabular/issues/355) -* Add PyTorch Dataloader support for Sparse Tensors [#500](https://github.com/NVIDIA/NVTabular/issues/500) -* Add ListSlice operator for truncating list columns [#734](https://github.com/NVIDIA/NVTabular/issues/734) -* Categorical ids sorted by frequency [#799](https://github.com/NVIDIA/NVTabular/issues/799) -* Add ability to select a subset of a ColumnGroup [#809](https://github.com/NVIDIA/NVTabular/issues/809) -* Add option to use Rename op to give a single column a new fixed name [#825](https://github.com/NVIDIA/NVTabular/issues/824) -* Add a 'map' function to KerasSequenceLoader, which enables sample weights [#667](https://github.com/NVIDIA/NVTabular/issues/667) -* Add JoinExternal option on nvt.Dataset in addition to cudf [#370](https://github.com/NVIDIA/NVTabular/issues/370) -* Allow passing ColumnGroup to get_embedding_sizes [#732](https://github.com/NVIDIA/NVTabular/issues/732) -* Add ability to name LambdaOp and provide a better default name in graph visualizations [#860](https://github.com/NVIDIA/NVTabular/issues/860) + +- Add CPU support [#534](https://github.com/NVIDIA/NVTabular/issues/534) +- Speed up inference on Triton Inference Server [#744](https://github.com/NVIDIA/NVTabular/issues/744) +- Add support for session based recommenders 
[#355](https://github.com/NVIDIA/NVTabular/issues/355) +- Add PyTorch Dataloader support for Sparse Tensors [#500](https://github.com/NVIDIA/NVTabular/issues/500) +- Add ListSlice operator for truncating list columns [#734](https://github.com/NVIDIA/NVTabular/issues/734) +- Categorical ids sorted by frequency [#799](https://github.com/NVIDIA/NVTabular/issues/799) +- Add ability to select a subset of a ColumnGroup [#809](https://github.com/NVIDIA/NVTabular/issues/809) +- Add option to use Rename op to give a single column a new fixed name [#825](https://github.com/NVIDIA/NVTabular/issues/824) +- Add a 'map' function to KerasSequenceLoader, which enables sample weights [#667](https://github.com/NVIDIA/NVTabular/issues/667) +- Add JoinExternal option on nvt.Dataset in addition to cudf [#370](https://github.com/NVIDIA/NVTabular/issues/370) +- Allow passing ColumnGroup to get_embedding_sizes [#732](https://github.com/NVIDIA/NVTabular/issues/732) +- Add ability to name LambdaOp and provide a better default name in graph visualizations [#860](https://github.com/NVIDIA/NVTabular/issues/860) ## Bug Fixes -* Fix make_feature_column_workflow for Categorical columns [#763](https://github.com/NVIDIA/NVTabular/issues/763) -* Fix Categorify output dtypes for list columns [#963](https://github.com/NVIDIA/NVTabular/issues/963) -* Fix inference for Outbrain example [#669](https://github.com/NVIDIA/NVTabular/issues/669) -* Fix dask metadata after calling workflow.to_ddf() [#852](https://github.com/NVIDIA/NVTabular/issues/734) -* Fix out of memory errors [#896](https://github.com/NVIDIA/NVTabular/issues/896), [#971](https://github.com/NVIDIA/NVTabular/pull/971) -* Fix normalize output when stdev is zero [#993](https://github.com/NVIDIA/NVTabular/pull/993) -* Fix using UCX with a dask cluster on Merlin containers [#872](https://github.com/NVIDIA/NVTabular/pull/872) +- Fix make_feature_column_workflow for Categorical columns [#763](https://github.com/NVIDIA/NVTabular/issues/763) +- Fix Categorify output dtypes for list columns [#963](https://github.com/NVIDIA/NVTabular/issues/963) +- Fix inference for Outbrain example [#669](https://github.com/NVIDIA/NVTabular/issues/669) +- Fix dask metadata after calling workflow.to_ddf() [#852](https://github.com/NVIDIA/NVTabular/issues/734) +- Fix out of memory errors [#896](https://github.com/NVIDIA/NVTabular/issues/896), [#971](https://github.com/NVIDIA/NVTabular/pull/971) +- Fix normalize output when stdev is zero [#993](https://github.com/NVIDIA/NVTabular/pull/993) +- Fix using UCX with a dask cluster on Merlin containers [#872](https://github.com/NVIDIA/NVTabular/pull/872) # NVTabular v0.5.3 (1 June 2021) ## Bug Fixes -* Fix Shuffling in Torch DataLoader [#818](https://github.com/NVIDIA/NVTabular/pull/818) -* Fix "Unsupported type_id conversion" in triton inference for string columns [#813](https://github.com/NVIDIA/NVTabular/issues/813) -* Fix HugeCTR inference backend [Merlin#8](https://github.com/NVIDIA-Merlin/Merlin/pull/8) + +- Fix Shuffling in Torch DataLoader [#818](https://github.com/NVIDIA/NVTabular/pull/818) +- Fix "Unsupported type_id conversion" in triton inference for string columns [#813](https://github.com/NVIDIA/NVTabular/issues/813) +- Fix HugeCTR inference backend [Merlin#8](https://github.com/NVIDIA-Merlin/Merlin/pull/8) # NVTabular v0.5.1 (4 May 2021) ## Improvements -* Update dependencies to use cudf 0.19 -* Removed conda from docker containers, leading to much smaller container sizes -* Added CUDA 11.2 support -* Added FastAI v2.3 support +- 
Update dependencies to use cudf 0.19 +- Removed conda from docker containers, leading to much smaller container sizes +- Added CUDA 11.2 support +- Added FastAI v2.3 support ## Bug Fixes -* Fix NVTabular preprocessing with HugeCTR inference +- Fix NVTabular preprocessing with HugeCTR inference # NVTabular v0.5.0 (13 April 2021) ## Improvements -* Adding Horovod integration to NVTabular's dataloaders, allowing you to use multiple GPU's to train TensorFlow and PyTorch models -* Adding a Groupby operation for use with session based recommender models -* Added ability to read and write datasets partitioned by a column, allowing -* Add example notebooks for using Triton Inference Server with NVTabular -* Restructure and simplify Criteo example notebooks -* Add support for PyTorch inference with Triton Inference Server +- Adding Horovod integration to NVTabular's dataloaders, allowing you to use multiple GPU's to train TensorFlow and PyTorch models +- Adding a Groupby operation for use with session based recommender models +- Added ability to read and write datasets partitioned by a column, allowing +- Add example notebooks for using Triton Inference Server with NVTabular +- Restructure and simplify Criteo example notebooks +- Add support for PyTorch inference with Triton Inference Server ## Bug Fixes -* Fix bug with preprocessing categorical columns with NVTabular not working with HugeCTR and Triton Inference Server [#707](https://github.com/NVIDIA/NVTabular/issues/707) +- Fix bug with preprocessing categorical columns with NVTabular not working with HugeCTR and Triton Inference Server [#707](https://github.com/NVIDIA/NVTabular/issues/707) # NVTabular v0.4.0 (9 March 2021) ## Breaking Changes -* The API for NVTabular has been significantly refactored, and existing code targeting the 0.3 API will need to be updated. -Workflows are now represented as graphs of operations, and applied using a sklearn 'transformers' style api. Read more by -checking out the [examples](https://nvidia-merlin.github.io/NVTabular/v0.4.0/examples/index.html) +- The API for NVTabular has been significantly refactored, and existing code targeting the 0.3 API will need to be updated. + Workflows are now represented as graphs of operations, and applied using a sklearn 'transformers' style api. 
Read more by
+  checking out the [examples](https://nvidia-merlin.github.io/NVTabular/v0.4.0/examples/index.html)

## Improvements

-* Triton integration support for NVTabular with TensorFlow and HugeCTR models
-* Recommended cloud configuration and support for AWS and GCP
-* Reorganized examples and documentation
-* Unified Docker containers for Merlin components (NVTabular, HugeCTR and Triton)
-* Dataset analysis and generation tools
+- Triton integration support for NVTabular with TensorFlow and HugeCTR models
+- Recommended cloud configuration and support for AWS and GCP
+- Reorganized examples and documentation
+- Unified Docker containers for Merlin components (NVTabular, HugeCTR and Triton)
+- Dataset analysis and generation tools

# NVTabular v0.3.0 (23 November 2020)

## Improvements

-* Add MultiHot categorical support for both preprocessing and dataloading
-* Add support for pretrained embeddings to the dataloaders
-* Add a Recsys2020 competition example notebook
-* Add ability to automatically map tensorflow feature columns to a NVTabular workflow
-* Multi-Node support
+- Add MultiHot categorical support for both preprocessing and dataloading
+- Add support for pretrained embeddings to the dataloaders
+- Add a Recsys2020 competition example notebook
+- Add ability to automatically map tensorflow feature columns to an NVTabular workflow
+- Multi-Node support

# NVTabular v0.2.0 (10 September 2020)

## Improvements

-* Add Multi-GPU support using Dask-cuDF
-* Add support for reading datasets from S3, GCS and HDFS
-* Add 11 new operators: ColumnSimilarity, Dropna, Filter, FillMedian, HashBucket, JoinGroupBy, JoinExternal, LambdaOp, NormalizeMinMax, TargetEncoding and DifferenceLag
-* Add HugeCTR integration and an example notebook showing an end to end workflow
-* Signicantly faster dataloaders featuring a unified backend between TensorFlow and PyTorch
+- Add Multi-GPU support using Dask-cuDF
+- Add support for reading datasets from S3, GCS and HDFS
+- Add 11 new operators: ColumnSimilarity, Dropna, Filter, FillMedian, HashBucket, JoinGroupBy, JoinExternal, LambdaOp, NormalizeMinMax, TargetEncoding and DifferenceLag
+- Add HugeCTR integration and an example notebook showing an end-to-end workflow
+- Significantly faster dataloaders featuring a unified backend between TensorFlow and PyTorch

# NVTabular v0.1.1 (3 June 2020)

## Improvements

-* Switch to using the release version of cudf 0.14
+- Switch to using the release version of cudf 0.14

## Bug Fixes

-* Fix PyTorch dataloader for compatibility with deep learning examples
-* Fix FillMissing operator with constant fill
-* Fix missing yaml dependency on conda install
-* Fix get_emb_sz off-by-one error
+- Fix PyTorch dataloader for compatibility with deep learning examples
+- Fix FillMissing operator with constant fill
+- Fix missing yaml dependency on conda install
+- Fix get_emb_sz off-by-one error

# NVTabular v0.1.0 - (13 May 2020)

-* Initial public release of NVTabular
+- Initial public release of NVTabular
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 9bb54e36eff..0ae1a4a31b2 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -4,27 +4,27 @@ If you are interested in contributing to NVTabular your contributions will fall
 into three categories:

 1. You want to report a bug, feature request, or documentation issue
-   - File an [issue](https://github.com/nvidia/NVTabular/issues/new/choose)
-     describing what you encountered or what you want to see changed.
- - The NVTabular team will evaluate the issues and triage them, scheduling - them for a release. If you believe the issue needs priority attention - comment on the issue to notify the team. + - File an [issue](https://github.com/nvidia/NVTabular/issues/new/choose) + describing what you encountered or what you want to see changed. + - The NVTabular team will evaluate the issues and triage them, scheduling + them for a release. If you believe the issue needs priority attention + comment on the issue to notify the team. 2. You want to propose a new Feature and implement it - - Post about your intended feature, and we shall discuss the design and - implementation. - - Once we agree that the plan looks good, go ahead and implement it, using - the [code contributions](#code-contributions) guide below. + - Post about your intended feature, and we shall discuss the design and + implementation. + - Once we agree that the plan looks good, go ahead and implement it, using + the [code contributions](#code-contributions) guide below. 3. You want to implement a feature or bug-fix for an outstanding issue - - Follow the [code contributions](#code-contributions) guide below. - - If you need more context on a particular issue, please ask and we shall - provide. + - Follow the [code contributions](#code-contributions) guide below. + - If you need more context on a particular issue, please ask and we shall + provide. ## Code contributions ### Your first issue 1. Read the project's [README.md](https://github.com/nvidia/NVTabular/blob/main/README.md) - to learn how to setup the development environment. + to learn how to setup the development environment. 2. Find an issue to work on. The best way is to look for the [good first issue](https://github.com/nvidia/NVTabular/issues?q=is%3Aissue+is%3Aopen+label%3A%22good+first+issue%22) or [help wanted](https://github.com/nvidia/NVTabular/issues?q=is%3Aissue+is%3Aopen+label%3A%22help+wanted%22) labels. @@ -46,7 +46,7 @@ Once you have gotten your feet wet and are more comfortable with the code, you can look at the prioritized issues of our next release in our [project boards](https://github.com/nvidia/NVTabular/projects). > **Pro Tip:** Always look at the release board with the highest number for -issues to work on. This is where NVTabular developers also focus their efforts. +> issues to work on. This is where NVTabular developers also focus their efforts. Look at the unassigned issues, and find an issue you are comfortable with contributing to. Start with _Step 3_ from above, commenting on the issue to let @@ -68,7 +68,7 @@ and help categorize the PR in our change log: repository or the project. By default, an unlabeled PR is listed at the top of the change log and is not -grouped under a heading like *Features* that groups similar PRs. +grouped under a heading like _Features_ that groups similar PRs. Labeling the PRs so we can categorize them is preferred. If, for some reason, you do not believe your PR should be included in the change diff --git a/README.md b/README.md index 8a4b8525fa6..bdc6210517b 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ NVTabular is a feature engineering and preprocessing library for tabular data that is designed to easily manipulate terabyte scale datasets and train deep learning (DL) based recommender systems. It provides high-level abstraction to simplify code and accelerates computation on the GPU using the [RAPIDS Dask-cuDF](https://github.com/rapidsai/cudf/tree/main/python/dask_cudf) library. 
-NVTabular is a component of [NVIDIA Merlin](https://developer.nvidia.com/nvidia-merlin), an open source framework for building and deploying recommender systems and works with the other Merlin components including [Merlin Models](https://github.com/NVIDIA-Merlin/models), [HugeCTR](https://github.com/NVIDIA/HugeCTR) and [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) to provide end-to-end acceleration of recommender systems on the GPU. Extending beyond model training, with NVIDIA’s [Triton Inference Server](https://github.com/NVIDIA/tensorrt-inference-server), the feature engineering and preprocessing steps performed on the data during training can be automatically applied to incoming data during inference. +NVTabular is a component of [NVIDIA Merlin](https://developer.nvidia.com/nvidia-merlin), an open source framework for building and deploying recommender systems and works with the other Merlin components including [Merlin Models](https://github.com/NVIDIA-Merlin/models), [HugeCTR](https://github.com/NVIDIA/HugeCTR) and [Merlin Systems](https://github.com/NVIDIA-Merlin/systems) to provide end-to-end acceleration of recommender systems on the GPU. Extending beyond model training, with NVIDIA’s [Triton Inference Server](https://github.com/NVIDIA/tensorrt-inference-server), the feature engineering and preprocessing steps performed on the data during training can be automatically applied to incoming data during inference. @@ -14,17 +14,17 @@ NVTabular is a component of [NVIDIA Merlin](https://developer.nvidia.com/nvidia- When training DL recommender systems, data scientists and machine learning (ML) engineers have been faced with the following challenges: -* **Huge Datasets**: Commercial recommenders are trained on huge datasets that may be several terabytes in scale. -* **Complex Data Feature Engineering and Preprocessing Pipelines**: Datasets need to be preprocessed and transformed so that they can be used with DL models and frameworks. In addition, feature engineering creates an extensive set of new features from existing ones, requiring multiple iterations to arrive at an optimal solution. -* **Input Bottleneck**: Data loading, if not well optimized, can be the slowest part of the training process, leading to under-utilization of high-throughput computing devices such as GPUs. -* **Extensive Repeated Experimentation**: The entire data engineering, training, and evaluation process can be repetitious and time consuming, requiring significant computational resources. +- **Huge Datasets**: Commercial recommenders are trained on huge datasets that may be several terabytes in scale. +- **Complex Data Feature Engineering and Preprocessing Pipelines**: Datasets need to be preprocessed and transformed so that they can be used with DL models and frameworks. In addition, feature engineering creates an extensive set of new features from existing ones, requiring multiple iterations to arrive at an optimal solution. +- **Input Bottleneck**: Data loading, if not well optimized, can be the slowest part of the training process, leading to under-utilization of high-throughput computing devices such as GPUs. +- **Extensive Repeated Experimentation**: The entire data engineering, training, and evaluation process can be repetitious and time consuming, requiring significant computational resources. NVTabular alleviates these challenges and helps data scientists and ML engineers: -* process datasets that exceed GPU and CPU memory without having to worry about scale. 
-* focus on what to do with the data and not how to do it by using abstraction at the operation level. -* prepare datasets quickly and easily for experimentation so that more models can be trained. -* deploy models into production by providing faster dataset transformation +- process datasets that exceed GPU and CPU memory without having to worry about scale. +- focus on what to do with the data and not how to do it by using abstraction at the operation level. +- prepare datasets quickly and easily for experimentation so that more models can be trained. +- deploy models into production by providing faster dataset transformation Learn more in the NVTabular [core features documentation](https://nvidia-merlin.github.io/NVTabular/main/core_features.html). @@ -38,14 +38,14 @@ The performance of the Criteo DRLM workflow also demonstrates the effectiveness NVTabular requires Python version 3.7+. Additionally, GPU support requires: -* CUDA version 11.0+ -* NVIDIA Pascal GPU or later (Compute Capability >=6.0) -* NVIDIA driver 450.80.02+ -* Linux or WSL +- CUDA version 11.0+ +- NVIDIA Pascal GPU or later (Compute Capability >=6.0) +- NVIDIA driver 450.80.02+ +- Linux or WSL #### Installing NVTabular Using Conda -NVTabular can be installed with Anaconda from the ```nvidia``` channel by running the following command: +NVTabular can be installed with Anaconda from the `nvidia` channel by running the following command: ``` conda install -c nvidia -c rapidsai -c numba -c conda-forge nvtabular python=3.7 cudatoolkit=11.2 @@ -68,23 +68,25 @@ NVTabular Docker containers are available in the [NVIDIA Merlin container repository](https://catalog.ngc.nvidia.com/?filters=&orderBy=scoreDESC&query=merlin). The following table summarizes the key information about the containers: -| Container Name | Container Location | Functionality | -| -------------------------- | ------------------ | ------------- | -| merlin-hugectr |https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr | NVTabular, HugeCTR, and Triton Inference | -| merlin-tensorflow |https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow | NVTabular, Tensorflow and Triton Inference | -| merlin-pytorch |https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch | NVTabular, PyTorch, and Triton Inference | +| Container Name | Container Location | Functionality | +| ----------------- | ------------------------------------------------------------------------------------ | ------------------------------------------ | +| merlin-hugectr | https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-hugectr | NVTabular, HugeCTR, and Triton Inference | +| merlin-tensorflow | https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-tensorflow | NVTabular, Tensorflow and Triton Inference | +| merlin-pytorch | https://catalog.ngc.nvidia.com/orgs/nvidia/teams/merlin/containers/merlin-pytorch | NVTabular, PyTorch, and Triton Inference | To use these Docker containers, you'll first need to install the [NVIDIA Container Toolkit](https://github.com/NVIDIA/nvidia-docker) to provide GPU support for Docker. You can use the NGC links referenced in the table above to obtain more information about how to launch and run these containers. To obtain more information about the software and model versions that NVTabular supports per container, see [Support Matrix](https://github.com/NVIDIA/NVTabular/blob/main/docs/source/resources/support_matrix.rst). 
### Notebook Examples and Tutorials We provide a [collection of examples](https://github.com/NVIDIA-Merlin/NVTabular/tree/main/examples) to demonstrate feature engineering with NVTabular as Jupyter notebooks: -* Introduction to NVTabular's High-Level API -* Advanced workflows with NVTabular -* NVTabular on CPU -* Scaling NVTabular to multi-GPU systems + +- Introduction to NVTabular's High-Level API +- Advanced workflows with NVTabular +- NVTabular on CPU +- Scaling NVTabular to multi-GPU systems In addition, NVTabular is used in many of our examples in other Merlin libraries: + - [End-To-End Examples with Merlin](https://github.com/NVIDIA-Merlin/Merlin/tree/main/examples) - [Training Examples with Merlin Models](https://github.com/NVIDIA-Merlin/models/tree/main/examples) - [Training Examples with Transformer4Rec](https://github.com/NVIDIA-Merlin/Transformers4Rec/tree/main/examples) diff --git a/bench/examples/MultiGPUBench.md b/bench/examples/MultiGPUBench.md index 5f5ad0730d9..f0461b53d82 100644 --- a/bench/examples/MultiGPUBench.md +++ b/bench/examples/MultiGPUBench.md @@ -2,23 +2,21 @@ The benchmark script described in this document is located at `NVTabular/examples/dask-nvtabular-criteo-benchmark.py`. -The [multi-GPU Criteo/DLRM benchmark](https://github.com/NVIDIA/NVTabular/blob/main/examples/dask-nvtabular-criteo-benchmark.py) is designed to measure the time required to preprocess the [Criteo (1TB) dataset](https://www.kaggle.com/c/criteo-display-ad-challenge/data) for Facebook’s [DLRM model](https://github.com/facebookresearch/dlrm). The user must specify the path of the raw dataset (using the `--data-path` flag), as well as the output directory for all temporary/final data (using the `--out-path` flag). +The [multi-GPU Criteo/DLRM benchmark](https://github.com/NVIDIA/NVTabular/blob/main/examples/dask-nvtabular-criteo-benchmark.py) is designed to measure the time required to preprocess the [Criteo (1TB) dataset](https://www.kaggle.com/c/criteo-display-ad-challenge/data) for Facebook’s [DLRM model](https://github.com/facebookresearch/dlrm). The user must specify the path of the raw dataset (using the `--data-path` flag), as well as the output directory for all temporary/final data (using the `--out-path` flag). ### Example Usage - ```python python dask-nvtabular-criteo-benchmark.py --data-path /path/to/criteo_parquet --out-path /out/dir/` ``` ### Dataset Requirements (Parquet) - -The script is designed with a parquet-formatted dataset in mind. Although csv files can also be handled by NVTabular, converting to parquet yields significantly better performance. To convert your dataset, try using the [conversion notebook](https://github.com/NVIDIA/NVTabular/blob/main/examples/optimize_criteo.ipynb) (located at `NVTabular/examples/optimize_criteo.ipynb`). +The script is designed with a parquet-formatted dataset in mind. Although csv files can also be handled by NVTabular, converting to parquet yields significantly better performance. To convert your dataset, try using the [conversion notebook](https://github.com/NVIDIA/NVTabular/blob/main/examples/optimize_criteo.ipynb) (located at `NVTabular/examples/optimize_criteo.ipynb`). ### General Notes on Parameter Tuning -The script was originally developed and tested on an NVIDIA DGX-1 machine (8x 32GB V100s, 1TB RAM). Users with limited device and/or host memory may experience memory errors with the default options. Depending on the system, these users may need to modify one or more of the “Algorithm Options” described below. 
For example, it may be necessary to expand the list of “high-cardinality” columns, increase the tree-width and/or use “disk” for the cat-cache options.
+The script was originally developed and tested on an NVIDIA DGX-1 machine (8x 32GB V100s, 1TB RAM). Users with limited device and/or host memory may experience memory errors with the default options. Depending on the system, these users may need to modify one or more of the “Algorithm Options” described below. For example, it may be necessary to expand the list of “high-cardinality” columns, increase the tree-width and/or use “disk” for the cat-cache options.

In addition to adjusting the algorithm details, users with limited device memory may also find it useful to adjust the `--device-pool-frac` and/or `--device-limit-frac` options (reduce both fractions).

@@ -32,34 +30,34 @@

For all users, the most important benchmark options include the following.

See option descriptions below for more information.

-
## Parameter Overview

### System Options

-
##### Visible Devices

-By default, the script will deploy a cluster with a single Dask-CUDA worker on every GPU specified by the `CUDA_VISIBLE_DEVICES` environment variable. The user may also specify distinct list of devices using the `-d` flag. 
-e.g. `-d 1,2,3` 
+By default, the script will deploy a cluster with a single Dask-CUDA worker on every GPU specified by the `CUDA_VISIBLE_DEVICES` environment variable. The user may also specify a distinct list of devices using the `-d` flag.
+e.g. `-d 1,2,3`

##### Communication Protocol

-By default, the Dask-CUDA cluster will use a “tcp” protocol for inter-process communication. Users may also elect to use “ucx” to take advantage of NVLink and/or Infiniband technologies. The “ucx” option is highly recommended, but requires ucx-py to be installed. 
-e.g. `-p ucx` 
+By default, the Dask-CUDA cluster will use a “tcp” protocol for inter-process communication. Users may also elect to use “ucx” to take advantage of NVLink and/or Infiniband technologies. The “ucx” option is highly recommended, but requires ucx-py to be installed.
+e.g. `-p ucx`

##### Memory Management

-By default, the Dask-CUDA workers will use an RMM memory pool to avoid memory-allocation bottlenecks, and will spill data from device to host memory when Dask-aware usage exceeds a specific threshold. The size of the RMM memory pool On each worker defaults to 90% of the total capacity, but the user can specify a different fraction using the `--device-pool-frac` flag. If `0` is specified, no memory pool will be used.
+
+By default, the Dask-CUDA workers will use an RMM memory pool to avoid memory-allocation bottlenecks, and will spill data from device to host memory when Dask-aware usage exceeds a specific threshold. The size of the RMM memory pool on each worker defaults to 90% of the total capacity, but the user can specify a different fraction using the `--device-pool-frac` flag. If `0` is specified, no memory pool will be used.

e.g. `--device-pool-frac 0.5`

-By default, the Dask-CUDA workers will begin spilling data from device memory to host memory when the input/output data of in-memory tasks exceeds 80% of the total capacity. For systems with limited device memory, temporary allocation made during task execution may still lead to out-of-memory (OOM) errors. To modify the threshold, the user can specify a different fraction Using the `--device-limit-frac` flag. 
+By default, the Dask-CUDA workers will begin spilling data from device memory to host memory when the input/output data of in-memory tasks exceeds 80% of the total capacity. For systems with limited device memory, temporary allocations made during task execution may still lead to out-of-memory (OOM) errors. To modify the threshold, the user can specify a different fraction using the `--device-limit-frac` flag.

e.g. `--device-limit-frac 0.66`

##### IO Threads (Writing)

+
By default, multi-threading will not be used to write output data. Some systems may see better performance when 2+ threads are used to overlap sequential writes by the same worker. The user can specify a specific number of threads using the `--num-io-threads` flag.

e.g. `--num-io-threads 2`

@@ -68,82 +66,87 @@ Note that multi-threading may reduce the optimal partition size (see the `--part

### Data-Decomposition Options

-
##### Partition Sizes (dataset chunking)

-To process out-of-core data, NVTabular uses Dask-CuDF to partition the data into a lazily-evaluated collection of CuDF DataFrame objects. By default the maximum size of these so-called partitions will be approximately 12.5% of a single-GPUs memory capacity. The user can modify the desired size of partitions by passing a fractional value with the `--part-mem-frac` flag. 
+
+To process out-of-core data, NVTabular uses Dask-CuDF to partition the data into a lazily-evaluated collection of CuDF DataFrame objects. By default, the maximum size of these so-called partitions will be approximately 12.5% of a single GPU's memory capacity. The user can modify the desired size of partitions by passing a fractional value with the `--part-mem-frac` flag.

e.g. `--part-mem-frac 0.16`

##### Output-File Count

-NVTabular uses the file structure of the output dataset to shuffle data as it is written to disk. That is, after a Dask-CUDA worker transforms a dataset partition, it will append (random) splits of that partition to some number of output files. Each worker will manage its own set of output files. The `--out-files-per-proc` can be used to modify the number of output files managed by each worker (defaults to 8). Since output files are uniquely mapped to processes, the total number of output files is multiplied by the number of workers. 
+
+NVTabular uses the file structure of the output dataset to shuffle data as it is written to disk. That is, after a Dask-CUDA worker transforms a dataset partition, it will append (random) splits of that partition to some number of output files. Each worker will manage its own set of output files. The `--out-files-per-proc` flag can be used to modify the number of output files managed by each worker (defaults to 8). Since output files are uniquely mapped to processes, the total number of output files is multiplied by the number of workers.

e.g. `--out-files-per-proc 24`

-Note that a large number of output files may be required to perform the “PER_WORKER” shuffle option (see description of the `—shuffle` flag below). This is because each file will be fully shuffled in device memory. 
+Note that a large number of output files may be required to perform the “PER_WORKER” shuffle option (see description of the `--shuffle` flag below). This is because each file will be fully shuffled in device memory.

##### Shuffling

-NVTabular currently offers two options for shuffling output data to disk. The `“PER_PARTITION”` option means that each partition will be independently shuffled after transformation, and then appended to some number of distinct output files. 
The number of output files is specified by the `--out-files-per-proc` flag (described above), and the files are uniquely mapped to each worker. The `“PER_WORKER”` option follows the same process, but the “files” are initially written to in-host-memory, and then reshuffled and persisted to disk after the full dataset is processed. The user can specify the specific shuffling algorithm to use with the `—shuffle` flag. 
+
+NVTabular currently offers two options for shuffling output data to disk. The `“PER_PARTITION”` option means that each partition will be independently shuffled after transformation, and then appended to some number of distinct output files. The number of output files is specified by the `--out-files-per-proc` flag (described above), and the files are uniquely mapped to each worker. The `“PER_WORKER”` option follows the same process, but the “files” are initially written to in-host-memory, and then reshuffled and persisted to disk after the full dataset is processed. The user can specify the specific shuffling algorithm to use with the `--shuffle` flag.

e.g. `--shuffle PER_WORKER`

Note that the `“PER_WORKER”` option may require a larger-than-default output-file count (see description of the `--out-files-per-proc` flag above).

-
### Preprocessing Options

##### Column Names

+
By default, this script will assume the following categorical and continuous column names.

- Categorical: “C1”, “C2”, … , “C26”
- Continuous: “I1”, “I2”, … , “I13”

-The user may specify different column names, or a subset of these names, by passing a column-separated list to the `--cat-names` and/or `—cont_names` flags. 
+The user may specify different column names, or a subset of these names, by passing a comma-separated list to the `--cat-names` and/or `--cont_names` flags.

-e.g. `—cat-names C01,C02 --cont_names I01,I02 —high_cards C01` 
+e.g. `--cat-names C01,C02 --cont_names I01,I02 --high-cards C01`

Note that, if your dataset includes non-default column names, you should also use the `--high-cards` flag (described below) to specify the names of high-cardinality columns.

##### Categorical Encoding

-By default, all categorical-column groups will be used for the final encoding transformation. The user can also specify a frequency threshold for groups to be included in the encoding with the `—freq-limit` (or `-f`) flag. 
+
+By default, all categorical-column groups will be used for the final encoding transformation. The user can also specify a frequency threshold for groups to be included in the encoding with the `--freq-limit` (or `-f`) flag.

e.g. `-f 15` (groups with fewer than 15 instances in the dataset will not be used for encoding)

+
### Algorithm Options

##### High-Cardinality Columns

-As described below, the specific algorithm used for categorical encoding can be column dependent. In this script, we use special options for a subset of “high-cardinality” columns. By default, these columns are "C20,C1,C22,C10”. However, the user can specify different column names with the `--high-cards` flag. 
+As described below, the specific algorithm used for categorical encoding can be column dependent. In this script, we use special options for a subset of “high-cardinality” columns. By default, these columns are “C20,C1,C22,C10”. However, the user can specify different column names with the `--high-cards` flag.

e.g. `--high-cards C01,C10`

Note that only the columns specified with this flag (or the default columns) will be targeted by the `--tree-width` and/or `--cat-cache-high` flags (described below).
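+
+For orientation, the algorithm options in this section (including those described below) correspond roughly to parameters of NVTabular's `Categorify` operator. A minimal sketch under that assumption, with illustrative values and the default Criteo column names:
+
+```python
+import nvtabular as nvt
+from nvtabular import ops
+
+high_cards = ["C20", "C1", "C22", "C10"]  # default high-cardinality columns
+
+cat_features = [f"C{i}" for i in range(1, 27)] >> ops.Categorify(
+    freq_threshold=15,                              # cf. --freq-limit / -f
+    tree_width={col: 4 for col in high_cards},      # cf. --tree-width
+    cat_cache={col: "host" for col in high_cards},  # cf. --cat-cache-high
+    on_host=True,  # stage groupby data in host memory; --cats-on-device maps to on_host=False
+)
+workflow = nvt.Workflow(cat_features)
+```
+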
##### Global-Categories Calculation (GroupbyStatistics)

-In order encode categorical columns, NVTabular needs to calculate a global list of unique categories for each categorical column. This is accomplished with a global groupby-aggregation-based tree reduction on each column. In order to avoid memory pressure on the device, intermediate groupby data is moved to host memory between tasks in the global-aggregation tree. For users with a sufficient amount of total GPU memory, this device-to-host transfer can be avoided with the by adding the `—cats-on-device` flag to the execution command. 
+
+In order to encode categorical columns, NVTabular needs to calculate a global list of unique categories for each categorical column. This is accomplished with a global groupby-aggregation-based tree reduction on each column. In order to avoid memory pressure on the device, intermediate groupby data is moved to host memory between tasks in the global-aggregation tree. For users with a sufficient amount of total GPU memory, this device-to-host transfer can be avoided by adding the `--cats-on-device` flag to the execution command.

e.g. `--cats-on-device`

-In addition to controlling device-to-host data movement, the user can also use the `--tree-width` flag to specify the width of the groupby-aggregation tree for high-cardinality columns. Although NVTabular allows the user to specify the tree-width for each column independently, this option will target all columns specified with `—high-cards`. 
+In addition to controlling device-to-host data movement, the user can also use the `--tree-width` flag to specify the width of the groupby-aggregation tree for high-cardinality columns. Although NVTabular allows the user to specify the tree-width for each column independently, this option will target all columns specified with `--high-cards`.

e.g. `--tree-width 4`

-
##### Categorical Encoding (Categorify)

-During the categorical-encoding transformation stage, the column-specific unique values must be read into GPU memory for the operation. Since each NVTabular process will only operate on a single partition at a time, the same unique-value statistics need to be re-read (for every categorical column) for each partition that is transformed. Unsurprisingly, the performance of categorical encoding can be dramatically improved by caching the unique values on each worker between transformation operations.
-The user can specify caching location for low- and high-cardinality columns separately. Recall that high-cardinality columns can be specified with `—high_cards` (and all remaining categorical columns will be treated as low-cardinality”). The user can specify the caching location of low-cardinality columns with the `--cat-cache-low` flag, and high-cardinality columns with the `--cat-cache-low` flag. For both cases, the options are “device”, “host”, or “disk”.
+During the categorical-encoding transformation stage, the column-specific unique values must be read into GPU memory for the operation. Since each NVTabular process will only operate on a single partition at a time, the same unique-value statistics need to be re-read (for every categorical column) for each partition that is transformed. Unsurprisingly, the performance of categorical encoding can be dramatically improved by caching the unique values on each worker between transformation operations.

-e.g. `--cat-cache-low device --cat-cache-high host`

+The user can specify the caching location for low- and high-cardinality columns separately. 
Recall that high-cardinality columns can be specified with `--high-cards` (and all remaining categorical columns will be treated as low-cardinality). The user can specify the caching location of low-cardinality columns with the `--cat-cache-low` flag, and high-cardinality columns with the `--cat-cache-high` flag. For both cases, the options are “device”, “host”, or “disk”.

+e.g. `--cat-cache-low device --cat-cache-high host`

### Diagnostics Options

-
##### Dashboard

-A wonderful advantage of the Dask-Distributed ecosystem is the convenient set of diagnostics utilities. By default (if Bokeh is installed on your system), the distributed scheduler will host a diagnostics dashboard at  `http://localhost:8787/status` (where localhost may need to be changed to the the IP address where the scheduler is running). If port 8787 is already in use, a different (random) port will be used. However, the user can specify a specific port using the `—dashboard-port` flag. 
+
+A wonderful advantage of the Dask-Distributed ecosystem is the convenient set of diagnostics utilities. By default (if Bokeh is installed on your system), the distributed scheduler will host a diagnostics dashboard at `http://localhost:8787/status` (where localhost may need to be changed to the IP address where the scheduler is running). If port 8787 is already in use, a different (random) port will be used. However, the user can specify a specific port using the `--dashboard-port` flag.

e.g. `--dashboard-port 3787`

##### Profile

-In addition to hosting a diagnostics dashboard, the distributed cluster can also collect and export profiling data on all scheduler and worker processes. To export an interactive profile report, the user can specify a file path with the `—profile` flag. If this flag is not used, no profile will be collected/exported. 
+
+In addition to hosting a diagnostics dashboard, the distributed cluster can also collect and export profiling data on all scheduler and worker processes. To export an interactive profile report, the user can specify a file path with the `--profile` flag. If this flag is not used, no profile will be collected/exported.

e.g. `--profile my-profile.html`

diff --git a/docs/README.md b/docs/README.md
index a81d0367ba7..a0a8439838c 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -69,22 +69,22 @@ Run the script again to confirm that the URL is no longer reported as a broken l

### Source management: README and index files

-* To preserve Sphinx's expectation that all source files are child files and directories
+- To preserve Sphinx's expectation that all source files are child files and directories
  of the `docs/source` directory, other content, such as the `notebooks` directory, is
  copied to the source directory. You can determine which directories are copied by
  viewing `docs/source/conf.py` and looking for the `copydirs_additional_dirs` list.
  Directories are specified relative to the Sphinx source directory, `docs/source`.

-* One consequence of the preceding bullet is that any change to the original files,
+- One consequence of the preceding bullet is that any change to the original files,
  such as adding or removing a topic, requires a similar change to the `docs/source/toc.yaml`
-  file. Updating the `docs/source/toc.yaml` file is not automatic.
+  file. Updating the `docs/source/toc.yaml` file is not automatic. 
-* Because the GitHub browsing expectation is that a `README.md` file is rendered when you +- Because the GitHub browsing expectation is that a `README.md` file is rendered when you browse a directory, when a directory is copied, the `README.md` file is renamed to `index.md` to meet the HTML web server expectation of locating an `index.html` file in a directory. -* Add the file to the `docs/source/toc.yaml` file. Keep in mind that notebooks are +- Add the file to the `docs/source/toc.yaml` file. Keep in mind that notebooks are copied into the `docs/source/` directory, so the paths are relative to that location. Follow the pattern that is already established and you'll be fine. @@ -95,13 +95,13 @@ the underscores in the link even though they are converted to hyphens in the HTM Refer to the following examples from HugeCTR: -* `../QAList.md#24-how-to-set-workspace_size_per_gpu_in_mb-and-slot_size_array` -* `./api/python_interface.md#save_params_to_files-method` +- `../QAList.md#24-how-to-set-workspace_size_per_gpu_in_mb-and-slot_size_array` +- `./api/python_interface.md#save_params_to_files-method` #### Docs-to-docs links There is no concern for the GitHub browsing experience for files in the `docs/source/` directory. -You can use a relative path for the link. For example, the following link is in the +You can use a relative path for the link. For example, the following link is in the `docs/source/hugectr_user_guide.md` file and links to the "Build HugeCTR from Source" heading in the `docs/source/hugectr_contributor_guide.md` file: @@ -143,6 +143,7 @@ a relative path works both in the HTML docs page and in the repository browsing ```markdown ### Some awesome feature + + ...snip... + ...snip... + Added the [awesome notebook](examples/awesome_notebook.ipynb) to show how to use the feature. diff --git a/docs/source/core_features.md b/docs/source/core_features.md index a3d92461790..6ac0132ed95 100644 --- a/docs/source/core_features.md +++ b/docs/source/core_features.md @@ -1,24 +1,25 @@ # Core Features NVTabular supports the following core features: -* [TensorFlow and PyTorch Interoperability](#tensorflow-and-pytorch-interoperability) -* [HugeCTR Interoperability](#hugectr-interoperability) -* [Multi-GPU Support](#multi-gpu-support) -* [Multi-Node Support](#multi-node-support) -* [Multi-Hot Encoding and Pre-Existing Embeddings](#multi-hot-encoding-and-pre-existing-embeddings) -* [Shuffling Datasets](#shuffling-datasets) -* [Cloud Integration](#cloud-integration) -* [CPU Support](#cpu-support) -## TensorFlow and PyTorch Interoperability ## +- [TensorFlow and PyTorch Interoperability](#tensorflow-and-pytorch-interoperability) +- [HugeCTR Interoperability](#hugectr-interoperability) +- [Multi-GPU Support](#multi-gpu-support) +- [Multi-Node Support](#multi-node-support) +- [Multi-Hot Encoding and Pre-Existing Embeddings](#multi-hot-encoding-and-pre-existing-embeddings) +- [Shuffling Datasets](#shuffling-datasets) +- [Cloud Integration](#cloud-integration) +- [CPU Support](#cpu-support) + +## TensorFlow and PyTorch Interoperability In addition to providing mechanisms for transforming the data to prepare it for deep learning models, we also have framework-specific dataloaders implemented to help optimize getting that data to the GPU. Under a traditional dataloading scheme, data is read item by item and collated into a batch. With PyTorch, multiple processes can create many batches at the same time. 
However, this still leads to many individual rows of tabular data being accessed independently, which impacts I/O, especially when this data is on the disk and not in the CPU memory. TensorFlow loads and shuffles TFRecords by adopting a windowed buffering scheme that loads data sequentially into a buffer, from which it randomly samples batches, replenishing the buffer with the next sequential elements from the disk. Larger buffer sizes ensure more randomness, but can quickly bottleneck performance as TensorFlow tries to keep the buffer saturated. Smaller buffer sizes mean that datasets, which aren't uniformly distributed on the disk, lead to biased sampling and potentially degraded convergence.

-## HugeCTR Interoperability ##
+## HugeCTR Interoperability

NVTabular is also capable of preprocessing datasets that can be passed to HugeCTR for training. See the [HugeCTR Example Notebook](https://github.com/NVIDIA-Merlin/NVTabular/blob/main/examples/scaling-criteo/03-Training-with-HugeCTR.ipynb) for details about how this works.

-## Multi-GPU Support ##
+## Multi-GPU Support

NVTabular supports multi-GPU scaling with [Dask-CUDA](https://github.com/rapidsai/dask-cuda) and [dask.distributed](https://distributed.dask.org/en/latest/). To enable distributed parallelism, the NVTabular `Workflow` must be initialized with a `dask.distributed.Client` object as follows:

@@ -39,41 +40,42 @@ Currently, there are many ways to deploy a "cluster" for Dask. This [article](ht
Since NVTabular already uses [Dask-CuDF](https://docs.rapids.ai/api/cudf/stable/) for internal data processing, there are no other requirements for multi-GPU scaling. With that said, the parallel performance can depend strongly on (1) the size of `Dataset` partitions, (2) the shuffling procedure used for data output, and (3) the specific arguments used for both global-statistics and transformation operations. For additional information, see [Multi-GPU](https://github.com/NVIDIA/NVTabular/blob/main/examples/multi-gpu-toy-example/multi-gpu_dask.ipynb) for a simple step-by-step example.

-## Multi-Node Support ##
+## Multi-Node Support

NVTabular supports multi-node scaling with [Dask-CUDA](https://github.com/rapidsai/dask-cuda) and [dask.distributed](https://distributed.dask.org/en/latest/). To enable distributed parallelism, start a cluster and connect to it to run the application by doing the following:

-1) Start the scheduler `dask-scheduler`.
-2) Start the workers `dask-cuda-worker schedulerIP:schedulerPort`.
-3) Run the NVTabular application where the NVTabular `Workflow` has been initialized as described in the Multi-GPU Support section.
+1. Start the scheduler `dask-scheduler`.
+2. Start the workers `dask-cuda-worker schedulerIP:schedulerPort`.
+3. Run the NVTabular application where the NVTabular `Workflow` has been initialized as described in the Multi-GPU Support section.

For a detailed description of the methods that can be used to start a cluster, please read this [article](https://blog.dask.org/2020/07/23/current-state-of-distributed-dask-clusters).

-## Multi-Hot Encoding and Pre-Existing Embeddings ##
+## Multi-Hot Encoding and Pre-Existing Embeddings

NVTabular supports the:

-* processing of datasets with multi-hot categorical columns.
-* passing of continuous vector features like pre-trained embeddings, which includes basic preprocessing and feature engineering, as well as full support in the dataloaders for training models with both TensorFlow and PyTorch. 
+- processing of datasets with multi-hot categorical columns.
+- passing of continuous vector features like pre-trained embeddings, which includes basic preprocessing and feature engineering, as well as full support in the dataloaders for training models with both TensorFlow and PyTorch.

Multi-hot lets you represent a set of categories as a single feature. For example, in a movie recommendation system, each movie might have a list of genres associated with it like comedy, drama, horror, or science fiction. Since movies can belong to more than one genre, we can't use single-hot encoding like we are doing for scalar columns. Instead we train models with multi-hot embeddings for these features by having the deep learning model look up an embedding for each category in the list and then average all the embeddings for each row.

Both multi-hot categoricals and vector continuous features are represented using list columns in our datasets. cuDF has recently added support for list columns, and we're leveraging that support in NVTabular to power this feature.

-Our Categorify and HashBucket operators can map list columns down to small contiguous integers, which are suitable for use in an embedding lookup table. This is only possible if the dataset contains two rows like ```[['comedy', 'horror'], ['comedy', 'sciencefiction']]``` so that NVTabular can transform the strings for each row into categorical IDs like ```[[0, 1], [0, 2]]``` to be used in our embedding layers.
+Our Categorify and HashBucket operators can map list columns down to small contiguous integers, which are suitable for use in an embedding lookup table. For example, if the dataset contains two rows like `[['comedy', 'horror'], ['comedy', 'sciencefiction']]`, NVTabular can transform the strings for each row into categorical IDs like `[[0, 1], [0, 2]]` to be used in our embedding layers.

-Our PyTorch and TensorFlow dataloaders have been extended to handle both categorical and continuous list columns. In TensorFlow, the KerasSequenceLoader class will transform each list column into two tensors representing the values and offsets into those values for each batch. These tensors can be converted into RaggedTensors for multi-hot columns, and for vector continuous columns where the offsets tensor can be safely ignored. We've provided a ```nvtabular.framework_utils.tensorflow.layers.DenseFeatures``` Keras layer that will automatically handle these conversions for both continuous and categorical columns. For PyTorch, there's support for multi-hot columns to our ```nvtabular.framework_utils.torch.models.Model``` class, which internally is using the PyTorch [EmbeddingBag](https://pytorch.org/docs/stable/generated/torch.nn.EmbeddingBag.html) layer to handle the multi-hot columns.
+Our PyTorch and TensorFlow dataloaders have been extended to handle both categorical and continuous list columns. In TensorFlow, the KerasSequenceLoader class will transform each list column into two tensors representing the values and offsets into those values for each batch. These tensors can be converted into RaggedTensors for multi-hot columns; for vector continuous columns, the offsets tensor can be safely ignored. We've provided an `nvtabular.framework_utils.tensorflow.layers.DenseFeatures` Keras layer that will automatically handle these conversions for both continuous and categorical columns. 
For PyTorch, we've added support for multi-hot columns to our `nvtabular.framework_utils.torch.models.Model` class, which internally uses the PyTorch [EmbeddingBag](https://pytorch.org/docs/stable/generated/torch.nn.EmbeddingBag.html) layer to handle the multi-hot columns.

-## Shuffling Datasets ##
+## Shuffling Datasets

NVTabular makes it possible to shuffle during dataset creation. This creates a uniformly shuffled dataset that allows the dataloader to load large contiguous chunks of data, which are already randomized across the entire dataset. NVTabular also makes it possible to control the number of chunks that are combined into a batch, providing flexibility when trading off between performance and true randomization. This mechanism is critical when dealing with datasets that exceed CPU memory and individual epoch shuffling is desired during training. A full shuffle of such a dataset can exceed the training time for the epoch by several orders of magnitude.

-## Cloud Integration ##
+## Cloud Integration

NVTabular offers cloud integration with Amazon Web Services (AWS) and Google Cloud Platform (GCP), giving you the ability to build, train, and deploy models on the cloud using datasets. For additional information, see [Amazon Web Services](./resources/cloud_integration.md#amazon-web-services) and [Google Cloud Platform](./resources/cloud_integration.md#google-cloud-platform).

-## CPU Support ##
+## CPU Support

NVTabular supports CPU using [pandas](https://pandas.pydata.org/), [pyarrow](https://arrow.apache.org/docs/python/), and [dask dataframe](https://examples.dask.org/dataframe.html). To enable CPU, the Dataset class must be initialized with the `cpu` parameter as follows:

+
```
dataset = Dataset(path, cpu=True)
```
diff --git a/docs/source/resources/architecture.md b/docs/source/resources/architecture.md
index 9acdb995e17..b044255f6f1 100644
--- a/docs/source/resources/architecture.md
+++ b/docs/source/resources/architecture.md
@@ -1,5 +1,4 @@
-Architecture
-============
+# Architecture

The NVTabular engine uses the [RAPIDS](http://www.rapids.ai) [Dask-cuDF library](https://github.com/rapidsai/dask-cuda), which provides the bulk of the functionality for accelerating dataframe operations on the GPU and scaling across multiple GPUs. NVTabular provides functionality commonly found in deep learning recommendation workflows, allowing you to focus on what you want to do with your data, and not how you need to do it. NVTabular also provides a template for our core compute mechanism, which is referred to as Operations (ops), allowing you to build your own custom ops from cuDF and other libraries.

@@ -9,13 +8,13 @@ Once NVTabular is installed, the next step is to define the preprocessing and fe

Operations are a reflection of the way in which compute happens on the GPU across large datasets. There are two types of compute:

-* the type that touches the entire dataset (or some large chunk of it)
-* the type that operates on a single row
+- the type that touches the entire dataset (or some large chunk of it)
+- the type that operates on a single row

Operations split the compute into two phases:

-* Statistics Gathering is the first phase where operations that cross the row boundary can occur. An example of this would be in the Normalize op that relies on two statistics: mean and standard deviation. To normalize a row, we must first have these two values calculated using a Dask-cudf graph. 
-* Transform is the second phase that uses the statistics, which were created earlier, to modify the dataset and transform the data. NVTabular allows for the application of transforms, which doesn't only take place during the modification of the dataset but also during dataloading. The same transforms can also be applied with Inference.
+- Statistics Gathering is the first phase where operations that cross the row boundary can occur. An example of this is the Normalize op, which relies on two statistics: mean and standard deviation. To normalize a row, we must first have these two values calculated using a Dask-cudf graph.
+- Transform is the second phase that uses the statistics, which were created earlier, to modify the dataset and transform the data. NVTabular allows transforms to be applied not only when the dataset is modified but also during dataloading. The same transforms can also be applied during inference.

NVTabular's preprocessing and feature engineering workflows are directed graphs of operators, which are applied to user-defined groups of columns. Defining this graph is decoupled from the Workflow class, and lets users easily define complicated graphs of operations on their own custom-defined sets of columns. The NVTabular workflow uses an API similar to the one noted on [scikit-learn](https://scikit-learn.org/stable/data_transforms.html) for dataset transformations. Statistics are calculated using a 'fit' method and applied with a 'transform' method. The NVTabular Dataset object can handle both the input and output for datasets using the ‘transform’ method of the workflow, taking an input dataset and returning it as output in the form of a transformed dataset.

@@ -29,8 +28,9 @@ cont_features = CONT_COLUMNS >> >> >> ...

## A Higher Level of Abstraction

The NVTabular code is targeted at the operator level and not the dataframe level, which provides a method for specifying the operation that you want to perform, as well as the columns or type of data that you want to perform it on. There are two types of operators:

-* Base Operator: It transforms columns using a transform method that processes the cudf dataframe object and a list of columns and returns the transformed cudf dataframe object. It also declares the columns that are produced using the ‘output_columns_names’ method and additional column groups using the ‘dependencies’ method.
-* StatOperator: A subclass that uses a 'fit' method to calculate statistics on a dataframe, a 'finalize' method to combine different statistics from various dask workers, and save/load methods to handle serialization.
+
+- Base Operator: It transforms columns using a transform method that processes the cudf dataframe object and a list of columns and returns the transformed cudf dataframe object. It also declares the columns that are produced using the ‘output_columns_names’ method and additional column groups using the ‘dependencies’ method.
+- StatOperator: A subclass that uses a 'fit' method to calculate statistics on a dataframe, a 'finalize' method to combine different statistics from various dask workers, and save/load methods to handle serialization.

A flexible method is used for defining the operators in the workflow, which is treated as a directed acyclic graph of operators on a set of columns. 
Operators take in column sets of identical types and perform the operation across each column; during the final operation, the output is transformed into a long tensor for categorical variables or a float tensor for continuous variables. Operators can be chained to allow for more complex feature engineering or preprocessing. Chaining operators to the ColumnGroup defines the graph, which is necessary to produce the output dataset. The chained operators replace the chained columns by transforming the columns while retaining the same column names.

@@ -69,6 +69,7 @@ We can easily convert this workflow definition into a graph, and visualize the f

```
(cat_features+cont_features+label_feature).graph
```
+
![NVTabular Workflow Graph](/images/nvt_workflow_graph.png)

The Rename operator can be used to change the names of columns. This operator provides several different options for renaming columns such as applying a user-defined function to get new column names, as well as appending a suffix to each column. Refer to the API documentation for the {class}`Rename ` operator.

diff --git a/docs/source/resources/cloud_integration.md b/docs/source/resources/cloud_integration.md
index d1201843c3d..0e7137a63eb 100644
--- a/docs/source/resources/cloud_integration.md
+++ b/docs/source/resources/cloud_integration.md
@@ -1,5 +1,4 @@
-Cloud Integration
-=================
+# Cloud Integration

```{contents}
---
@@ -9,13 +8,13 @@
backlinks: none
---
```

-## Amazon Web Services ##
+## Amazon Web Services

Amazon Web Services (AWS) offers [EC2 instances with NVIDIA GPU support](https://aws.amazon.com/ec2/instance-types/#Accelerated_Computing). NVTabular can be used with 1x, 4x, or 8x GPU instances or multiple nodes. We're using an EC2 instance with 8x NVIDIA A100 GPUs to demonstrate the steps below. Check out the $/h for this instance type and adjust the type.

To run NVTabular on the cloud using AWS, do the following:

1. Start the AWS EC2 instance with the [NVIDIA Deep Learning AMI image](https://aws.amazon.com/marketplace/pp/NVIDIA-NVIDIA-Deep-Learning-AMI/B076K31M1S) using the aws-cli.

+
```
# Starts the P4D instance with 8x NVIDIA A100 GPUs (take a look at the $/h for this instance type before using them)
aws ec2 run-instances --image-id ami-04c0416d6bd8e4b1f --count 1 --instance-type p4d.24xlarge --key-name --security-groups
```

@@ -42,6 +42,7 @@ To run NVTabular on the cloud using AWS, do the following:
```

4. Launch the NVTabular Docker container by running the following command:

+
```
docker run --gpus all --rm -it -p 8888:8888 -p 8797:8787 -p 8796:8786 --ipc=host --cap-add SYS_PTRACE -v /mnt/raid:/raid nvcr.io/nvidia/nvtabular:0.3 /bin/bash
```

@@ -51,16 +52,17 @@ To run NVTabular on the cloud using AWS, do the following:
jupyter-lab --allow-root --ip='0.0.0.0' --NotebookApp.token=''
```

-## Google Cloud Platform ##
+## Google Cloud Platform

The Google Cloud Platform (GCP) offers [Compute Engine instances with NVIDIA GPU support](https://cloud.google.com/compute/docs/gpus). We're using a VM with 8x NVIDIA A100 GPUs and eight local SSD-NVMe devices configured as RAID 0 to demonstrate the steps below.

To run NVTabular on the cloud using GCP, do the following:

1. Configure and create the VM as follows:
-   * **GPU**: 8xA100 (a2-highgpu-8g)
-   * **Boot Disk**: Ubuntu version 18.04
-   * **Storage**: Local 8xSSD-NVMe
+
+   - **GPU**: 8xA100 (a2-highgpu-8g)
+   - **Boot Disk**: Ubuntu version 18.04
+   - **Storage**: Local 8xSSD-NVMe

2. 
Install the NVIDIA drivers and CUDA by running the following commands:

@@ -108,7 +110,7 @@ To run NVTabular on the cloud using GCP, do the following:
docker run --gpus all --rm -it -p 8888:8888 -p 8797:8787 -p 8796:8786 --ipc=host --cap-add SYS_PTRACE -v /mnt/raid:/raid nvcr.io/nvidia/nvtabular:0.3 /bin/bash
```

-## Databricks ##
+## Databricks

Databricks has developed a web-based platform on top of Apache Spark to provide automated cluster management. Databricks currently supports [custom containers](https://docs.databricks.com/clusters/custom-containers.html)
@@ -127,23 +129,27 @@ To run NVTabular on Databricks, do the following:
**NOTE**: All versions of the NVTabular conda repo are listed [here](https://anaconda.org/nvidia/nvtabular/files?version=).

4. Clone the cloud-ml-example repo by running the following command:

+
```
git clone https://github.com/rapidsai/cloud-ml-examples.git
```

5. Add the selected version of the NVTabular Conda repo to the [rapids-spec.txt](https://github.com/rapidsai/cloud-ml-examples/blob/main/databricks/docker/rapids-spec.txt) file by running the following command:

+
```
cd databricks
echo "https://conda.anaconda.org/nvidia/linux-64/nvtabular-0.6.1-py38_0.tar.bz2" >> docker/rapids-spec.txt
```

6. To install PyTorch, add the fastai pip package install to the Dockerfile by running the following command:

+
```
RUN pip install fastai
```

7. Build the container and push it to Docker Hub or the AWS Elastic Container Registry by running the following command:

+
```
docker build --tag /databricks_nvtabular:latest
docker push /databricks_nvtabular:latest
```

9. Select a GPU node for the Worker and Driver. Once the Databricks cluster is up, NVTabular will be running inside of it.

-## AWS SageMaker ##
+## AWS SageMaker

[AWS SageMaker](https://aws.amazon.com/sagemaker/) is a service from AWS to "build, train and deploy machine learning" models. It automates and manages the MLOps workflow. It supports Jupyter notebook instances, enabling users to work directly in Jupyter Notebook or JupyterLab without any additional configuration. In this section, we will explain how to run NVIDIA Merlin (NVTabular) on AWS SageMaker notebook instances. We adapted the work of [Eugene](https://twitter.com/eugeneyan/) from his [Twitter post](https://twitter.com/eugeneyan/status/1470916049604268035). We tested the workflow on February 1st, 2022, but it is not integrated into our CI workflows. Future releases of Merlin or Merlin's dependencies may cause issues.

diff --git a/docs/source/resources/links.md b/docs/source/resources/links.md
index e843c1ff97c..f1689191a27 100644
--- a/docs/source/resources/links.md
+++ b/docs/source/resources/links.md
@@ -1,32 +1,28 @@
-Talks and Blog Posts
-====================
+# Talks and Blog Posts

This is a collection of presentations, blog posts and documentation around NVIDIA Merlin (NVTabular and HugeCTR). 
-Talks ------ +## Talks -* [Part 6: "AI for Every Company" | GTC Keynote Oct 2020](https://www.youtube.com/watch?v=XiwVziNh_3s) Oct 5th, 2020 -* [A Deep Dive into the NVIDIA Merlin Recommendation Framework | GTC Session Oct 2020](https://www.nvidia.com/en-us/on-demand/session/gtcfall20-a21132/) Oct 6th, 2020 -* [Merlin: A GPU Accelerated Recommendation Framework | KDD 2020 Aug 2020](https://www.youtube.com/watch?v=Pi2kCE5xcpw&feature=youtu.be) Aug 22nd, 2020 -* [NVTabular: GPU Accelerated ETL for Recommender Systems | GTC Session May 2020](https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21651/) May 15th, 2020 -* [HugeCTR: High-Performance Click-Through Rate Estimation Training | GTC Session May 2020](https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21455/) May 15th, 2020 +- [Part 6: "AI for Every Company" | GTC Keynote Oct 2020](https://www.youtube.com/watch?v=XiwVziNh_3s) Oct 5th, 2020 +- [A Deep Dive into the NVIDIA Merlin Recommendation Framework | GTC Session Oct 2020](https://www.nvidia.com/en-us/on-demand/session/gtcfall20-a21132/) Oct 6th, 2020 +- [Merlin: A GPU Accelerated Recommendation Framework | KDD 2020 Aug 2020](https://www.youtube.com/watch?v=Pi2kCE5xcpw&feature=youtu.be) Aug 22nd, 2020 +- [NVTabular: GPU Accelerated ETL for Recommender Systems | GTC Session May 2020](https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21651/) May 15th, 2020 +- [HugeCTR: High-Performance Click-Through Rate Estimation Training | GTC Session May 2020](https://www.nvidia.com/en-us/on-demand/session/gtcsj20-s21455/) May 15th, 2020 - -Blog posts ----------- +## Blog posts We frequently post updates on [our blog](https://medium.com/nvidia-merlin) and on the [NVIDIA Developer Technical Blog](https://developer.nvidia.com/blog?r=1&tags=&categories=recommendation-systems). Some highlights: -* [Why isn’t your recommender system training faster on GPU? 
(And what can you do about it?)](https://medium.com/nvidia-merlin/why-isnt-your-recommender-system-training-faster-on-gpu-and-what-can-you-do-about-it-6cb44a711ad4) Dec 3rd, 2020 -* [Democratizing Deep Learning Recommenders Resources](https://news.developer.nvidia.com/democratizing-deep-learning-recommenders-resources/?ncid=so-link-59588#cid=dl19_so-link_en-us) Dec 3rd, 2020 -* [NVIDIA Merlin Deepens Commitment to Deep Learning Recommenders with Latest Beta Update](https://news.developer.nvidia.com/nvidia-merlin-deepens-commitment-to-deep-learning-recommenders-with-latest-beta-update/), Nov 23rd, 2020 -* [NVIDIA Merlin Powers Fastest Commercially Available Solution for Recommender Systems Training](https://news.developer.nvidia.com/nvidia-merlin-powers-fastest-commercially-available-solution-for-recommender-systems-training/), Oct 6th, 2020 -* [Announcing the NVIDIA NVTabular Open Beta with Multi-GPU Support and New Data Loaders](https://developer.nvidia.com/blog/announcing-the-nvtabular-open-beta-with-multi-gpu-support-and-new-data-loaders/) Oct 5th, 2020 -* [Accelerating Recommender Systems Training with NVIDIA Merlin Open Beta](https://developer.nvidia.com/blog/accelerating-recommender-systems-training-with-nvidia-merlin-open-beta/) Oct 5th, 2020 -* [Better Predictions at Scale with Merlin](https://news.developer.nvidia.com/better-predictions-at-scale-with-merlin/) Aug 20th, 2020 -* [Developer Blog: Introducing NVIDIA Merlin HugeCTR](https://news.developer.nvidia.com/developer-blog-introducing-nvidia-merlin-hugectr/) July 28th, 2020 -* [Scalable Recommender Systems with NVTabular- A Fast Tabular Data Loading and Transformation Library](https://medium.com/rapids-ai/gpu-recommender-systems-with-nvtabular-eee056c37ea0) Jul 23rd, 2020 -* [Introducing NVIDIA Merlin HugeCTR: A Training Framework Dedicated to Recommender Systems](https://developer.nvidia.com/blog/introducing-merlin-hugectr-training-framework-dedicated-to-recommender-systems/) July 21st, 2020 +- [Why isn’t your recommender system training faster on GPU? 
(And what can you do about it?)](https://medium.com/nvidia-merlin/why-isnt-your-recommender-system-training-faster-on-gpu-and-what-can-you-do-about-it-6cb44a711ad4) Dec 3rd, 2020 +- [Democratizing Deep Learning Recommenders Resources](https://news.developer.nvidia.com/democratizing-deep-learning-recommenders-resources/?ncid=so-link-59588#cid=dl19_so-link_en-us) Dec 3rd, 2020 +- [NVIDIA Merlin Deepens Commitment to Deep Learning Recommenders with Latest Beta Update](https://news.developer.nvidia.com/nvidia-merlin-deepens-commitment-to-deep-learning-recommenders-with-latest-beta-update/), Nov 23rd, 2020 +- [NVIDIA Merlin Powers Fastest Commercially Available Solution for Recommender Systems Training](https://news.developer.nvidia.com/nvidia-merlin-powers-fastest-commercially-available-solution-for-recommender-systems-training/), Oct 6th, 2020 +- [Announcing the NVIDIA NVTabular Open Beta with Multi-GPU Support and New Data Loaders](https://developer.nvidia.com/blog/announcing-the-nvtabular-open-beta-with-multi-gpu-support-and-new-data-loaders/) Oct 5th, 2020 +- [Accelerating Recommender Systems Training with NVIDIA Merlin Open Beta](https://developer.nvidia.com/blog/accelerating-recommender-systems-training-with-nvidia-merlin-open-beta/) Oct 5th, 2020 +- [Better Predictions at Scale with Merlin](https://news.developer.nvidia.com/better-predictions-at-scale-with-merlin/) Aug 20th, 2020 +- [Developer Blog: Introducing NVIDIA Merlin HugeCTR](https://news.developer.nvidia.com/developer-blog-introducing-nvidia-merlin-hugectr/) July 28th, 2020 +- [Scalable Recommender Systems with NVTabular- A Fast Tabular Data Loading and Transformation Library](https://medium.com/rapids-ai/gpu-recommender-systems-with-nvtabular-eee056c37ea0) Jul 23rd, 2020 +- [Introducing NVIDIA Merlin HugeCTR: A Training Framework Dedicated to Recommender Systems](https://developer.nvidia.com/blog/introducing-merlin-hugectr-training-framework-dedicated-to-recommender-systems/) July 21st, 2020 diff --git a/examples/README.md b/examples/README.md index 6b9168b3655..2701d86db48 100644 --- a/examples/README.md +++ b/examples/README.md @@ -11,6 +11,7 @@ In this library, we provide a collection of Jupyter notebooks, which demonstrate - [Running on multiple GPUs or on CPU](03-Running-on-multiple-GPUs-or-on-CPU.ipynb): Run NVTabular in different environments, such as multi-GPU or CPU-only mode. In addition, NVTabular is used in many of our examples in other Merlin libraries. 
You can explore more complex processing pipelines in following examples: + - [End-To-End Examples with Merlin](https://github.com/NVIDIA-Merlin/Merlin/tree/main/examples) - [Training Examples with Merlin Models](https://github.com/NVIDIA-Merlin/models/tree/main/examples) - [Training Examples with Transformer4Rec](https://github.com/NVIDIA-Merlin/Transformers4Rec/tree/main/examples) From caa06561646acfef123322deaf75c785e05c9162 Mon Sep 17 00:00:00 2001 From: Karl Higley Date: Fri, 13 Jan 2023 12:45:36 -0500 Subject: [PATCH 3/3] Apply formatting changes from `prettier` --- .github/workflows/cpu-ci.yml | 94 ++++++++++++++++++------------------ .github/workflows/gpu-ci.yml | 30 ++++++------ 2 files changed, 62 insertions(+), 62 deletions(-) diff --git a/.github/workflows/cpu-ci.yml b/.github/workflows/cpu-ci.yml index 7a77b373e50..7d9f914b02b 100644 --- a/.github/workflows/cpu-ci.yml +++ b/.github/workflows/cpu-ci.yml @@ -18,52 +18,52 @@ jobs: os: [ubuntu-latest] steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install Ubuntu packages - run: | - sudo apt-get update -y - sudo apt-get install -y protobuf-compiler - - name: Install and upgrade python packages - run: | - python -m pip install --upgrade pip setuptools==59.4.0 wheel tox pybind11 - python -m pip uninstall protobuf -y - python -m pip install --no-binary=protobuf protobuf - - name: Generate package for pypi - run: | - python setup.py sdist bdist_wheel - - name: Check distribution is valid - run: | - ./ci/check_dist.sh - - name: Run tests - run: | - ref_type=${{ github.ref_type }} - branch=main - if [[ $ref_type == "tag"* ]] - then - raw=$(git branch -r --contains ${{ github.ref_name }}) - branch=${raw/origin\/} - fi - tox -e test-cpu -- $branch - - name: Upload pypi artifacts to github - uses: actions/upload-artifact@v3 - with: - name: dist - path: dist - - name: Generate package for conda - id: conda_build - run: | - conda update conda - conda install conda-build pybind11 - conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages - export CONDA_PACKAGE=$(conda build --python ${{ matrix.python-version }} . 
-c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages --output) - echo "conda_package : $CONDA_PACKAGE" - echo "conda_package=$CONDA_PACKAGE" >> $GITHUB_OUTPUT + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install Ubuntu packages + run: | + sudo apt-get update -y + sudo apt-get install -y protobuf-compiler + - name: Install and upgrade python packages + run: | + python -m pip install --upgrade pip setuptools==59.4.0 wheel tox pybind11 + python -m pip uninstall protobuf -y + python -m pip install --no-binary=protobuf protobuf + - name: Generate package for pypi + run: | + python setup.py sdist bdist_wheel + - name: Check distribution is valid + run: | + ./ci/check_dist.sh + - name: Run tests + run: | + ref_type=${{ github.ref_type }} + branch=main + if [[ $ref_type == "tag"* ]] + then + raw=$(git branch -r --contains ${{ github.ref_name }}) + branch=${raw/origin\/} + fi + tox -e test-cpu -- $branch + - name: Upload pypi artifacts to github + uses: actions/upload-artifact@v3 + with: + name: dist + path: dist + - name: Generate package for conda + id: conda_build + run: | + conda update conda + conda install conda-build pybind11 + conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages + export CONDA_PACKAGE=$(conda build --python ${{ matrix.python-version }} . -c defaults -c conda-forge -c numba -c rapidsai -c nvidia --output-folder ./conda_packages --output) + echo "conda_package : $CONDA_PACKAGE" + echo "conda_package=$CONDA_PACKAGE" >> $GITHUB_OUTPUT - name: Upload conda artifacts to github uses: actions/upload-artifact@v3 @@ -135,4 +135,4 @@ jobs: env: ANACONDA_TOKEN: ${{ secrets.ANACONDA_TOKEN }} run: | - anaconda -t $ANACONDA_TOKEN upload -u nvidia conda/*.tar.bz2 \ No newline at end of file + anaconda -t $ANACONDA_TOKEN upload -u nvidia conda/*.tar.bz2 diff --git a/.github/workflows/gpu-ci.yml b/.github/workflows/gpu-ci.yml index 079da77dcaa..c379199e0f5 100644 --- a/.github/workflows/gpu-ci.yml +++ b/.github/workflows/gpu-ci.yml @@ -3,11 +3,11 @@ name: GPU CI on: workflow_dispatch: push: - branches: [ main ] + branches: [main] tags: - v* pull_request: - branches: [ main ] + branches: [main] types: [opened, synchronize, reopened, closed] jobs: @@ -15,16 +15,16 @@ jobs: runs-on: 2GPU steps: - - uses: actions/checkout@v3 - with: - fetch-depth: 0 - - name: Run tests - run: | - ref_type=${{ github.ref_type }} - branch=main - if [[ $ref_type == "tag"* ]] - then - raw=$(git branch -r --contains ${{ github.ref_name }}) - branch=${raw/origin\/} - fi - cd ${{ github.workspace }}; tox -e test-gpu -- $branch + - uses: actions/checkout@v3 + with: + fetch-depth: 0 + - name: Run tests + run: | + ref_type=${{ github.ref_type }} + branch=main + if [[ $ref_type == "tag"* ]] + then + raw=$(git branch -r --contains ${{ github.ref_name }}) + branch=${raw/origin\/} + fi + cd ${{ github.workspace }}; tox -e test-gpu -- $branch