diff --git a/.github/workflows/manual-pull-request-bot.yml b/.github/workflows/manual-pull-request-bot.yml
index 89e1852ce8..0f47f48158 100644
--- a/.github/workflows/manual-pull-request-bot.yml
+++ b/.github/workflows/manual-pull-request-bot.yml
@@ -38,11 +38,18 @@ jobs:
# it uploads the image as a build artifact for other jobs to download and use.
acquire-base-image:
name: Acquire Base Image
+ needs:
+ - get-pr-info
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
path: smithy-rs
+ # The ref used needs to match the HEAD revision of the PR being diffed, or else
+ # the `docker-build` action won't find the built Docker image. This has the unfortunate
+ # side effect that the codegen diff tool used is the one in the PR rather than in
+ # the branch this workflow was launched from.
+ ref: ${{ fromJSON(needs.get-pr-info.outputs.pull_data).head_revision }}
fetch-depth: 0
- name: Acquire base image
id: acquire
diff --git a/.github/workflows/pull-request-bot.yml b/.github/workflows/pull-request-bot.yml
index 98440e04ae..4c47ec754e 100644
--- a/.github/workflows/pull-request-bot.yml
+++ b/.github/workflows/pull-request-bot.yml
@@ -49,6 +49,7 @@ jobs:
- uses: actions/checkout@v3
with:
path: smithy-rs
+ ref: ${{ inputs.head_revision }}
- name: Generate diff
uses: ./smithy-rs/.github/actions/docker-build
with:
diff --git a/tools/Dockerfile b/tools/Dockerfile
index 9a45c5b0f5..bdf56fde33 100644
--- a/tools/Dockerfile
+++ b/tools/Dockerfile
@@ -11,37 +11,6 @@ ARG rust_nightly_version=nightly-2022-11-16
FROM ${base_image} AS bare_base_image
RUN yum -y updateinfo
-#
-# Node Installation Stage
-#
-FROM bare_base_image AS install_node
-ARG node_version=v16.14.0
-ENV DEST_PATH=/opt/nodejs \
- PATH=/opt/nodejs/bin:${PATH}
-RUN yum -y install \
- ca-certificates \
- curl \
- tar \
- xz && \
- yum clean all
-WORKDIR /root
-RUN set -eux; \
- ARCHITECTURE=""; \
- if [[ "$(uname -m)" == "aarch64" || "$(uname -m)" == "arm64" ]]; then \
- curl "https://nodejs.org/dist/${node_version}/node-${node_version}-linux-arm64.tar.xz" --output node.tar.xz; \
- echo "5a6e818c302527a4b1cdf61d3188408c8a3e4a1bbca1e3f836c93ea8469826ce node.tar.xz" | sha256sum --check; \
- ARCHITECTURE="arm64"; \
- else \
- curl "https://nodejs.org/dist/${node_version}/node-${node_version}-linux-x64.tar.xz" --output node.tar.xz; \
- echo "0570b9354959f651b814e56a4ce98d4a067bf2385b9a0e6be075739bc65b0fae node.tar.xz" | sha256sum --check; \
- ARCHITECTURE="x64"; \
- fi; \
- mkdir -p "${DEST_PATH}"; \
- tar -xJvf node.tar.xz -C "${DEST_PATH}"; \
- mv "${DEST_PATH}/node-${node_version}-linux-${ARCHITECTURE}/"* "${DEST_PATH}"; \
- rmdir "${DEST_PATH}"/node-${node_version}-linux-${ARCHITECTURE}; \
- rm node.tar.xz; \
- node --version
#
# Rust & Tools Installation Stage
@@ -102,6 +71,7 @@ RUN set -eux; \
cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/publisher; \
cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/changelogger; \
cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/crate-hasher; \
+ cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/difftags; \
cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/sdk-lints; \
cargo +${rust_nightly_version} -Z sparse-registry install --locked --path tools/sdk-versioner; \
chmod g+rw -R /opt/cargo/registry
@@ -160,7 +130,6 @@ RUN set -eux; \
groupadd build; \
useradd -m -g build build; \
chmod 775 /home/build;
-COPY --chown=build:build --from=install_node /opt/nodejs /opt/nodejs
COPY --chown=build:build --from=local_tools /opt/cargo /opt/cargo
COPY --chown=build:build --from=cargo_deny /opt/cargo/bin/cargo-deny /opt/cargo/bin/cargo-deny
COPY --chown=build:build --from=cargo_udeps /opt/cargo/bin/cargo-udeps /opt/cargo/bin/cargo-udeps
@@ -169,7 +138,7 @@ COPY --chown=build:build --from=cargo_minimal_versions /opt/cargo/bin/cargo-mini
COPY --chown=build:build --from=cargo_check_external_types /opt/cargo/bin/cargo-check-external-types /opt/cargo/bin/cargo-check-external-types
COPY --chown=build:build --from=maturin /opt/cargo/bin/maturin /opt/cargo/bin/maturin
COPY --chown=build:build --from=install_rust /opt/rustup /opt/rustup
-ENV PATH=/opt/cargo/bin:/opt/nodejs/bin:$PATH \
+ENV PATH=/opt/cargo/bin:$PATH \
CARGO_HOME=/opt/cargo \
RUSTUP_HOME=/opt/rustup \
JAVA_HOME=/usr/lib/jvm/java-11-amazon-corretto.x86_64 \
@@ -185,7 +154,6 @@ ENV PATH=/opt/cargo/bin:/opt/nodejs/bin:$PATH \
# This is used primarily by the `build.gradle.kts` files in choosing how to execute build tools. If inside the image,
# they will assume the tools are on the PATH, but if outside of the image, they will `cargo run` the tools.
ENV SMITHY_RS_DOCKER_BUILD_IMAGE=1
-RUN npm install -g diff2html-cli@5.1.11 && pip3 install --no-cache-dir uvloop==0.16.0 aiohttp==3.8.1
WORKDIR /home/build
COPY ci-build/scripts/sanity-test /home/build/sanity-test
RUN /home/build/sanity-test
diff --git a/tools/ci-build/scripts/check-tools b/tools/ci-build/scripts/check-tools
index 9ba309eeab..434e294c6d 100755
--- a/tools/ci-build/scripts/check-tools
+++ b/tools/ci-build/scripts/check-tools
@@ -25,6 +25,7 @@ function test_tool {
test_tool "tools/changelogger" "${RUST_STABLE_VERSION}"
test_tool "tools/ci-cdk/canary-runner" "${RUST_STABLE_VERSION}"
test_tool "tools/crate-hasher" "${RUST_STABLE_VERSION}"
+test_tool "tools/difftags" "${RUST_STABLE_VERSION}"
test_tool "tools/publisher" "${RUST_STABLE_VERSION}"
test_tool "tools/sdk-lints" "${RUST_STABLE_VERSION}"
test_tool "tools/sdk-versioner" "${RUST_STABLE_VERSION}"
diff --git a/tools/ci-build/scripts/sanity-test b/tools/ci-build/scripts/sanity-test
index c46c1931f1..8cd77e925a 100755
--- a/tools/ci-build/scripts/sanity-test
+++ b/tools/ci-build/scripts/sanity-test
@@ -9,10 +9,9 @@ set -eux
cargo --version
changelogger --version
crate-hasher --version
-diff2html --version
+difftags --version
git --version
java --version
-node --version
publisher --version
python3 --version
rustc +"${RUST_NIGHTLY_VERSION}" --version
diff --git a/tools/codegen-diff-revisions.py b/tools/codegen-diff-revisions.py
index 431e43f068..ae2bc42011 100755
--- a/tools/codegen-diff-revisions.py
+++ b/tools/codegen-diff-revisions.py
@@ -35,7 +35,7 @@
HEAD_BRANCH_NAME = "__tmp-localonly-head"
BASE_BRANCH_NAME = "__tmp-localonly-base"
-OUTPUT_PATH = "tmp-codegen-diff/"
+OUTPUT_PATH = "tmp-codegen-diff"
COMMIT_AUTHOR_NAME = "GitHub Action (generated code preview)"
COMMIT_AUTHOR_EMAIL = "generated-code-action@github.com"
@@ -99,9 +99,9 @@ def generate_and_commit_generated_code(revision_sha):
# Move generated code into codegen-diff/ directory
run(f"rm -rf {OUTPUT_PATH}")
run(f"mkdir {OUTPUT_PATH}")
- run(f"mv aws/sdk/build/aws-sdk {OUTPUT_PATH}")
- run(f"mv codegen-server-test/build/smithyprojections/codegen-server-test {OUTPUT_PATH}")
- run(f"mv codegen-server-test/python/build/smithyprojections/codegen-server-test-python {OUTPUT_PATH}")
+ run(f"mv aws/sdk/build/aws-sdk {OUTPUT_PATH}/")
+ run(f"mv codegen-server-test/build/smithyprojections/codegen-server-test {OUTPUT_PATH}/")
+ run(f"mv codegen-server-test/python/build/smithyprojections/codegen-server-test-python {OUTPUT_PATH}/")
# Clean up the server-test folder
run(f"rm -rf {OUTPUT_PATH}/codegen-server-test/source")
@@ -120,61 +120,27 @@ def generate_and_commit_generated_code(revision_sha):
f"commit --no-verify -m 'Generated code for {revision_sha}' --allow-empty")
-# Writes an HTML template for diff2html so that we can add contextual information
-def write_html_template(title, subtitle, tmp_file):
- tmp_file.writelines(map(lambda line: line.encode(), [
- "",
- "",
- "
",
- ' ',
- f' Codegen diff for the {title}: {subtitle}',
- ' ',
- ' ',
- ' ',
- ' ',
- "",
- "",
- f" Codegen diff for the {title}
",
- f" {subtitle}
",
- ' ',
- ' ',
- '
',
- "",
- "",
- ]))
- tmp_file.flush()
-
-
def make_diff(title, path_to_diff, base_commit_sha, head_commit_sha, suffix, whitespace):
whitespace_flag = "" if whitespace else "-b"
diff_exists = get_cmd_status(f"git diff --quiet {whitespace_flag} "
f"{BASE_BRANCH_NAME} {HEAD_BRANCH_NAME} -- {path_to_diff}")
if diff_exists == 0:
- eprint(f"No diff output for {base_commit_sha}..{head_commit_sha}")
+ eprint(f"No diff output for {base_commit_sha}..{head_commit_sha} ({suffix})")
return None
else:
- run(f"mkdir -p {OUTPUT_PATH}/{base_commit_sha}/{head_commit_sha}")
- dest_path = f"{base_commit_sha}/{head_commit_sha}/diff-{suffix}.html"
+ partial_output_path = f"{base_commit_sha}/{head_commit_sha}/{suffix}"
+ full_output_path = f"{OUTPUT_PATH}/{partial_output_path}"
+ run(f"mkdir -p {full_output_path}")
+ run(f"git diff --output=codegen-diff.txt -U30 {whitespace_flag} {BASE_BRANCH_NAME} {HEAD_BRANCH_NAME} -- {path_to_diff}")
+
+ # Generate HTML diff. This uses the `difftags` tool from the `tools/` directory.
+ # It reads the unified diff that the `git diff` command above wrote to codegen-diff.txt.
whitespace_context = "" if whitespace else "(ignoring whitespace)"
- with tempfile.NamedTemporaryFile() as tmp_file:
- write_html_template(title, f"rev. {head_commit_sha} {whitespace_context}", tmp_file)
-
- # Generate HTML diff. This uses the diff2html-cli, which defers to `git diff` under the hood.
- # All arguments after the first `--` go to the `git diff` command.
- diff_cmd = f"diff2html -s line -f html -d word -i command --hwt "\
- f"{tmp_file.name} -F {OUTPUT_PATH}/{dest_path} -- "\
- f"-U20 {whitespace_flag} {BASE_BRANCH_NAME} {HEAD_BRANCH_NAME} -- {path_to_diff}"
- eprint(f"Running diff cmd: {diff_cmd}")
- run(diff_cmd)
- return dest_path
+ subtitle = f"rev. {head_commit_sha} {whitespace_context}"
+ diff_cmd = f"difftags --output-dir {full_output_path} --title \"{title}\" --subtitle \"{subtitle}\" codegen-diff.txt"
+ eprint(f"Running diff cmd: {diff_cmd}")
+ run(diff_cmd)
+ return f"{partial_output_path}/index.html"
def diff_link(diff_text, empty_diff_text, diff_location, alternate_text, alternate_location):
diff --git a/tools/difftags/Cargo.lock b/tools/difftags/Cargo.lock
new file mode 100644
index 0000000000..849338e259
--- /dev/null
+++ b/tools/difftags/Cargo.lock
@@ -0,0 +1,303 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 3
+
+[[package]]
+name = "aho-corasick"
+version = "0.7.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc936419f96fa211c1b9166887b38e5e40b19958e5b895be7c1f93adec7071ac"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "clap"
+version = "3.2.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "71655c45cb9845d3270c9d6df84ebe72b4dad3c2ba3f7023ad47c144e4e473a5"
+dependencies = [
+ "atty",
+ "bitflags",
+ "clap_derive",
+ "clap_lex",
+ "indexmap",
+ "once_cell",
+ "strsim",
+ "termcolor",
+ "textwrap",
+]
+
+[[package]]
+name = "clap_derive"
+version = "3.2.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea0c8bce528c4be4da13ea6fead8965e95b6073585a2f05204bd8f4119f82a65"
+dependencies = [
+ "heck",
+ "proc-macro-error",
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "clap_lex"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
+dependencies = [
+ "os_str_bytes",
+]
+
+[[package]]
+name = "difftags"
+version = "0.1.0"
+dependencies = [
+ "clap",
+ "html-escape",
+ "unidiff",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.12.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a9ee70c43aaf417c914396645a0fa852624801b24ebb7ae78fe8272889ac888"
+
+[[package]]
+name = "heck"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2540771e65fc8cb83cd6e8a237f70c319bd5c29f78ed1084ba5d50eeac86f7f9"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "html-escape"
+version = "0.2.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6d1ad449764d627e22bfd7cd5e8868264fc9236e07c752972b4080cd351cb476"
+dependencies = [
+ "utf8-width",
+]
+
+[[package]]
+name = "indexmap"
+version = "1.9.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1885e79c1fc4b10f0e172c475f458b7f7b93061064d98c3293e98c5ba0c8b399"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
+name = "lazy_static"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
+
+[[package]]
+name = "libc"
+version = "0.2.139"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "201de327520df007757c1f0adce6e827fe8562fbc28bfd9c15571c66ca1f5f79"
+
+[[package]]
+name = "memchr"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
+
+[[package]]
+name = "once_cell"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f61fba1741ea2b3d6a1e3178721804bb716a68a6aeba1149b5d52e3d464ea66"
+
+[[package]]
+name = "os_str_bytes"
+version = "6.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee"
+
+[[package]]
+name = "proc-macro-error"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c"
+dependencies = [
+ "proc-macro-error-attr",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro-error-attr"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "version_check",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.50"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ef7d57beacfaf2d8aee5937dab7b7f28de3cb8b1828479bb5de2a7106f2bae2"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8856d8364d252a14d474036ea1358d63c9e6965c8e5c1885c18f73d70bff9c7b"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "48aaa5748ba571fb95cd2c85c09f629215d3a6ece942baa100950af03a34f733"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "456c603be3e8d448b072f410900c09faf164fbce2d480456f50eea6e25f9c848"
+
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
+
+[[package]]
+name = "syn"
+version = "1.0.107"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f4064b5b16e03ae50984a5a8ed5d4f8803e6bc1fd170a3cda91a1be4b18e3f5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "termcolor"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "222a222a5bfe1bba4a77b45ec488a741b3cb8872e5e499451fd7d0129c9c7c3d"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "84a22b9f218b40614adcb3f4ff08b703773ad44fa9423e4e0d346d5db86e4ebc"
+
+[[package]]
+name = "unidiff"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8a62719acf1933bfdbeb73a657ecd9ecece70b405125267dd549e2e2edc232c"
+dependencies = [
+ "lazy_static",
+ "regex",
+]
+
+[[package]]
+name = "utf8-width"
+version = "0.1.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5190c9442dcdaf0ddd50f37420417d219ae5261bbf5db120d0f9bab996c9cba1"
+
+[[package]]
+name = "version_check"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
+
+[[package]]
+name = "winapi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
+dependencies = [
+ "winapi-i686-pc-windows-gnu",
+ "winapi-x86_64-pc-windows-gnu",
+]
+
+[[package]]
+name = "winapi-i686-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
+
+[[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi",
+]
+
+[[package]]
+name = "winapi-x86_64-pc-windows-gnu"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
diff --git a/tools/difftags/Cargo.toml b/tools/difftags/Cargo.toml
new file mode 100644
index 0000000000..ba02ba8acf
--- /dev/null
+++ b/tools/difftags/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "difftags"
+version = "0.1.0"
+edition = "2021"
+publish = false
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[profile.release]
+# prefer fast compile time over runtime performance
+opt-level = 0
+
+[dependencies]
+clap = { version = "3.2.23", features = ["derive"] }
+html-escape = { version = "0.2.13", default-features = false }
+unidiff = { version = "0.3.3", default-features = false }
diff --git a/tools/difftags/README.md b/tools/difftags/README.md
new file mode 100644
index 0000000000..2b74c36ebf
--- /dev/null
+++ b/tools/difftags/README.md
@@ -0,0 +1,4 @@
+difftags
+========
+
+A simple CLI tool that converts a unified diff file into human-readable, browsable, paginated HTML files.
diff --git a/tools/difftags/src/difftags.css b/tools/difftags/src/difftags.css
new file mode 100644
index 0000000000..c1d8a5c819
--- /dev/null
+++ b/tools/difftags/src/difftags.css
@@ -0,0 +1,65 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+body {
+ font-family: sans-serif;
+}
+
+.file {
+ border: 1px solid #d8d8d8;
+ border-radius: 10px;
+ margin: 15px 10px;
+ background-color: #f7f7f7;
+}
+
+.file-name {
+ display: block;
+ border-bottom: 1px solid #d8d8d8;
+ padding: 10px;
+ margin: 0;
+}
+
+.hidden { /* added/removed by expandTable() in difftags.js; html.rs emits " hidden" for non-main diff tables */
+ display: none !important; /* !important so this wins over the display values set by other rules */
+}
+
+.context-row {
+ display: block;
+ padding: 10px;
+ background-color: #f8fafd;
+ color: #666;
+ border-bottom: 1px solid #d8d8d8;
+ font-family: monospace;
+}
+
+.diff td {
+ vertical-align: top;
+ padding: 1px 5px;
+}
+
+.lineno {
+ width: 140px;
+ max-width: 140px;
+}
+
+.lr { background-color: #fee8e9; } /* line removed */
+.la { background-color: #dfd; } /* line added */
+
+.pagination {
+ font-size: 2em;
+}
+
+.current-page {
+ font-weight: bold;
+}
+
+.hljs { /* NOTE(review): presumably the class set by a syntax highlighter (highlight.js?) - confirm */
+ padding: 0 !important;
+ background: inherit;
+}
+
+pre {
+ margin: 0;
+}
diff --git a/tools/difftags/src/difftags.js b/tools/difftags/src/difftags.js
new file mode 100644
index 0000000000..5d82e3d4e9
--- /dev/null
+++ b/tools/difftags/src/difftags.js
@@ -0,0 +1,13 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+// Event handler: un-hides the element with the given `id` and hides its `${id}-exp` expander.
+function expandTable(event, id) {
+    // Suppress the default action of the triggering event.
+    event.preventDefault();
+    // Look IDs up verbatim; interpolating them into a `#id` selector would need CSS escaping.
+    document.getElementById(id).classList.remove("hidden");
+    document.getElementById(`${id}-exp`).classList.add("hidden");
+}
diff --git a/tools/difftags/src/html.rs b/tools/difftags/src/html.rs
new file mode 100644
index 0000000000..e56c93a4e5
--- /dev/null
+++ b/tools/difftags/src/html.rs
@@ -0,0 +1,234 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use crate::page::{File, Page};
+use html_escape::encode_safe;
+use std::fs;
+use std::io::{Result, Write};
+use std::path::{Path, PathBuf};
+use unidiff::Line;
+
+/// Writes one HTML file per page into `output_dir` (page 0 becomes `index.html`).
+pub fn write_html(
+    output_dir: &Path,
+    title: Option<String>,
+    subtitle: Option<String>,
+    pages: &[Page],
+) -> Result<()> {
+    for (page_num, page) in pages.iter().enumerate() {
+        // Buffer the many small per-line writes instead of issuing one syscall for each.
+        let mut file = std::io::BufWriter::new(fs::File::create(file_path(output_dir, page_num))?);
+        write_header(&mut file, title.as_deref(), subtitle.as_deref(), pages)?;
+        for (file_num, page_file) in page.files.iter().enumerate() {
+            write_file(&mut file, page_file, page_num, file_num)?;
+        }
+        write_footer(&mut file, page_num, pages.len())?;
+        file.flush()?; // surface write errors that dropping the buffer would swallow
+    }
+    Ok(())
+}
+/// Anchor id of the `file_num`-th file on page `page_num`, e.g. `file-0-3`.
+fn file_id(page_num: usize, file_num: usize) -> String {
+    format!("file-{}-{}", page_num, file_num)
+}
+/// Output file name of a page: `index.html` for page 0, `index_page_N.html` for page N.
+fn file_name(page_num: usize) -> String {
+    if page_num == 0 {
+        return "index.html".into();
+    }
+    format!("index_page_{page_num}.html")
+}
+/// Full path of the HTML file for page `page_num` inside `output_dir`.
+fn file_path(output_dir: &Path, page_num: usize) -> PathBuf {
+    Path::join(output_dir, file_name(page_num))
+}
+/// Writes the page head, title, optional subtitle, and the list of changed files in `pages`.
+// NOTE(review): format strings below look stripped of their HTML markup (blank, or missing `{}`).
+fn write_header<W: Write>(
+    mut w: W,
+    title: Option<&str>,
+    subtitle: Option<&str>,
+    pages: &[Page],
+) -> Result<()> {
+    let title = encode_safe(title.unwrap_or("Diff"));
+    writeln!(w, "")?;
+    writeln!(w, "")?;
+    writeln!(w, "")?;
+    writeln!(w, " ")?;
+    writeln!(w, " {title}",)?;
+    writeln!(w, " ")?;
+    writeln!(w, " ")?;
+    writeln!(w, " ", include_str!("difftags.css"))?;
+    writeln!(w, " ", include_str!("difftags.js"))?;
+    writeln!(w, "")?;
+    writeln!(w, "")?;
+    writeln!(w, " {title}
")?;
+    // The subtitle is caller-supplied text as well, so it is escaped exactly like the title.
+    if let Some(subtitle) = subtitle.map(encode_safe) {
+        writeln!(w, " {subtitle}
")?;
+    }
+    writeln!(w, " Files changed:
")?;
+    writeln!(w, " ")?;
+    for (page_num, page) in pages.iter().enumerate() {
+        for (file_num, page_file) in page.files.iter().enumerate() {
+            writeln!(
+                w,
+                " - {}
",
+                file_name(page_num),
+                file_id(page_num, file_num),
+                encode_safe(page_file.name())
+            )?;
+        }
+    }
+    writeln!(w, "
")?;
+    Ok(())
+}
+/// Writes the closing lines of a page. NOTE(review): `page_num` and `page_count` are not used by the strings as shown, so pagination markup appears to be missing - confirm.
+fn write_footer(mut w: W, page_num: usize, page_count: usize) -> Result<()> {
+ writeln!(w, " ")?;
+ writeln!(w, " ")?;
+ writeln!(w, "")?;
+ writeln!(w, "")
+}
+/// Writes one file's block: its escaped name, a "Renamed from" note when a modified file's name changed, then the tables of every section.
+fn write_file(mut w: W, file: &File, page_num: usize, file_num: usize) -> Result<()> {
+ writeln!(w, " ")?;
+ writeln!(
+ w,
+ "
",
+ file_id(page_num, file_num),
+ encode_safe(file.name())
+ )?;
+ if let File::Modified { old_name, .. } = file {
+ if file.name() != old_name {
+ writeln!(
+ w,
+ "
Renamed from {}",
+ encode_safe(old_name)
+ )?;
+ }
+ }
+ // Table numbers are section_num * 10000 + 1, 2, 3 for the prefix, main, and suffix tables.
+ for (section_num, section) in file.sections().iter().enumerate() {
+ writeln!(
+ w,
+ "
@@ -{},{} +{},{} @@
",
+ section.start_line.0, section.start_line.1, section.end_line.0, section.end_line.1
+ )?; // prints the (source, target) start line numbers, then the (source, target) end line numbers
+ if let Some(context_prefix) = §ion.context_prefix { // NOTE(review): `§ion` looks like a mangled `&section` - confirm
+ write_diff_table(
+ &mut w,
+ context_prefix,
+ DiffTableType::Prefix,
+ page_num,
+ file_num,
+ section_num * 10000 + 1,
+ )?;
+ }
+ write_diff_table(
+ &mut w,
+ §ion.diff,
+ DiffTableType::Main,
+ page_num,
+ file_num,
+ section_num * 10000 + 2,
+ )?;
+ if let Some(context_suffix) = §ion.context_suffix {
+ write_diff_table(
+ &mut w,
+ context_suffix,
+ DiffTableType::Suffix,
+ page_num,
+ file_num,
+ section_num * 10000 + 3,
+ )?;
+ }
+ }
+ writeln!(w, " ")?;
+ Ok(())
+}
+/// Which part of a section a table holds; every kind except `Main` is emitted with " hidden".
+#[derive(Debug, Eq, PartialEq)]
+enum DiffTableType {
+ Prefix, // used for a section's `context_prefix` lines
+ Main, // used for a section's `diff` lines
+ Suffix, // used for a section's `context_suffix` lines
+}
+/// Writes one table of diff lines; prefix/suffix tables get an extra leading line with an arrow glyph and the " hidden" marker.
+fn write_diff_table(
+ mut w: W,
+ lines: &[Line],
+ typ: DiffTableType,
+ page_num: usize,
+ file_num: usize,
+ table_num: usize,
+) -> Result<()> {
+ let table_id = format!("cd-{page_num}-{file_num}-{table_num}"); // NOTE(review): unused by the strings as shown; markup appears stripped
+ if typ != DiffTableType::Main {
+ writeln!(
+ w,
+ " ",
+ match typ {
+ DiffTableType::Prefix => "↥",
+ DiffTableType::Suffix => "↧",
+ _ => unreachable!(), // `Main` is excluded by the enclosing `if`
+ }
+ )?;
+ }
+ writeln!(
+ w,
+ " ",
+ if typ != DiffTableType::Main {
+ " hidden"
+ } else {
+ ""
+ }
+ )?;
+ for line in lines {
+ write_line(&mut w, line)?;
+ }
+ writeln!(w, "
")
+}
+fn write_line(mut w: W, line: &Line) -> Result<()> { // writes one diff line: class marker, line numbers, line type, escaped text
+ write!(
+ w,
+ " ",
+ match line.line_type.as_str() { // `lr` marks removed lines, `la` added lines; others get no class
+ "-" => " class=\"lr\"",
+ "+" => " class=\"la\"",
+ _ => "",
+ }
+ )?;
+ write!(
+ w,
+ "{:>5} {:>5} {} | ",
+ line.source_line_no // a missing line number is printed as an empty, right-aligned field
+ .map(|n| n.to_string())
+ .unwrap_or_else(|| "".to_string()),
+ line.target_line_no
+ .map(|n| n.to_string())
+ .unwrap_or_else(|| "".to_string()),
+ line.line_type
+ )?;
+ writeln!(
+ w,
+ "{}
|
",
+ encode_safe(&line.value) // the line's text is HTML-escaped before it is written
+ )
+}
diff --git a/tools/difftags/src/main.rs b/tools/difftags/src/main.rs
new file mode 100644
index 0000000000..8e675ec282
--- /dev/null
+++ b/tools/difftags/src/main.rs
@@ -0,0 +1,90 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use crate::page::{File, Page, PageTracker};
+use clap::Parser;
+use std::fs;
+use std::path::PathBuf;
+use std::process;
+use unidiff::PatchSet;
+
+mod html;
+mod page;
+
+// Command-line interface; the `///` comments on the fields double as `--help` text via clap.
+#[derive(Debug, Parser)]
+#[clap(name = "difftags", version)]
+#[clap(about = "Diff to HTML conversion tool")]
+struct Cli {
+    /// Directory to output to
+    #[clap(short, long)]
+    output_dir: PathBuf,
+
+    /// Diff file to convert to HTML, in unified diff format
+    input: PathBuf,
+
+    /// Maximum files per page of HTML
+    #[clap(long, default_value = "15")]
+    max_files_per_page: usize,
+
+    /// Maximum modified lines per page of HTML
+    #[clap(long, default_value = "1000")]
+    max_lines_per_page: usize,
+
+    /// Title to apply to the diff
+    #[clap(long)]
+    title: Option<String>,
+
+    /// Optional subtitle to appear under the title
+    #[clap(long)]
+    subtitle: Option<String>,
+}
+/// Entry point: loads the diff named on the command line, splits its files into pages, and
+/// writes the HTML output. Any failure is reported on stderr and ends the process with status 1.
+fn main() {
+    let args = Cli::parse();
+    let diff_str = fs::read_to_string(args.input).unwrap_or_else(|err| {
+        eprintln!("failed to load the input diff file: {err}");
+        process::exit(1)
+    });
+
+    let mut patch = PatchSet::new();
+    patch.parse(&diff_str).unwrap_or_else(|err| {
+        eprintln!("failed to parse the input diff file: {err}");
+        process::exit(1)
+    });
+
+    // Distribute the files over pages in diff order; the tracker decides where a page ends
+    // based on the file and modified-line limits from the command line.
+    let mut pages = Vec::new();
+    let mut page_tracker = PageTracker::new(args.max_files_per_page, args.max_lines_per_page);
+    let mut current_page = Page::default();
+    for patched_file in patch {
+        if page_tracker.next_file_is_page_boundary() {
+            // Hand the finished page over and continue with an empty one.
+            pages.push(std::mem::take(&mut current_page));
+            page_tracker.reset();
+        }
+
+        let file = File::from(patched_file);
+        // Only non-context lines in each section's visible `diff` part count towards the limit.
+        let modified_lines = file
+            .sections()
+            .iter()
+            .flat_map(|section| section.diff.iter())
+            .filter(|line| line.line_type != " ")
+            .count();
+        page_tracker.total_modified_lines(modified_lines);
+        current_page.files.push(file);
+    }
+    // The last page is pushed even when it holds no files, so a diff without files still
+    // produces an `index.html`.
+    pages.push(current_page);
+
+    if let Err(err) = html::write_html(&args.output_dir, args.title, args.subtitle, &pages) {
+        eprintln!("failed to write HTML: {err}");
+        process::exit(1)
+    }
+}
diff --git a/tools/difftags/src/page.rs b/tools/difftags/src/page.rs
new file mode 100644
index 0000000000..94ab30588c
--- /dev/null
+++ b/tools/difftags/src/page.rs
@@ -0,0 +1,316 @@
+/*
+ * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use unidiff::{Hunk, Line, PatchedFile};
+
+/// Number of lines before and after the first modified and last modified lines in a diff that will
+/// be displayed by default. Lines outside of this range will be hidden by default (but can be shown
+/// by clicking a link to expand the context).
+const DISPLAYED_CONTEXT_LINES: usize = 10;
+
+/// Running per-page counters that decide where the diff output is split into pages.
+#[derive(Debug, Default)]
+pub struct PageTracker {
+    max_files_per_page: usize, // file-count limit compared against `files`
+    max_lines_per_page: usize, // modified-line limit compared against `lines`
+    files: usize,              // files counted for the current page, including the upcoming one
+    lines: usize,              // modified lines recorded for the current page
+}
+impl PageTracker {
+    /// Creates a tracker for an empty page with the given per-page limits.
+    pub fn new(max_files_per_page: usize, max_lines_per_page: usize) -> Self {
+        Self {
+            max_files_per_page,
+            max_lines_per_page,
+            ..Self::default()
+        }
+    }
+    /// Counts the file about to be added; true if it has to start a new page instead.
+    pub fn next_file_is_page_boundary(&mut self) -> bool {
+        self.files += 1;
+        self.files > self.max_files_per_page || self.lines >= self.max_lines_per_page
+    }
+    /// Adds one file's modified-line count to the current page's running total.
+    pub fn total_modified_lines(&mut self, lines: usize) {
+        self.lines += lines;
+    }
+    /// Starts a new page whose first file is the one that just triggered the boundary.
+    pub fn reset(&mut self) {
+        self.files = 1; // was 0, which let every later page hold max_files_per_page + 1 files
+        self.lines = 0;
+    }
+}
+
+/// One output page: the files whose diffs are written together into a single HTML file.
+#[derive(Debug, Default)]
+pub struct Page {
+    pub files: Vec<File>,
+}
+/// A file in the diff, classified by whether its source or target path is `/dev/null`.
+#[derive(Debug)]
+pub enum File {
+    New {
+        name: String,
+        sections: Vec<Section>,
+    },
+    Removed {
+        name: String,
+        sections: Vec<Section>,
+    },
+    Modified {
+        old_name: String,
+        new_name: String,
+        sections: Vec<Section>,
+    },
+}
+impl File {
+    /// Name of the file; for a modified file this is the new (target) name.
+    pub fn name(&self) -> &str {
+        match self {
+            Self::New { name, .. } | Self::Removed { name, .. } => name,
+            Self::Modified { new_name, .. } => new_name,
+        }
+    }
+    /// The diff sections of the file, whichever variant it is.
+    pub fn sections(&self) -> &[Section] {
+        match self {
+            Self::New { sections, .. }
+            | Self::Removed { sections, .. }
+            | Self::Modified { sections, .. } => sections,
+        }
+    }
+}
+impl From<PatchedFile> for File {
+    /// Classifies a parsed file by its source/target paths and converts its hunks to sections.
+    fn from(patched_file: PatchedFile) -> Self {
+        let sections = patched_file.hunks().iter().map(Section::from).collect();
+        let source = patched_file
+            .source_file
+            .strip_prefix("a/") // drop the old-side prefix when present
+            .unwrap_or(&patched_file.source_file);
+        let target = patched_file
+            .target_file
+            .strip_prefix("b/") // drop the new-side prefix when present
+            .unwrap_or(&patched_file.target_file);
+        if source == "/dev/null" {
+            File::New {
+                name: target.into(),
+                sections,
+            }
+        } else if target == "/dev/null" {
+            File::Removed {
+                name: source.into(),
+                sections,
+            }
+        } else {
+            File::Modified {
+                old_name: source.into(),
+                new_name: target.into(),
+                sections,
+            }
+        }
+    }
+}
+/// One hunk split into hidden leading context, a visible diff window, and hidden trailing context.
+#[derive(Debug)]
+pub struct Section {
+    pub start_line: (usize, usize),
+    pub context_prefix: Option<Vec<Line>>,
+    pub diff: Vec<Line>,
+    pub context_suffix: Option<Vec<Line>>,
+    pub end_line: (usize, usize),
+}
+impl From<&Hunk> for Section {
+    /// Splits a hunk into the visible window (first to last modified line, widened by
+    /// `DISPLAYED_CONTEXT_LINES` on each side) and the context hidden before and after it.
+    fn from(hunk: &Hunk) -> Self {
+        let lines = hunk.lines();
+        assert!(!lines.is_empty());
+        // `diff_start`: index of the first modified line. `suffix_start`: index of the first
+        // context line after the last modified line, if any trailing context exists.
+        let mut diff_start = None;
+        let mut suffix_start = None;
+        for (index, line) in lines.iter().enumerate() {
+            let modified = line.line_type != " ";
+            if diff_start.is_none() {
+                if modified {
+                    diff_start = Some(index);
+                }
+            } else if suffix_start.is_some() && modified {
+                // The context seen so far sits between two changes, so it is not the suffix.
+                suffix_start = None;
+            } else if suffix_start.is_none() && !modified {
+                suffix_start = Some(index);
+            }
+        }
+
+        // A hunk without modified lines used to panic on `unwrap`; show it in full instead.
+        let diff_start = diff_start
+            .unwrap_or(0)
+            .saturating_sub(DISPLAYED_CONTEXT_LINES);
+        let suffix_start = suffix_start
+            .unwrap_or(lines.len())
+            .saturating_add(DISPLAYED_CONTEXT_LINES)
+            .min(lines.len());
+
+        let context_prefix = lines[..diff_start].to_vec();
+        let diff = lines[diff_start..suffix_start].to_vec();
+        let context_suffix = lines[suffix_start..].to_vec();
+        let (first_line, last_line) = (&lines[0], &lines[lines.len() - 1]);
+
+        Self {
+            start_line: (
+                first_line.source_line_no.unwrap_or_default(),
+                first_line.target_line_no.unwrap_or_default(),
+            ),
+            // Empty context is represented as `None` rather than an empty vector.
+            context_prefix: Some(context_prefix).filter(|ctx| !ctx.is_empty()),
+            diff,
+            context_suffix: Some(context_suffix).filter(|ctx| !ctx.is_empty()),
+            end_line: (
+                last_line.source_line_no.unwrap_or_default(),
+                last_line.target_line_no.unwrap_or_default(),
+            ),
+        }
+    }
+}
+#[cfg(test)]
+mod tests {
+ use super::*;
+ use unidiff::PatchSet;
+ /// Checks how a hunk is split into hidden prefix, visible window, and hidden suffix, using two fixtures.
+ #[test]
+ fn test_hunk_to_section() {
+ let diff_str = r#"
+diff --git a/some/path/to/file.rs b/some/path/to/file.rs
+index 422b64415..9561909ed 100644
+--- a/some/path/to/file.rs
++++ b/some/path/to/file.rs
+@@ -1,31 +1,31 @@
+ 00
+ 01
+ 02
+ 03
+ 04
+ 05
+ 06
+ 07
+ 08
+ 09
+ 10
+ 11
+ 12
+ 13
+ 14
+-oops
++15
+ 16
+ 17
+ 18
+ 19
+ 20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 30
+ "#;
+ let mut patch = PatchSet::new();
+ patch.parse(&diff_str).unwrap();
+
+ let hunk = &patch.files()[0].hunks()[0];
+ let section: Section = hunk.into();
+
+ assert_eq!((1, 1), section.start_line);
+ assert_eq!((31, 31), section.end_line);
+ assert_eq!(5, section.context_prefix.as_ref().unwrap().len());
+ assert_eq!(22, section.diff.len());
+ assert_eq!(
+ "05", section.diff[0].value,
+ "the first line of the diff should be {DISPLAYED_CONTEXT_LINES} lines before the first modified line"
+ );
+ assert_eq!(
+ "25", section.diff[21].value,
+ "the last line of the diff should be {DISPLAYED_CONTEXT_LINES} lines after the last modified line"
+ );
+ assert_eq!(5, section.context_suffix.as_ref().unwrap().len());
+ assert_eq!("26", section.context_suffix.as_ref().unwrap()[0].value);
+ assert_eq!("30", section.context_suffix.as_ref().unwrap()[4].value);
+
+ let diff_str = r#"
+diff --git a/some/path/to/file.rs b/some/path/to/file.rs
+index 422b64415..9561909ed 100644
+--- a/some/path/to/file.rs
++++ b/some/path/to/file.rs
+@@ -1,38 +1,36 @@
+ 00
+ 01
+ 02
+ 03
+ 04
+ 05
+ 06
+ 07
+ 08
+ 09
+ 10
+ 11
+ 12
+ 13
+ 14
+-oops
++15
+ 16
+ 17
+ 18
+ 19
+-oops1
+-oops2
+-oops3
++20
+ 21
+ 22
+ 23
+ 24
+ 25
+ 26
+ 27
+ 28
+ 29
+ 31
+ 32
+ 33
+ 34
+ 35
+ 36
+ "#;
+ let mut patch = PatchSet::new();
+ patch.parse(&diff_str).unwrap();
+
+ let hunk = &patch.files()[0].hunks()[0];
+ let section: Section = hunk.into();
+
+ assert_eq!((1, 1), section.start_line);
+ assert_eq!((38, 36), section.end_line);
+ assert_eq!(5, section.context_prefix.as_ref().unwrap().len());
+ assert_eq!(30, section.diff.len());
+ assert_eq!(
+ "05", section.diff[0].value,
+ "the first line of the diff should be {DISPLAYED_CONTEXT_LINES} lines before the first modified line"
+ );
+ assert_eq!(
+ "31", section.diff[29].value,
+ "the last line of the diff should be {DISPLAYED_CONTEXT_LINES} lines after the last modified line"
+ );
+ assert_eq!(5, section.context_suffix.as_ref().unwrap().len());
+ assert_eq!("32", section.context_suffix.as_ref().unwrap()[0].value);
+ assert_eq!("36", section.context_suffix.as_ref().unwrap()[4].value);
+ }
+}