Skip to content

Commit

Permalink
Add a tool for generating package dependency report (#466)
Browse files Browse the repository at this point in the history
  • Loading branch information
aws-tianquaw authored Aug 13, 2024
1 parent 7f17af2 commit 43a7e67
Show file tree
Hide file tree
Showing 8 changed files with 156 additions and 43 deletions.
4 changes: 2 additions & 2 deletions .github/ISSUE_TEMPLATE/bug_report.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ body:
label: 🐛 Describe the bug
description: |
Please provide a clear and concise description of what the bug is.
If relevant, add a minimal example so that we can reproduce the error by running the code. It is very important for the snippet to be as succinct (minimal) as possible, so please take time to trim down any irrelevant code to help us debug efficiently. We are going to copy-paste your code and we expect to get the same result as you did: avoid any external data, and include the relevant imports, etc.
Please also paste or describe the results you observe.
If you observe an error, please paste the error message including the **full** traceback of the exception. It may be relevant to wrap error messages in ```` ```triple quotes blocks``` ````.
placeholder: |
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/PR_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
This pull request was created by GitHub Actions/AWS CodeBuild! Before merging, please do the following:
- [ ] Review changelog/staleness report.
- [ ] (Only for Minor/Major version releases) Review python package dependency and size report.
- [ ] Review build/test results by clicking *Build Logs* in CI Report (be patient, tests take ~4hr).
- [ ] Review ECR Scan results.
36 changes: 2 additions & 34 deletions src/changelog_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,39 +2,7 @@

from semver import Version

from utils import get_dir_for_version, get_match_specs, get_semver


def _derive_changeset(
    target_version_dir, source_version_dir, image_config
) -> tuple[dict[str, list[str]], dict[str, str]]:
    """Compute the package changeset between a source and a target image version.

    Compares the resolved environments (env.out) of both versions, restricted
    to the packages explicitly required by the target version's env.in file.

    Args:
        target_version_dir: Directory holding the target version's env files.
        source_version_dir: Directory holding the source version's env files.
        image_config: Image config dict supplying the env in/out file names.

    Returns:
        A tuple ``(upgrades, new_packages)`` where ``upgrades`` maps a package
        name to ``[source_version, target_version]`` and ``new_packages`` maps
        a newly introduced package name to its version in the target image.
    """
    # NOTE: the original annotation was "-> (dict[...], dict[...])", which is a
    # tuple *object*, not a valid PEP 484 type; tuple[...] is the correct form.
    env_in_file_name = image_config["build_args"]["ENV_IN_FILENAME"]
    env_out_file_name = image_config["env_out_filename"]
    required_packages_from_target = get_match_specs(target_version_dir + "/" + env_in_file_name).keys()
    target_match_spec_out = get_match_specs(target_version_dir + "/" + env_out_file_name)
    source_match_spec_out = get_match_specs(source_version_dir + "/" + env_out_file_name)

    # Note: required_packages_from_source is not currently used.
    # If we ever remove packages from env.in, required_packages_from_source will be needed.
    # We only care about the packages which are present in the target version env.in file.
    installed_packages_from_target = {
        k: str(v.get("version")).removeprefix("==")
        for k, v in target_match_spec_out.items()
        if k in required_packages_from_target
    }
    # Note: A required package in the target version might not be a required package in
    # the source version, but the source version could still have it pulled in as a
    # dependency of a dependency.
    installed_packages_from_source = {
        k: str(v.get("version")).removeprefix("==")
        for k, v in source_match_spec_out.items()
        if k in required_packages_from_target
    }
    upgrades = {
        k: [installed_packages_from_source[k], v]
        for k, v in installed_packages_from_target.items()
        if k in installed_packages_from_source and installed_packages_from_source[k] != v
    }
    new_packages = {k: v for k, v in installed_packages_from_target.items() if k not in installed_packages_from_source}
    # TODO: Add support for removed packages.
    return upgrades, new_packages
from utils import derive_changeset, get_dir_for_version, get_semver


def generate_change_log(target_version: Version, image_config):
Expand All @@ -48,7 +16,7 @@ def generate_change_log(target_version: Version, image_config):
source_version = get_semver(source_patch_version)
source_version_dir = get_dir_for_version(source_version)
image_type = image_config["image_type"]
upgrades, new_packages = _derive_changeset(target_version_dir, source_version_dir, image_config)
upgrades, new_packages = derive_changeset(target_version_dir, source_version_dir, image_config)
with open(f"{target_version_dir}/CHANGELOG-{image_type}.md", "w") as f:
f.write("# Change log: " + str(target_version) + "(" + image_type + ")\n\n")
if len(upgrades) != 0:
Expand Down
11 changes: 11 additions & 0 deletions src/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
_get_dependency_upper_bound_for_runtime_upgrade,
)
from package_report import (
generate_package_dependency_report,
generate_package_size_report,
generate_package_staleness_report,
)
Expand Down Expand Up @@ -426,6 +427,16 @@ def get_arg_parser():
action="store_true",
help="Validate package size delta and raise error if the validation failed.",
)
package_dependency_parser = subparsers.add_parser(
"generate-dependency-report",
help="Generates package dependency report for each of newly introcuded packages in the target image version.",
)
package_dependency_parser.set_defaults(func=generate_package_dependency_report)
package_dependency_parser.add_argument(
"--target-patch-version",
required=True,
help="Specify the target patch version for which the package dependency report needs to be generated.",
)
return parser


Expand Down
45 changes: 45 additions & 0 deletions src/package_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from dependency_upgrader import _dependency_metadata
from utils import (
create_markdown_table,
derive_changeset,
get_dir_for_version,
get_match_specs,
get_semver,
Expand Down Expand Up @@ -221,6 +222,28 @@ def _generate_python_package_size_report_per_image(
return validate_result


def _generate_python_package_dependency_report(image_config, base_version_dir, target_version_dir):
    """Print a markdown table of dependencies for packages newly added in the target image.

    Args:
        image_config: Image config dict passed through to ``derive_changeset``.
        base_version_dir: Directory of the base (source) image version.
        target_version_dir: Directory of the target image version.
    """
    # Only newly introduced marquee packages are reported; version upgrades are ignored.
    _, added_packages = derive_changeset(target_version_dir, base_version_dir, image_config)

    dependency_info = {}
    for pkg_name, pkg_version in added_packages.items():
        # Query conda-forge for the exact package build; run_command returns
        # (stdout, stderr, exit_code) with JSON metadata on stdout.
        stdout, _stderr, _rc = conda.cli.python_api.run_command("search", f"{pkg_name}=={pkg_version}", "--json")
        metadata = json.loads(stdout)[pkg_name][0]
        dependency_info[pkg_name] = {"version": metadata["version"], "depends": metadata["depends"]}

    # Cap the table at 20 rows to keep the report readable.
    table_rows = [
        {"pkg": name, "version": info["version"], "depends": info["depends"]}
        for name, info in islice(dependency_info.items(), None, 20)
    ]
    print(create_markdown_table(["Package", "Version in the Target Image", "Dependencies"], table_rows))


def generate_package_staleness_report(args):
target_version = get_semver(args.target_patch_version)
target_version_dir = get_dir_for_version(target_version)
Expand Down Expand Up @@ -260,3 +283,25 @@ def generate_package_size_report(args):
if validate_results:
raise Exception(f"Size Validation Failed! Issues found: {validate_results}")
print("Pakcage Size Validation Passed!")


def generate_package_dependency_report(args):
    """Generate the Python package dependency report for the target patch version.

    Args:
        args: Parsed CLI namespace; ``args.target_patch_version`` selects the
            target image version to report on.
    """
    target_version = get_semver(args.target_patch_version)
    target_version_dir = get_dir_for_version(target_version)

    # The base (source) version, if any, is recorded in source-version.txt in
    # the target version's directory.
    base_version = None
    source_version_file = f"{target_version_dir}/source-version.txt"
    if os.path.exists(source_version_file):
        with open(source_version_file, "r") as f:
            base_version = get_semver(f.readline())

    base_version_dir = get_dir_for_version(base_version) if base_version else None

    print("\n# Python Package Dependency Report\n")
    print(f"\n### Target Image Version: {target_version} | Base Image Version: {base_version}\n")
    if not base_version:
        print("WARNING: No base version or base version directory found, will generate full report for target version.")
    for image_config in _image_generator_configs:
        print(f"## Image Type: ({image_config['image_type'].upper()})")
        _generate_python_package_dependency_report(image_config, base_version_dir, target_version_dir)
32 changes: 32 additions & 0 deletions src/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,3 +103,35 @@ def pull_conda_package_metadata(image_config, image_artifact_dir):
results = {k: v for k, v in sorted(results.items(), key=lambda item: item[1]["size"], reverse=True)}

return results


def derive_changeset(
    target_version_dir, source_version_dir, image_config
) -> tuple[dict[str, list[str]], dict[str, str]]:
    """Compute package upgrades and additions between two image versions.

    Compares the resolved environments (env.out) of both versions, restricted
    to the packages explicitly required by the target version's env.in file.

    Args:
        target_version_dir: Directory holding the target version's env files.
        source_version_dir: Directory holding the source version's env files,
            or ``None`` when no base version exists (the dependency report
            passes ``None`` in that case) — then every required target
            package is treated as newly introduced.
        image_config: Image config dict supplying the env in/out file names.

    Returns:
        A tuple ``(upgrades, new_packages)`` where ``upgrades`` maps a package
        name to ``[source_version, target_version]`` and ``new_packages`` maps
        a newly introduced package name to its version in the target image.
    """
    env_in_file_name = image_config["build_args"]["ENV_IN_FILENAME"]
    env_out_file_name = image_config["env_out_filename"]
    required_packages_from_target = get_match_specs(target_version_dir + "/" + env_in_file_name).keys()
    target_match_spec_out = get_match_specs(target_version_dir + "/" + env_out_file_name)
    # Guard against a missing base version: without it, "source_version_dir + '/'"
    # would raise TypeError. An empty source spec makes every package "new".
    source_match_spec_out = (
        get_match_specs(source_version_dir + "/" + env_out_file_name) if source_version_dir else {}
    )

    # Note: required_packages_from_source is not currently used.
    # If we ever remove packages from env.in, required_packages_from_source will be needed.
    # We only care about the packages which are present in the target version env.in file.
    installed_packages_from_target = {
        k: str(v.get("version")).removeprefix("==")
        for k, v in target_match_spec_out.items()
        if k in required_packages_from_target
    }
    # Note: A required package in the target version might not be a required package in
    # the source version, but the source version could still have it pulled in as a
    # dependency of a dependency.
    installed_packages_from_source = {
        k: str(v.get("version")).removeprefix("==")
        for k, v in source_match_spec_out.items()
        if k in required_packages_from_target
    }
    upgrades = {
        k: [installed_packages_from_source[k], v]
        for k, v in installed_packages_from_target.items()
        if k in installed_packages_from_source and installed_packages_from_source[k] != v
    }
    new_packages = {k: v for k, v in installed_packages_from_target.items() if k not in installed_packages_from_source}
    # TODO: Add support for removed packages.
    return upgrades, new_packages
5 changes: 2 additions & 3 deletions test/test_main.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
import os
from unittest.mock import MagicMock, Mock, patch

from changelog_generator import _derive_changeset
from config import _image_generator_configs
from main import (
_get_config_for_image,
Expand All @@ -25,7 +24,7 @@
_get_image_type_package_metadata,
_get_package_to_image_type_mapping,
)
from utils import get_semver
from utils import derive_changeset, get_semver


class CreateVersionArgs:
Expand Down Expand Up @@ -629,7 +628,7 @@ def test_derive_changeset(tmp_path):
_create_docker_cpu_env_out_file(target_version_dir + "/cpu.env.out", package_metadata=target_env_out_packages)
expected_upgrades = {"ipykernel": ["6.21.3", "6.21.6"]}
expected_new_packages = {"boto3": "1.2"}
actual_upgrades, actual_new_packages = _derive_changeset(
actual_upgrades, actual_new_packages = derive_changeset(
target_version_dir, source_version_dir, _image_generator_configs[1]
)
assert expected_upgrades == actual_upgrades
Expand Down
65 changes: 61 additions & 4 deletions test/test_package_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

from config import _image_generator_configs
from package_report import (
_generate_python_package_dependency_report,
_generate_python_package_size_report_per_image,
_get_installed_package_versions_and_conda_versions,
)
Expand All @@ -23,14 +24,29 @@ def _create_env_in_docker_file(file_path):
)


def _create_env_out_docker_file(file_path):
def _create_target_env_in_docker_file(file_path):
with open(file_path, "w") as env_in_file:
env_in_file.write(
f"""# This file is auto-generated.
conda-forge::ipykernel
conda-forge::numpy[version=\'>=1.0.17,<2.0.0\']
conda-forge::sagemaker-headless-execution-driver[version='>=0.0.12,<0.1.0']
"""
)


def _create_env_out_docker_file(
file_path,
package_metadata="""https://conda.anaconda.org/conda-forge/noarch/ipykernel-6.21.3-pyh210e3f2_0.conda#8c1f6bf32a6ca81232c4853d4165ca67
https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.2-py38h10c12cc_0.conda#05592c85b9f6931dc2df1e80c0d56294""",
):
with open(file_path, "w") as env_out_file:
env_out_file.write(
f"""# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
https://conda.anaconda.org/conda-forge/noarch/ipykernel-6.21.3-pyh210e3f2_0.conda#8c1f6bf32a6ca81232c4853d4165ca67
https://conda.anaconda.org/conda-forge/linux-64/numpy-1.24.2-py38h10c12cc_0.conda#05592c85b9f6931dc2df1e80c0d56294\n"""
@EXPLICIT
{package_metadata}\n"""
)


Expand Down Expand Up @@ -61,7 +77,7 @@ def test_get_match_specs(tmp_path):
assert str(numpy_match_spec.get("version")).removeprefix("==") == "1.24.2"
assert ipykernel_match_spec.get("subdir") == "noarch"
assert numpy_match_spec.get("subdir") == "linux-64"
assert len(match_spec_out) == 2
assert len(match_spec_out) == 3
# Test bad file path
env_out_file_path = tmp_path / "bad.env.out"
match_spec_out = get_match_specs(env_out_file_path)
Expand Down Expand Up @@ -166,3 +182,44 @@ def test_generate_package_size_report_when_base_version_is_not_present(capsys):
assert "python|3.12.2|30.82MB" in captured.out
assert "libclang|18.1.2|18.38MB" in captured.out
assert "tqdm|4.66.2|87.47KB" in captured.out


@patch("conda.cli.python_api.run_command")
def test_generate_package_dependency_report(mock_conda_command, tmp_path, capsys):
    """Newly added packages appear in the dependency report; pre-existing ones do not."""
    base_dir = tmp_path / "base"
    base_dir.mkdir()
    target_dir = tmp_path / "target"
    target_dir.mkdir()

    # Target env.in declares ipykernel, numpy and the execution driver as required.
    _create_target_env_in_docker_file(target_dir / "cpu.env.in")

    # Base image: no sagemaker-headless-execution-driver installed yet.
    base_metadata = (
        "https://conda.anaconda.org/conda-forge/noarch/ipykernel-6.21.3-pyh210e3f_0.conda#8c1f6bf32a6ca81232c4853d4165ca67\n"
        "https://conda.anaconda.org/conda-forge/linux-64/boto3-1.2-cuda112py38hd_0.conda#8c1f6bf32a6ca81232c4853d4165ca67"
    )
    _create_env_out_docker_file(str(base_dir) + "/cpu.env.out", package_metadata=base_metadata)

    # Target image: execution driver newly introduced.
    target_metadata = "\n".join(
        [
            "https://conda.anaconda.org/conda-forge/noarch/ipykernel-6.21.6-pyh210e3f2_0.conda#8c1f6bf32a6ca81232c4853d4165ca67",
            "https://conda.anaconda.org/conda-forge/linux-64/boto3-1.2-cuda112py38hd_0.conda#8c1f6bf32a6ca81232c4853d4165ca67",
            "https://conda.anaconda.org/conda-forge/noarch/sagemaker-headless-execution-driver-0.0.13-pyhd8ed1ab_0.conda#feaec93c21652caac71ed7ecf450cb17",
        ]
    )
    _create_env_out_docker_file(str(target_dir) + "/cpu.env.out", package_metadata=target_metadata)

    # conda search is mocked; run_command returns (stdout, stderr, exit_code).
    mock_conda_command.return_value = (
        '{"sagemaker-headless-execution-driver":[{"version":"0.0.13","depends":["nbconvert","papermill >=2.4","python >3.8"]}]}',
        "",
        0,
    )

    _generate_python_package_dependency_report(_image_generator_configs[1], str(base_dir), str(target_dir))

    captured = capsys.readouterr()
    # The newly introduced package shows up with its dependency list.
    assert "sagemaker-headless-execution-driver|0.0.13|['nbconvert', 'papermill >=2.4', 'python >3.8']" in captured.out

    # Packages already present in the base image are excluded from the report.
    assert "ipykernel" not in captured.out
    assert "numpy" not in captured.out

0 comments on commit 43a7e67

Please sign in to comment.