Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: get PR description from googleapis commits #2531

Merged
merged 42 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
eb93ac3
feat: get commit message from googleapis
JoeWang1127 Feb 26, 2024
df8fe50
add comments
JoeWang1127 Mar 1, 2024
eb0da6b
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 1, 2024
9a1be4c
move to integration test
JoeWang1127 Mar 1, 2024
c1f556c
add paths from generation_config
JoeWang1127 Mar 1, 2024
ccb1a21
add generate_pr_description.py
JoeWang1127 Mar 2, 2024
057da91
add an integration test
JoeWang1127 Mar 2, 2024
da3fc4c
add an unit test
JoeWang1127 Mar 2, 2024
1e6ab5e
fix unit tests
JoeWang1127 Mar 2, 2024
a1c786f
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 2, 2024
d128a2d
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 3, 2024
939c513
change func name
JoeWang1127 Mar 3, 2024
7079327
compare result with golden file
JoeWang1127 Mar 4, 2024
a08a866
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 4, 2024
b77aa1d
fix integration tests
JoeWang1127 Mar 4, 2024
eecaba0
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 4, 2024
259c4ef
move get_commit_messages to surface
JoeWang1127 Mar 5, 2024
f09fb3b
format pr description
JoeWang1127 Mar 5, 2024
5e1d85e
remove common protos
JoeWang1127 Mar 5, 2024
7e9be85
add integration test for split repo
JoeWang1127 Mar 5, 2024
9f3bbca
format code
JoeWang1127 Mar 5, 2024
8324776
format pr description
JoeWang1127 Mar 5, 2024
cc9088e
change parameter comments
JoeWang1127 Mar 5, 2024
334298b
change parameter comments
JoeWang1127 Mar 5, 2024
aabd9c1
add generator version in pr description
JoeWang1127 Mar 5, 2024
176871d
find the versioned proto_path from a file path
JoeWang1127 Mar 5, 2024
93e60de
add library name in pr description
JoeWang1127 Mar 5, 2024
d4d16f6
format pr description
JoeWang1127 Mar 5, 2024
a279549
do not include library_name in split repo
JoeWang1127 Mar 5, 2024
1946d64
bring back PiperOrigin-RevId
JoeWang1127 Mar 6, 2024
be189c5
use NESTED_COMMIT
JoeWang1127 Mar 6, 2024
188a632
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 6, 2024
5d7c915
add comments
JoeWang1127 Mar 6, 2024
240038f
format pr description
JoeWang1127 Mar 6, 2024
33df40b
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 6, 2024
cd7f639
format multi-line commit messages
JoeWang1127 Mar 6, 2024
1715714
add unit test
JoeWang1127 Mar 6, 2024
6cf0045
add unit tests
JoeWang1127 Mar 7, 2024
b473438
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
2b507aa
refactor class and unit tests
JoeWang1127 Mar 7, 2024
2ce846a
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
c03d6dd
Merge branch 'main' into feat/get-pr-description
JoeWang1127 Mar 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
197 changes: 197 additions & 0 deletions library_generation/generate_pr_description.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
#!/usr/bin/env python3
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import shutil
from typing import Dict

from typing import List

import click
from git import Commit, Repo
from library_generation.model.generation_config import from_yaml
from library_generation.utilities import find_versioned_proto_path
from library_generation.utilities import get_file_paths


@click.group(invoke_without_command=False)
@click.pass_context
@click.version_option(message="%(version)s")
def main(ctx):
    # Root command group. It does no work of its own; it only exists so that
    # sub-commands can be attached to it with @main.command().
    # A comment is used here instead of a docstring on purpose: click would
    # surface a docstring as the group's --help text.
    pass


@main.command()
@click.option(
    "--generation-config-yaml",
    required=True,
    type=str,
    help="""
    Path to generation_config.yaml that contains the metadata about
    library generation.
    The googleapis commit in the configuration is the latest commit,
    inclusively, from which the commit message is considered.
    """,
)
@click.option(
    "--baseline-commit",
    required=True,
    type=str,
    help="""
    The baseline (oldest) commit, exclusively, from which the commit message is
    considered.
    This commit should be an ancestor of googleapis commit in configuration.
    """,
)
@click.option(
    "--repo-url",
    type=str,
    default="https://github.com/googleapis/googleapis.git",
    show_default=True,
    help="""
    GitHub repository URL.
    """,
)
def generate(
    generation_config_yaml: str,
    repo_url: str,
    baseline_commit: str,
) -> str:
    """
    Generate a pull request description from commit messages and print it.
    """
    description = generate_pr_descriptions(
        generation_config_yaml=generation_config_yaml,
        repo_url=repo_url,
        baseline_commit=baseline_commit,
    )
    # click discards a command callback's return value when the CLI runs in
    # standalone mode (the plain `main()` call used by this script), so the
    # description has to be written to stdout for command-line users to see
    # it. The value is still returned for callers that invoke the command
    # with standalone_mode=False.
    click.echo(description)
    return description


def generate_pr_descriptions(
    generation_config_yaml: str,
    repo_url: str,
    baseline_commit: str,
) -> str:
    """
    Build a pull request description for a generation configuration.

    The configuration is loaded with from_yaml. It supplies the latest commit
    (googleapis_commitish), the generator version, the monorepo flag and,
    through get_file_paths, the paths used to select commits.

    :param generation_config_yaml: path to the generation configuration yaml.
    :param repo_url: the url of the repository whose commits are inspected.
    :param baseline_commit: the commit at which the history walk stops; it is
    not included in the description.
    :return: the combined commit messages.
    """
    generation_config = from_yaml(generation_config_yaml)
    proto_paths = get_file_paths(generation_config)
    description = __get_commit_messages(
        repo_url=repo_url,
        latest_commit=generation_config.googleapis_commitish,
        baseline_commit=baseline_commit,
        paths=proto_paths,
        generator_version=generation_config.gapic_generator_version,
        is_monorepo=generation_config.is_monorepo,
    )
    return description


def __get_commit_messages(
    repo_url: str,
    latest_commit: str,
    baseline_commit: str,
    paths: Dict[str, str],
    generator_version: str,
    is_monorepo: bool,
) -> str:
    """
    Combine commit messages of a repository from latest_commit back to
    baseline_commit. Only commits which change files in the pre-defined
    file paths will be considered.
    Note that baseline_commit should be an ancestor of latest_commit.

    The history is walked from latest_commit (inclusive) through the first
    parent of each commit until a commit whose full hex SHA equals
    baseline_commit is reached (exclusive), or until a commit without
    parents is reached.

    The repository is cloned into /tmp/repo. That directory is removed
    before cloning and again when this function finishes, including when
    an error is raised while cloning or walking the history.

    :param repo_url: the url of the repository.
    :param latest_commit: the newest commit to be considered in
    selecting commit message.
    :param baseline_commit: the oldest commit to be considered in
    selecting commit message. This commit should be an ancestor of
    latest_commit and is compared against the full hex SHA of each commit.
    :param paths: a mapping from file paths to library_name.
    :param generator_version: the version of the generator.
    :param is_monorepo: whether to generate commit messages in a monorepo.
    :return: commit messages.
    """
    tmp_dir = "/tmp/repo"
    shutil.rmtree(tmp_dir, ignore_errors=True)
    os.mkdir(tmp_dir)
    qualified_commits = {}
    try:
        repo = Repo.clone_from(repo_url, tmp_dir)
        commit = repo.commit(latest_commit)
        while str(commit.hexsha) != baseline_commit:
            commit_and_name = __filter_qualified_commit(paths=paths, commit=commit)
            if commit_and_name != ():
                qualified_commits[commit_and_name[0]] = commit_and_name[1]
            commit_parents = commit.parents
            if not commit_parents:
                # Reached a commit without parents before meeting the baseline.
                break
            # Only the first parent is followed.
            commit = commit_parents[0]
    finally:
        # Always drop the clone, even when cloning or commit lookup fails,
        # so a failed run does not leave the checkout behind in /tmp.
        shutil.rmtree(tmp_dir, ignore_errors=True)
    return __combine_commit_messages(
        latest_commit=latest_commit,
        baseline_commit=baseline_commit,
        commits=qualified_commits,
        generator_version=generator_version,
        is_monorepo=is_monorepo,
    )


def __filter_qualified_commit(paths: Dict[str, str], commit: Commit) -> (Commit, str):
    """
    Pair a commit with the library_name it touches, if there is one.
    A commit qualifies when at least one file changed in that commit maps,
    through find_versioned_proto_path, to a key of paths. The first such
    file decides which library_name is returned.

    :param paths: a mapping from versioned proto_path to library_name.
    :param commit: a commit under consideration.
    :return: a tuple of the commit and library_name if the commit is
    qualified; otherwise an empty tuple.
    """
    changed_files = commit.stats.files
    for changed_file in changed_files:
        proto_path = find_versioned_proto_path(changed_file)
        if proto_path not in paths:
            continue
        return commit, paths[proto_path]
    return ()


def __combine_commit_messages(
latest_commit: str,
baseline_commit: str,
commits: Dict[Commit, str],
generator_version: str,
is_monorepo: bool,
) -> str:
messages = [
f"This pull request is generated with proto changes between googleapis commit {baseline_commit} (exclusive) and {latest_commit} (inclusive).",
"Qualified commits are:",
]
for commit in commits:
short_sha = commit.hexsha[:7]
messages.append(
f"[googleapis/googleapis@{short_sha}](https://github.com/googleapis/googleapis/commit/{commit.hexsha})"
)

messages.append("BEGIN_COMMIT_OVERRIDE")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the rationale to use BEGIN_COMMIT_OVERRIDE? It's for when we manually modify the wrong commit message for release notes.

Copy link
Collaborator Author

@JoeWang1127 JoeWang1127 Mar 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the benefit of this is that it creates separate entries in the release notes.
For example:

COMMIT_OVERRIDE

feat: [document-ai] expose model_type in v1 processor, so that user can see the model_type after get or list processor version

feat: [document-ai] add model_type in v1beta3 processor proto

feat: Regenerate with the Java code generator (gapic-generator-java) v2.34.0

END_COMMIT_OVERRIDE

will appear in release note as:

Features
- [document-ai] expose model_type in v1 processor, so that user can see the model_type after get or list processor version (xxx)(yyy)
- [document-ai] add model_type in v1beta3 processor proto (xxx)(yyy)
- Regenerate with the Java code generator (gapic-generator-java) v2.34.0 (xxx)(yyy)

Note that the three entries will have the same sha.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor

@chingor13 chingor13 Mar 5, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both "nested commits" and "override" commits work. The nested one was to assist automations like owl-bot which append commits to the end of a PR. It allows for having the extended commit message while not breaking the multi-commit message parsing.

Example:

feat: some feature

some extended description
fix: another fix

more context about the fix

If you use override, you cannot keep the extended description of the first commit (if you add non-conventional-commit message lines between the feat and fix, then further commit messages above will be ignored):

BEGIN_COMMIT_OVERRIDE
feat: some feature
fix: another fix
END_COMMIT_OVERRIDE

If you use nested commits, you can keep all the extended descriptions (although they are generally ignored in the release notes anyways).

BEGIN_NESTED_COMMIT
feat: some feature

some extended description
END_NESTED_COMMIT
BEGIN_NESTED_COMMIT
fix: another fix

more context about the fix
END_NESTED_COMMIT

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've changed to BEGIN_NESTED_COMMIT and END_NESTED_COMMIT.

for commit, library_name in commits.items():
first_line = commit.message.partition("\n")[0]
convention, _, summary = first_line.partition(":")
formatted_message = (
f"{convention}: [{library_name}] {summary.strip()}"
if is_monorepo
else f"{convention}: {summary.strip()}"
)
messages.append(formatted_message)
messages.append(
f"feat: Regenerate with the Java code generator (gapic-generator-java) v{generator_version}"
)
messages.append("END_COMMIT_OVERRIDE")

return "\n\n".join(messages)


# Run the click command group only when this file is executed as a script.
if __name__ == "__main__":
    main()
37 changes: 35 additions & 2 deletions library_generation/test/integration_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,14 @@
import unittest
from distutils.dir_util import copy_tree
from distutils.file_util import copy_file
from filecmp import cmp
from filecmp import dircmp

from git import Repo
from pathlib import Path
from typing import List
from typing import Dict

from library_generation.generate_pr_description import generate_pr_descriptions
from library_generation.generate_repo import generate_from_yaml
from library_generation.model.generation_config import from_yaml, GenerationConfig
from library_generation.test.compare_poms import compare_xml
Expand All @@ -49,6 +51,35 @@


class IntegrationTest(unittest.TestCase):
def test_get_commit_message_success(self):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this as an integration test because it requires a network connection to git clone and check out the googleapis repository.

repo_url = "https://github.com/googleapis/googleapis.git"
config_files = self.__get_config_files(config_dir)
monorepo_baseline_commit = "a17d4caf184b050d50cacf2b0d579ce72c31ce74"
split_repo_baseline_commit = "679060c64136e85b52838f53cfe612ce51e60d1d"
for repo, config_file in config_files:
baseline_commit = (
monorepo_baseline_commit
if repo == "google-cloud-java"
else split_repo_baseline_commit
)
description = generate_pr_descriptions(
generation_config_yaml=config_file,
repo_url=repo_url,
baseline_commit=baseline_commit,
)
description_file = f"{config_dir}/{repo}/pr-description.txt"
if os.path.isfile(f"{description_file}"):
os.remove(f"{description_file}")
with open(f"{description_file}", "w+") as f:
f.write(description)
self.assertTrue(
cmp(
f"{config_dir}/{repo}/pr-description-golden.txt",
f"{description_file}",
)
)
os.remove(f"{description_file}")

def test_generate_repo(self):
shutil.rmtree(f"{golden_dir}", ignore_errors=True)
os.makedirs(f"{golden_dir}", exist_ok=True)
Expand Down Expand Up @@ -150,7 +181,7 @@ def __pull_repo_to(cls, default_dest: Path, repo: str, committish: str) -> str:
repo = Repo(dest)
else:
dest = default_dest
repo_dest = f"{golden_dir}/{repo}"
shutil.rmtree(dest, ignore_errors=True)
repo_url = f"{repo_prefix}/{repo}"
print(f"Cloning repository {repo_url}")
repo = Repo.clone_from(repo_url, dest)
Expand All @@ -169,6 +200,8 @@ def __get_library_names_from_config(cls, config: GenerationConfig) -> List[str]:
def __get_config_files(cls, path: str) -> List[tuple[str, str]]:
config_files = []
for sub_dir in Path(path).resolve().iterdir():
if sub_dir.is_file():
continue
repo = sub_dir.name
if repo == "golden":
continue
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,17 @@ libraries:
- proto_path: google/cloud/alloydb/connectors/v1
- proto_path: google/cloud/alloydb/connectors/v1alpha
- proto_path: google/cloud/alloydb/connectors/v1beta

- api_shortname: documentai
name_pretty: Document AI
product_documentation: https://cloud.google.com/compute/docs/documentai/
api_description: allows developers to unlock insights from your documents with machine
learning.
library_name: document-ai
release_level: stable
issue_tracker: https://issuetracker.google.com/savedsearches/559755
GAPICs:
- proto_path: google/cloud/documentai/v1
- proto_path: google/cloud/documentai/v1beta1
- proto_path: google/cloud/documentai/v1beta2
- proto_path: google/cloud/documentai/v1beta3
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please capture the PiperOrigin-RevId of the relevant commits so we can track their release status through the pipeline.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@chingor13 Do you mind sharing how PiperOrigin-RevId is used?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Collaborator

@blakeli0 blakeli0 Mar 6, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see the value of including them in googleapis commit messages, but since they are not propagated to release notes of google-cloud-java anyway, I'm not sure how we can track their release statuses. In addition, do we need to track the release statuses of a certain PiperOrigin-RevId? If we do need to, I think using the corresponding commit messages would be a better choice?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The metrics tracker links the release PRs with the source PRs either via the commit SHA or the PR# reference

Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
This pull request is generated with proto changes between googleapis commit a17d4caf184b050d50cacf2b0d579ce72c31ce74 (exclusive) and 1a45bf7393b52407188c82e63101db7dc9c72026 (inclusive).

Qualified commits are:

[googleapis/googleapis@7a9a855](https://github.com/googleapis/googleapis/commit/7a9a855287b5042410c93e5a510f40efd4ce6cb1)

[googleapis/googleapis@c7fd8bd](https://github.com/googleapis/googleapis/commit/c7fd8bd652ac690ca84f485014f70b52eef7cb9e)

BEGIN_COMMIT_OVERRIDE

JoeWang1127 marked this conversation as resolved.
Show resolved Hide resolved
feat: [document-ai] expose model_type in v1 processor, so that user can see the model_type after get or list processor version
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a test case for a commit that contains multiple conventional commit messages? Like this one? I think the current logic that adds library name may not take this scenario into consideration.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


feat: [document-ai] add model_type in v1beta3 processor proto

feat: Regenerate with the Java code generator (gapic-generator-java) v2.34.0

END_COMMIT_OVERRIDE
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
This pull request is generated with proto changes between googleapis commit 679060c64136e85b52838f53cfe612ce51e60d1d (exclusive) and fc3043ebe12fb6bc1729c175e1526c859ce751d8 (inclusive).

Qualified commits are:

[googleapis/googleapis@fbcfef0](https://github.com/googleapis/googleapis/commit/fbcfef09510b842774530989889ed1584a8b5acb)

[googleapis/googleapis@63d2a60](https://github.com/googleapis/googleapis/commit/63d2a60056ad5b156c05c7fb13138fc886c3b739)

BEGIN_COMMIT_OVERRIDE

fix: extend timeouts for deleting snapshots, backups and tables

chore: update retry settings for backup rpcs

feat: Regenerate with the Java code generator (gapic-generator-java) v2.35.0

END_COMMIT_OVERRIDE
32 changes: 32 additions & 0 deletions library_generation/test/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
from library_generation.model.gapic_inputs import parse as parse_build_file
from library_generation.model.generation_config import from_yaml
from library_generation.model.library_config import LibraryConfig
from library_generation.utilities import find_versioned_proto_path
from library_generation.utilities import get_file_paths

script_dir = os.path.dirname(os.path.realpath(__file__))
resources_dir = os.path.join(script_dir, "resources")
Expand Down Expand Up @@ -214,6 +216,36 @@ def test_from_yaml_succeeds(self):
self.assertEqual("google/cloud/asset/v1p5beta1", gapics[3].proto_path)
self.assertEqual("google/cloud/asset/v1p7beta1", gapics[4].proto_path)

def test_get_file_paths_from_yaml_success(self):
    # get_file_paths should map every proto_path found in the test
    # generation config to the "asset" library name.
    config = from_yaml(f"{test_config_dir}/generation_config.yaml")
    actual_paths = get_file_paths(config)
    expected_paths = {
        "google/cloud/asset/v1": "asset",
        "google/cloud/asset/v1p1beta1": "asset",
        "google/cloud/asset/v1p2beta1": "asset",
        "google/cloud/asset/v1p5beta1": "asset",
        "google/cloud/asset/v1p7beta1": "asset",
    }
    self.assertEqual(expected_paths, actual_paths)

@parameterized.expand(
    [
        (
            "google/cloud/aiplatform/v1/schema/predict/params/image_classification.proto",
            "google/cloud/aiplatform/v1",
        ),
        (
            "google/cloud/asset/v1p2beta1/assets.proto",
            "google/cloud/asset/v1p2beta1",
        ),
        ("google/type/color.proto", "google/type/color.proto"),
    ]
)
def test_find_versioned_proto_path(self, file_path, expected):
    # Each case pairs a file path with the value find_versioned_proto_path
    # is expected to return for it; the last case expects the input path
    # back unchanged.
    self.assertEqual(expected, find_versioned_proto_path(file_path))

@parameterized.expand(
[
("BUILD_no_additional_protos.bazel", " "),
Expand Down
Loading
Loading