From fcab153fec438edb2b15b66306bd6aae709bfb89 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 4 Sep 2024 10:41:17 -0400 Subject: [PATCH 1/5] Refactor into new GitHubCheckRunService Now the methods related to servicing GitHub Check Runs are part of the GitHubCheckRunService, which is apart from the GitHubRepoService. This should help make it easier to focus on the check run flow, apart from the functionality needed to sync data from GitHub. --- changelog.d/20240904_102527_jsick_DM_46095.md | 17 ++ src/timessquare/services/githubcheckrun.py | 263 ++++++++++++++++++ src/timessquare/services/githubrepo.py | 221 +-------------- .../worker/functions/create_check_run.py | 4 +- .../functions/create_rerequested_check_run.py | 4 +- .../worker/functions/pull_request_sync.py | 4 +- src/timessquare/worker/servicefactory.py | 42 +++ 7 files changed, 330 insertions(+), 225 deletions(-) create mode 100644 changelog.d/20240904_102527_jsick_DM_46095.md create mode 100644 src/timessquare/services/githubcheckrun.py diff --git a/changelog.d/20240904_102527_jsick_DM_46095.md b/changelog.d/20240904_102527_jsick_DM_46095.md new file mode 100644 index 0000000..c35d6fc --- /dev/null +++ b/changelog.d/20240904_102527_jsick_DM_46095.md @@ -0,0 +1,17 @@ + + +### Backwards-incompatible changes + +- + +### New features + +- + +### Bug fixes + +- + +### Other changes + +- The GitHub Check Run service methods are now part of a new `GitHubCheckRunService` class, separate from the `GitHubRepoService`. This internal change helps clarify what functionality is needed for the GitHub Checks functionality, as opposed to syncing data with GitHub repositories. diff --git a/src/timessquare/services/githubcheckrun.py b/src/timessquare/services/githubcheckrun.py new file mode 100644 index 0000000..7717e04 --- /dev/null +++ b/src/timessquare/services/githubcheckrun.py @@ -0,0 +1,263 @@ +"""Service for GitHub Check runs.""" + +from __future__ import annotations + +import asyncio +from collections import deque + +from gidgethub.httpx import GitHubAPI +from httpx import AsyncClient +from safir.github.models import ( + GitHubCheckRunConclusion, + GitHubCheckRunModel, + GitHubPullRequestModel, + GitHubRepositoryModel, +) +from safir.github.webhooks import ( + GitHubCheckRunEventModel, + GitHubCheckSuiteEventModel, +) +from structlog.stdlib import BoundLogger + +from timessquare.domain.githubcheckout import GitHubRepositoryCheckout +from timessquare.domain.githubcheckrun import ( + GitHubConfigsCheck, + NotebookExecutionsCheck, +) + +from ..domain.page import PageExecutionInfo +from ..storage.noteburst import NoteburstJobStatus +from .githubrepo import GitHubRepoService +from .page import PageService + +__all__ = ["GitHubCheckRunService"] + + +class GitHubCheckRunService: + """Service for GitHub Check runs. + + This service is responsible for creating and updating GitHub Check runs. It + should only be run from arq background tasks, triggered by GitHub webhooks. + """ + + def __init__( + self, + http_client: AsyncClient, + github_client: GitHubAPI, + repo_service: GitHubRepoService, + page_service: PageService, + logger: BoundLogger, + ) -> None: + self._http_client = http_client + self._github_client = github_client + self._repo_service = repo_service + self._page_service = page_service + self._logger = logger + + async def check_pull_request( + self, pr_payload: GitHubPullRequestModel + ) -> None: + """Run a check on a pull request.""" + # Called by the pull_request_sync function + # This isn't run. Instead we use the initial_check_run method + # called by the create_check_run async function. + + async def initiate_check_runs( + self, *, payload: GitHubCheckSuiteEventModel + ) -> None: + """Create a new GitHub check runs, given a new Check Suite. + + Notes + ----- + NOTE: currently we're assuming that check suites are automatically + created when created a check run. See + https://docs.github.com/en/rest/checks/runs#create-a-check-run + """ + # Run the configurations check + config_check = await GitHubConfigsCheck.create_check_run_and_validate( + github_client=self._github_client, + repo=payload.repository, + head_sha=payload.check_suite.head_sha, + ) + await config_check.submit_conclusion(github_client=self._github_client) + + repo = payload.repository + data = await self._github_client.post( + "repos/{owner}/{repo}/check-runs", + url_vars={"owner": repo.owner.login, "repo": repo.name}, + data={ + "name": NotebookExecutionsCheck.title, + "head_sha": payload.check_suite.head_sha, + "external_id": NotebookExecutionsCheck.external_id, + }, + ) + check_run = GitHubCheckRunModel.model_validate(data) + if config_check.conclusion == GitHubCheckRunConclusion.success: + await self.run_notebook_check_run( + check_run=check_run, + repo=payload.repository, + ) + else: + # Set the notebook check run to "neutral" indicating that we're + # skipping this check. + await self._github_client.patch( + str(check_run.url), + data={"conclusion": GitHubCheckRunConclusion.neutral}, + ) + + async def create_rerequested_check_run( + self, *, payload: GitHubCheckRunEventModel + ) -> None: + """Run a GitHub check run that was rerequested.""" + external_id = payload.check_run.external_id + if external_id == GitHubConfigsCheck.external_id: + config_check = await GitHubConfigsCheck.validate_repo( + github_client=self._github_client, + repo=payload.repository, + head_sha=payload.check_run.head_sha, + check_run=payload.check_run, + ) + await config_check.submit_conclusion( + github_client=self._github_client + ) + elif external_id == NotebookExecutionsCheck.external_id: + await self.run_notebook_check_run( + check_run=payload.check_run, + repo=payload.repository, + ) + + async def run_notebook_check_run( # noqa: C901 PLR0912 PLR0915 + self, *, check_run: GitHubCheckRunModel, repo: GitHubRepositoryModel + ) -> None: + """Run the notebook execution check. + + This check actually creates/updates Page resources, hence it is run + at the service layer, rather than in a domain model. + """ + check = NotebookExecutionsCheck(check_run, repo) + await check.submit_in_progress(self._github_client) + self._logger.debug("Notebook executions check in progress") + + checkout = await GitHubRepositoryCheckout.create( + github_client=self._github_client, + repo=repo, + head_sha=check_run.head_sha, + ) + + # Look for any existing pages for this repo's SHA. If they already + # exist it indicates the check is being re-run, so we'll delete those + # old pages for this commit + for page in await self._page_service.get_pages_for_repo( + owner=checkout.owner_name, + name=checkout.name, + commit=check_run.head_sha, + ): + await self._page_service.soft_delete_page(page) + self._logger.debug( + "Deleted existing page for notebook check run", + page_name=page.name, + ) + + tree = await checkout.get_git_tree(self._github_client) + pending_pages: deque[PageExecutionInfo] = deque() + for notebook_ref in tree.find_notebooks(checkout.settings): + self._logger.debug( + "Started notebook execution for notebook", + path=notebook_ref.notebook_source_path, + ) + notebook = await checkout.load_notebook( + notebook_ref=notebook_ref, github_client=self._github_client + ) + if notebook.sidecar.enabled is False: + self._logger.debug( + "Skipping notebook execution check for disabled notebook", + path=notebook_ref.notebook_source_path, + ) + continue + page = await self._repo_service.create_page( + checkout=checkout, + notebook=notebook, + commit_sha=check_run.head_sha, + ) + page_execution_info = ( + await self._page_service.execute_page_with_defaults( + page, + enable_retry=False, # fail quickly for CI + ) + ) + if page_execution_info.noteburst_error_message is not None: + self._logger.debug( + "Got immediate noteburst error", + path=notebook_ref.notebook_source_path, + error_message=page_execution_info.noteburst_error_message, + ) + check.report_noteburst_failure(page_execution_info) + else: + pending_pages.append(page_execution_info) + self._logger.debug( + "Noteburst result is pending", + path=notebook_ref.notebook_source_path, + ) + + await asyncio.sleep(5.0) # pause for noteburst to work + + # Poll for noteburst results + checked_page_count = 0 + while len(pending_pages) > 0: + checked_page_count += 1 + page_execution = pending_pages.popleft() + self._logger.debug( + "Polling noteburst job status", + path=page_execution.page.repository_source_path, + ) + if page_execution.noteburst_job is None: + raise RuntimeError("Noteburst job is None") + r = await self._page_service.noteburst_api.get_job( + str(page_execution.noteburst_job.job_url) + ) + if r.status_code >= 400: + # This is actually an issue with the noteburst service + # rather the notebook; consider adding that nuance to the + # GitHub Check + check.report_noteburst_failure(page_execution) + continue + + job = r.data + if job is None: + raise RuntimeError("Noteburst job is None") + if job.status == NoteburstJobStatus.complete: + self._logger.debug( + "Noteburst job is complete", + path=page_execution.page.repository_source_path, + ) + check.report_noteburst_completion( + page_execution=page_execution, job_result=job + ) + else: + # thow it back on the queue + self._logger.debug( + "Continuing to check noteburst job", + path=page_execution.page.repository_source_path, + ) + pending_pages.append(page_execution) + + # Once we've gone through all the pages once, pause + if checked_page_count >= len(pending_pages): + self._logger.debug( + "Pause polling of noteburst jobs", + checked_page_count=checked_page_count, + ) + await asyncio.sleep(2) + self._logger.debug( + "Pause finished", + checked_page_count=checked_page_count, + ) + checked_page_count = 0 + else: + self._logger.debug( + "Continuing to poll noteburst jobs", + pending_count=len(pending_pages), + checked_page_count=checked_page_count, + ) + + await check.submit_conclusion(github_client=self._github_client) diff --git a/src/timessquare/services/githubrepo.py b/src/timessquare/services/githubrepo.py index 95a29dc..d11c1d7 100644 --- a/src/timessquare/services/githubrepo.py +++ b/src/timessquare/services/githubrepo.py @@ -6,8 +6,6 @@ from __future__ import annotations -import asyncio -from collections import deque from pathlib import PurePosixPath from gidgethub.httpx import GitHubAPI @@ -16,16 +14,11 @@ from safir.github.models import ( GitHubBlobModel, GitHubBranchModel, - GitHubCheckRunConclusion, GitHubCheckRunModel, GitHubPullRequestModel, GitHubRepositoryModel, ) -from safir.github.webhooks import ( - GitHubCheckRunEventModel, - GitHubCheckSuiteEventModel, - GitHubPushEventModel, -) +from safir.github.webhooks import GitHubPushEventModel from safir.slack.blockkit import SlackException from structlog.stdlib import BoundLogger @@ -35,13 +28,8 @@ RepositoryNotebookModel, RepositorySettingsFile, ) -from timessquare.domain.githubcheckrun import ( - GitHubConfigsCheck, - NotebookExecutionsCheck, -) -from ..domain.page import PageExecutionInfo, PageModel -from ..storage.noteburst import NoteburstJobStatus +from ..domain.page import PageModel from .page import PageService @@ -170,11 +158,6 @@ async def sync_from_push( ) await self.sync_checkout(checkout) - async def check_pull_request( - self, pr_payload: GitHubPullRequestModel - ) -> None: - """Run a check on a pull request.""" - async def request_github_repository( self, owner: str, repo: str ) -> GitHubRepositoryModel: @@ -397,206 +380,6 @@ async def update_page( await self._page_service.update_page_and_execute(page) - async def initiate_check_runs( - self, *, payload: GitHubCheckSuiteEventModel - ) -> None: - """Create a new GitHub check runs, given a new Check Suite. - - Notes - ----- - NOTE: currently we're assuming that check suites are automatically - created when created a check run. See - https://docs.github.com/en/rest/checks/runs#create-a-check-run - """ - # Run the configurations check - config_check = await GitHubConfigsCheck.create_check_run_and_validate( - github_client=self._github_client, - repo=payload.repository, - head_sha=payload.check_suite.head_sha, - ) - await config_check.submit_conclusion(github_client=self._github_client) - - repo = payload.repository - data = await self._github_client.post( - "repos/{owner}/{repo}/check-runs", - url_vars={"owner": repo.owner.login, "repo": repo.name}, - data={ - "name": NotebookExecutionsCheck.title, - "head_sha": payload.check_suite.head_sha, - "external_id": NotebookExecutionsCheck.external_id, - }, - ) - check_run = GitHubCheckRunModel.model_validate(data) - if config_check.conclusion == GitHubCheckRunConclusion.success: - await self.run_notebook_check_run( - check_run=check_run, - repo=payload.repository, - ) - else: - # Set the notebook check run to "neutral" indicating that we're - # skipping this check. - await self._github_client.patch( - str(check_run.url), - data={"conclusion": GitHubCheckRunConclusion.neutral}, - ) - - async def create_rerequested_check_run( - self, *, payload: GitHubCheckRunEventModel - ) -> None: - """Run a GitHub check run that was rerequested.""" - external_id = payload.check_run.external_id - if external_id == GitHubConfigsCheck.external_id: - config_check = await GitHubConfigsCheck.validate_repo( - github_client=self._github_client, - repo=payload.repository, - head_sha=payload.check_run.head_sha, - check_run=payload.check_run, - ) - await config_check.submit_conclusion( - github_client=self._github_client - ) - elif external_id == NotebookExecutionsCheck.external_id: - await self.run_notebook_check_run( - check_run=payload.check_run, - repo=payload.repository, - ) - - async def run_notebook_check_run( # noqa: C901 PLR0912 PLR0915 - self, *, check_run: GitHubCheckRunModel, repo: GitHubRepositoryModel - ) -> None: - """Run the notebook execution check. - - This check actually creates/updates Page resources, hence it is run - at the service layer, rather than in a domain model. - """ - check = NotebookExecutionsCheck(check_run, repo) - await check.submit_in_progress(self._github_client) - self._logger.debug("Notebook executions check in progress") - - checkout = await GitHubRepositoryCheckout.create( - github_client=self._github_client, - repo=repo, - head_sha=check_run.head_sha, - ) - - # Look for any existing pages for this repo's SHA. If they already - # exist it indicates the check is being re-run, so we'll delete those - # old pages for this commit - for page in await self._page_service.get_pages_for_repo( - owner=checkout.owner_name, - name=checkout.name, - commit=check_run.head_sha, - ): - await self._page_service.soft_delete_page(page) - self._logger.debug( - "Deleted existing page for notebook check run", - page_name=page.name, - ) - - tree = await checkout.get_git_tree(self._github_client) - pending_pages: deque[PageExecutionInfo] = deque() - for notebook_ref in tree.find_notebooks(checkout.settings): - self._logger.debug( - "Started notebook execution for notebook", - path=notebook_ref.notebook_source_path, - ) - notebook = await checkout.load_notebook( - notebook_ref=notebook_ref, github_client=self._github_client - ) - if notebook.sidecar.enabled is False: - self._logger.debug( - "Skipping notebook execution check for disabled notebook", - path=notebook_ref.notebook_source_path, - ) - continue - page = await self.create_page( - checkout=checkout, - notebook=notebook, - commit_sha=check_run.head_sha, - ) - page_execution_info = ( - await self._page_service.execute_page_with_defaults( - page, - enable_retry=False, # fail quickly for CI - ) - ) - if page_execution_info.noteburst_error_message is not None: - self._logger.debug( - "Got immediate noteburst error", - path=notebook_ref.notebook_source_path, - error_message=page_execution_info.noteburst_error_message, - ) - check.report_noteburst_failure(page_execution_info) - else: - pending_pages.append(page_execution_info) - self._logger.debug( - "Noteburst result is pending", - path=notebook_ref.notebook_source_path, - ) - - await asyncio.sleep(5.0) # pause for noteburst to work - - # Poll for noteburst results - checked_page_count = 0 - while len(pending_pages) > 0: - checked_page_count += 1 - page_execution = pending_pages.popleft() - self._logger.debug( - "Polling noteburst job status", - path=page_execution.page.repository_source_path, - ) - if page_execution.noteburst_job is None: - raise RuntimeError("Noteburst job is None") - r = await self._page_service.noteburst_api.get_job( - str(page_execution.noteburst_job.job_url) - ) - if r.status_code >= 400: - # This is actually an issue with the noteburst service - # rather the notebook; consider adding that nuance to the - # GitHub Check - check.report_noteburst_failure(page_execution) - continue - - job = r.data - if job is None: - raise RuntimeError("Noteburst job is None") - if job.status == NoteburstJobStatus.complete: - self._logger.debug( - "Noteburst job is complete", - path=page_execution.page.repository_source_path, - ) - check.report_noteburst_completion( - page_execution=page_execution, job_result=job - ) - else: - # thow it back on the queue - self._logger.debug( - "Continuing to check noteburst job", - path=page_execution.page.repository_source_path, - ) - pending_pages.append(page_execution) - - # Once we've gone through all the pages once, pause - if checked_page_count >= len(pending_pages): - self._logger.debug( - "Pause polling of noteburst jobs", - checked_page_count=checked_page_count, - ) - await asyncio.sleep(2) - self._logger.debug( - "Pause finished", - checked_page_count=checked_page_count, - ) - checked_page_count = 0 - else: - self._logger.debug( - "Continuing to poll noteburst jobs", - pending_count=len(pending_pages), - checked_page_count=checked_page_count, - ) - - await check.submit_conclusion(github_client=self._github_client) - async def get_check_runs( self, owner: str, repo: str, head_sha: str ) -> list[GitHubCheckRunModel]: diff --git a/src/timessquare/worker/functions/create_check_run.py b/src/timessquare/worker/functions/create_check_run.py index 0a33264..1191b1d 100644 --- a/src/timessquare/worker/functions/create_check_run.py +++ b/src/timessquare/worker/functions/create_check_run.py @@ -8,7 +8,7 @@ from safir.github.webhooks import GitHubCheckSuiteEventModel from safir.slack.blockkit import SlackCodeBlock, SlackMessage, SlackTextField -from timessquare.worker.servicefactory import create_github_repo_service +from timessquare.worker.servicefactory import create_github_check_run_service async def create_check_run( @@ -28,7 +28,7 @@ async def create_check_run( try: async for db_session in db_session_dependency(): - github_repo_service = await create_github_repo_service( + github_repo_service = await create_github_check_run_service( http_client=ctx["http_client"], logger=logger, installation_id=payload.installation.id, diff --git a/src/timessquare/worker/functions/create_rerequested_check_run.py b/src/timessquare/worker/functions/create_rerequested_check_run.py index 682099e..452a88d 100644 --- a/src/timessquare/worker/functions/create_rerequested_check_run.py +++ b/src/timessquare/worker/functions/create_rerequested_check_run.py @@ -8,7 +8,7 @@ from safir.github.webhooks import GitHubCheckRunEventModel from safir.slack.blockkit import SlackCodeBlock, SlackMessage, SlackTextField -from timessquare.worker.servicefactory import create_github_repo_service +from timessquare.worker.servicefactory import create_github_check_run_service async def create_rerequested_check_run( @@ -29,7 +29,7 @@ async def create_rerequested_check_run( try: async for db_session in db_session_dependency(): - github_repo_service = await create_github_repo_service( + github_repo_service = await create_github_check_run_service( http_client=ctx["http_client"], logger=logger, installation_id=payload.installation.id, diff --git a/src/timessquare/worker/functions/pull_request_sync.py b/src/timessquare/worker/functions/pull_request_sync.py index 44e6471..7e28404 100644 --- a/src/timessquare/worker/functions/pull_request_sync.py +++ b/src/timessquare/worker/functions/pull_request_sync.py @@ -8,7 +8,7 @@ from safir.github.webhooks import GitHubPullRequestEventModel from safir.slack.blockkit import SlackCodeBlock, SlackMessage, SlackTextField -from timessquare.worker.servicefactory import create_github_repo_service +from timessquare.worker.servicefactory import create_github_check_run_service async def pull_request_sync( @@ -31,7 +31,7 @@ async def pull_request_sync( try: async for db_session in db_session_dependency(): - github_repo_service = await create_github_repo_service( + github_repo_service = await create_github_check_run_service( http_client=ctx["http_client"], logger=logger, installation_id=payload.installation.id, diff --git a/src/timessquare/worker/servicefactory.py b/src/timessquare/worker/servicefactory.py index c1d3a3a..b910990 100644 --- a/src/timessquare/worker/servicefactory.py +++ b/src/timessquare/worker/servicefactory.py @@ -12,12 +12,54 @@ from timessquare.config import config from timessquare.dependencies.redis import redis_dependency from timessquare.services.backgroundpage import BackgroundPageService +from timessquare.services.githubcheckrun import GitHubCheckRunService from timessquare.services.githubrepo import GitHubRepoService from timessquare.storage.nbhtmlcache import NbHtmlCacheStore from timessquare.storage.noteburstjobstore import NoteburstJobStore from timessquare.storage.page import PageStore +async def create_github_check_run_service( + *, + http_client: httpx.AsyncClient, + db_session: async_scoped_session, + logger: BoundLogger, + installation_id: int, +) -> GitHubCheckRunService: + """Create a GitHubRepoService for arq tasks.""" + if not config.github_app_id or not config.github_app_private_key: + raise SlackException( + "github_app_id and github_app_private_key must be set to " + "create the GitHubRepoService." + ) + github_client_factory = GitHubAppClientFactory( + http_client=http_client, + id=config.github_app_id, + key=config.github_app_private_key, + name="lsst-sqre/times-square", + ) + github_client = await github_client_factory.create_installation_client( + installation_id=installation_id + ) + + page_service = await create_page_service( + http_client=http_client, logger=logger, db_session=db_session + ) + repo_service = await create_github_repo_service( + http_client=http_client, + db_session=db_session, + logger=logger, + installation_id=installation_id, + ) + return GitHubCheckRunService( + http_client=http_client, + github_client=github_client, + repo_service=repo_service, + page_service=page_service, + logger=logger, + ) + + async def create_github_repo_service( *, http_client: httpx.AsyncClient, From a9dc7b611e24a41f9a909d7f56e1c00c19b13a88 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 4 Sep 2024 12:18:44 -0400 Subject: [PATCH 2/5] Support YAML errors for check runs If the times-square.yaml file or the sidecar YAML files can't be parsed, we now format a check run annotation specifically to give feedback about that. --- changelog.d/20240904_102527_jsick_DM_46095.md | 4 +- src/timessquare/domain/githubcheckrun.py | 46 ++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/changelog.d/20240904_102527_jsick_DM_46095.md b/changelog.d/20240904_102527_jsick_DM_46095.md index c35d6fc..435d1fb 100644 --- a/changelog.d/20240904_102527_jsick_DM_46095.md +++ b/changelog.d/20240904_102527_jsick_DM_46095.md @@ -6,7 +6,9 @@ ### New features -- +- Improved feedback in GitHub Check Runs: + + - If a YAML file has bad syntax causing a parsing error, we now post the YAML error as a check run annotation — including the location of the error in the file. ### Bug fixes diff --git a/src/timessquare/domain/githubcheckrun.py b/src/timessquare/domain/githubcheckrun.py index b582c3d..4ca686d 100644 --- a/src/timessquare/domain/githubcheckrun.py +++ b/src/timessquare/domain/githubcheckrun.py @@ -18,6 +18,7 @@ GitHubCheckRunStatus, GitHubRepositoryModel, ) +from yaml import YAMLError from timessquare.config import config @@ -51,6 +52,7 @@ class Annotation: def from_validation_error( cls, path: str, error: ValidationError ) -> list[Annotation]: + """Create a list of annotations from a Pydantic validation error.""" annotations: list[Annotation] = [] for item in error.errors(): title = cls._format_title_for_pydantic_item(item["loc"]) @@ -65,6 +67,34 @@ def from_validation_error( ) return annotations + @classmethod + def from_yaml_error(cls, path: str, error: YAMLError) -> list[Annotation]: + """Create a list of annotations from a YAML syntax error.""" + if hasattr(error, "problem_mark"): + # The YAML syntax error has a problem mark pointing to the + # location of the error. + start_line = error.problem_mark.line + 1 + column = error.problem_mark.column + 1 + return [ + Annotation( + path=path, + start_line=start_line, + message=str(error), + title=f"YAML syntax error ({start_line}:{column})", + annotation_level=GitHubCheckRunAnnotationLevel.failure, + ) + ] + return [ + # The exact location is unknown + Annotation( + path=path, + start_line=1, + message=str(error), + title="YAML syntax error", + annotation_level=GitHubCheckRunAnnotationLevel.failure, + ) + ] + @staticmethod def _format_title_for_pydantic_item(locations: Sequence[str | int]) -> str: title_elements: list[str] = [] @@ -234,7 +264,7 @@ async def create_check_run_and_validate( "external_id": cls.external_id, }, ) - check_run = GitHubCheckRunModel.parse_obj(data) + check_run = GitHubCheckRunModel.model_validate(data) return await cls.validate_repo( check_run=check_run, github_client=github_client, @@ -258,6 +288,7 @@ async def validate_repo( check = cls(check_run, repo) await check.submit_in_progress(github_client) + # Check out the repository and validate the times-square.yaml file. try: checkout = await GitHubRepositoryCheckout.create( github_client=github_client, @@ -270,6 +301,12 @@ async def validate_repo( ) check.annotations.extend(annotations) return check + except YAMLError as e: + annotations = Annotation.from_yaml_error( + path="times-square.yaml", error=e + ) + check.annotations.extend(annotations) + return check # Validate each notebook yaml file tree = await checkout.get_git_tree(github_client) @@ -303,7 +340,7 @@ async def validate_sidecar( repo.blobs_url, url_vars={"sha": notebook_ref.sidecar_git_tree_sha}, ) - sidecar_blob = GitHubBlobModel.parse_obj(data) + sidecar_blob = GitHubBlobModel.model_validate(data) try: NotebookSidecarFile.parse_yaml(sidecar_blob.decode()) except ValidationError as e: @@ -311,6 +348,11 @@ async def validate_sidecar( path=notebook_ref.sidecar_path, error=e ) self.annotations.extend(annotations) + except YAMLError as e: + annotations = Annotation.from_yaml_error( + path=notebook_ref.sidecar_path, error=e + ) + self.annotations.extend(annotations) self.sidecar_files_checked.append(notebook_ref.sidecar_path) @property From a85113cec22192abf89ac9a18cbba51120bda2f4 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 4 Sep 2024 14:45:59 -0400 Subject: [PATCH 3/5] Report check run error for bad notebook jinja If the Jinja templating in a notebook is incorrect, the PageModel.render_parameters method raises a new exception, PageJinjaError. This is captured in the notebook execution check run and turned into useful user feedback. --- changelog.d/20240904_102527_jsick_DM_46095.md | 2 ++ src/timessquare/domain/githubcheckrun.py | 20 ++++++++++++++++++- src/timessquare/domain/page.py | 15 +++++++++++--- src/timessquare/exceptions.py | 18 +++++++++++++++++ src/timessquare/services/githubcheckrun.py | 18 ++++++++++++----- 5 files changed, 64 insertions(+), 9 deletions(-) diff --git a/changelog.d/20240904_102527_jsick_DM_46095.md b/changelog.d/20240904_102527_jsick_DM_46095.md index 435d1fb..0e8e533 100644 --- a/changelog.d/20240904_102527_jsick_DM_46095.md +++ b/changelog.d/20240904_102527_jsick_DM_46095.md @@ -10,6 +10,8 @@ - If a YAML file has bad syntax causing a parsing error, we now post the YAML error as a check run annotation — including the location of the error in the file. + - If the notebook has incorrect Jinja templating in its Markdown cells, we now post a check run annotation with the error message and cell number. + ### Bug fixes - diff --git a/src/timessquare/domain/githubcheckrun.py b/src/timessquare/domain/githubcheckrun.py index 4ca686d..9727128 100644 --- a/src/timessquare/domain/githubcheckrun.py +++ b/src/timessquare/domain/githubcheckrun.py @@ -21,6 +21,7 @@ from yaml import YAMLError from timessquare.config import config +from timessquare.exceptions import PageJinjaError from ..storage.noteburst import NoteburstJobResponseModel, NoteburstJobStatus from .githubcheckout import ( @@ -29,7 +30,7 @@ RecursiveGitTreeModel, RepositoryNotebookTreeRef, ) -from .page import PageExecutionInfo +from .page import PageExecutionInfo, PageModel @dataclass(kw_only=True) @@ -440,6 +441,23 @@ def __init__( self.notebook_paths_checked: list[str] = [] super().__init__(check_run=check_run, repo=repo) + def report_jinja_error( + self, page: PageModel, error: PageJinjaError + ) -> None: + """Report an error rendering a Jinja template in a notebook cell.""" + path = page.repository_source_path + if path is None: + raise RuntimeError("Page execution has no notebook path") + annotation = Annotation( + path=path, + start_line=1, + message=str(error), + title="Notebook Jinja templating error", + annotation_level=GitHubCheckRunAnnotationLevel.failure, + ) + self.annotations.append(annotation) + self.notebook_paths_checked.append(path) + def report_noteburst_failure( self, page_execution: PageExecutionInfo ) -> None: diff --git a/src/timessquare/domain/page.py b/src/timessquare/domain/page.py index 4a0c0c7..70a08ee 100644 --- a/src/timessquare/domain/page.py +++ b/src/timessquare/domain/page.py @@ -19,6 +19,7 @@ from jsonschema import Draft202012Validator from timessquare.exceptions import ( + PageJinjaError, PageNotebookFormatError, PageParameterError, PageParameterValueCastingError, @@ -462,6 +463,11 @@ def render_parameters( ------- ipynb : str JSON-encoded notebook source. + + Raises + ------ + PageJinjaError + Raised if there is an error rendering the Jinja template. """ # Build Jinja render context # Turn off autoescaping to avoid escaping the parameter values @@ -474,7 +480,7 @@ def render_parameters( # Read notebook and render cell-by-cell notebook = PageModel.read_ipynb(self.ipynb) processed_first_cell = False - for cell in notebook.cells: + for cell_index, cell in enumerate(notebook.cells): if cell.cell_type == "code": if processed_first_cell is False: # Handle first code cell specially by replacing it with a @@ -486,8 +492,11 @@ def render_parameters( continue # Render the templated cell - template = jinja_env.from_string(cell.source) - cell.source = template.render() + try: + template = jinja_env.from_string(cell.source) + cell.source = template.render() + except Exception as e: + raise PageJinjaError(str(e), cell_index) from e # Modify notebook metadata to include values if "times-square" not in notebook.metadata: diff --git a/src/timessquare/exceptions.py b/src/timessquare/exceptions.py index ccf6bfc..7edfddf 100644 --- a/src/timessquare/exceptions.py +++ b/src/timessquare/exceptions.py @@ -91,6 +91,24 @@ def for_value( return cls(message, location=location, field_path=field_path) +class PageJinjaError(Exception): + """An error occurred while rendering a template in a notebook cell. + + This error is raised duirng the notebook check run. + """ + + def __init__(self, message: str, cell_index: int) -> None: + """Create an exception with a message and cell index.""" + super().__init__(message) + self.cell_index = cell_index + + def __str__(self) -> str: + return ( + "Error rendering template in " + f"cell {self.cell_index + 1}: {self.args[0]}" + ) + + class ParameterSchemaValidationError(TimesSquareClientError): """Error related to a parameter.""" diff --git a/src/timessquare/services/githubcheckrun.py b/src/timessquare/services/githubcheckrun.py index 7717e04..d13f754 100644 --- a/src/timessquare/services/githubcheckrun.py +++ b/src/timessquare/services/githubcheckrun.py @@ -24,6 +24,7 @@ GitHubConfigsCheck, NotebookExecutionsCheck, ) +from timessquare.exceptions import PageJinjaError from ..domain.page import PageExecutionInfo from ..storage.noteburst import NoteburstJobStatus @@ -179,12 +180,19 @@ async def run_notebook_check_run( # noqa: C901 PLR0912 PLR0915 notebook=notebook, commit_sha=check_run.head_sha, ) - page_execution_info = ( - await self._page_service.execute_page_with_defaults( - page, - enable_retry=False, # fail quickly for CI + try: + page_execution_info = ( + await self._page_service.execute_page_with_defaults( + page, + enable_retry=False, # fail quickly for CI + ) ) - ) + except PageJinjaError as e: + # Error rendering out the notebook's Jinja. Report in the + # and move on to the next notebook + check.report_jinja_error(page, e) + continue + if page_execution_info.noteburst_error_message is not None: self._logger.debug( "Got immediate noteburst error", From caf39f141db4a9cbe4abb9c928b9d555a28326be Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 4 Sep 2024 16:25:16 -0400 Subject: [PATCH 4/5] Update to slim-bookworm base OS --- Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 43f355c..f8ae5e2 100644 --- a/Dockerfile +++ b/Dockerfile @@ -14,7 +14,7 @@ # - Runs a non-root user. # - Sets up the entrypoint and port. -FROM python:3.12.5-slim-bullseye as base-image +FROM python:3.12.5-slim-bookworm as base-image # Update system packages COPY scripts/install-base-packages.sh . From f0c0b89f059fece4949d1ecbaa2e1b779b8f8c74 Mon Sep 17 00:00:00 2001 From: Jonathan Sick Date: Wed, 4 Sep 2024 16:31:16 -0400 Subject: [PATCH 5/5] Prepare for 0.12.0 release --- CHANGELOG.md | 21 +++++++++++++++++++ changelog.d/20240830_164744_jsick_DM_46065.md | 20 ------------------ changelog.d/20240904_102527_jsick_DM_46095.md | 21 ------------------- 3 files changed, 21 insertions(+), 41 deletions(-) delete mode 100644 changelog.d/20240830_164744_jsick_DM_46065.md delete mode 100644 changelog.d/20240904_102527_jsick_DM_46095.md diff --git a/CHANGELOG.md b/CHANGELOG.md index c70fee4..1e188b3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,27 @@ Collect fragments into this file with: scriv collect --version X.Y.Z + + +## 0.12.0 (2024-09-04) + +### New features + +- Improved feedback in GitHub Check Runs: + + - If a YAML file has bad syntax causing a parsing error, we now post the YAML error as a check run annotation — including the location of the error in the file. + + - If the notebook has incorrect Jinja templating in its Markdown cells, we now post a check run annotation with the error message and cell number. + +### Other changes + +- Adopt uv and tox-uv for pinning and installing dependencies +- Pin the tox dependencies with a `requirements/tox.[in|txt]` file +- Adapt configuration for tox-docker version 5's randomized connection ports +- Adopt [ruff-shared.toml](https://github.com/lsst/templates/blob/main/project_templates/fastapi_safir_app/example/ruff-shared.toml) for common SQuaRE ruff configuration. + +- The GitHub Check Run service methods are now part of a new `GitHubCheckRunService` class, separate from the `GitHubRepoService`. This internal change helps clarify what functionality is needed for the GitHub Checks functionality, as opposed to syncing data with GitHub repositories. + ## 0.11.0 (2024-03-27) diff --git a/changelog.d/20240830_164744_jsick_DM_46065.md b/changelog.d/20240830_164744_jsick_DM_46065.md deleted file mode 100644 index e2e49ab..0000000 --- a/changelog.d/20240830_164744_jsick_DM_46065.md +++ /dev/null @@ -1,20 +0,0 @@ - - -### Backwards-incompatible changes - -- - -### New features - -- - -### Bug fixes - -- - -### Other changes - -- Adopt uv and tox-uv for pinning and installing dependencies -- Pin the tox dependencies with a `requirements/tox.[in|txt]` file -- Adapt configuration for tox-docker version 5's randomized connection ports -- Adopt [ruff-shared.toml](https://github.com/lsst/templates/blob/main/project_templates/fastapi_safir_app/example/ruff-shared.toml) for common SQuaRE ruff configuration. diff --git a/changelog.d/20240904_102527_jsick_DM_46095.md b/changelog.d/20240904_102527_jsick_DM_46095.md deleted file mode 100644 index 0e8e533..0000000 --- a/changelog.d/20240904_102527_jsick_DM_46095.md +++ /dev/null @@ -1,21 +0,0 @@ - - -### Backwards-incompatible changes - -- - -### New features - -- Improved feedback in GitHub Check Runs: - - - If a YAML file has bad syntax causing a parsing error, we now post the YAML error as a check run annotation — including the location of the error in the file. - - - If the notebook has incorrect Jinja templating in its Markdown cells, we now post a check run annotation with the error message and cell number. - -### Bug fixes - -- - -### Other changes - -- The GitHub Check Run service methods are now part of a new `GitHubCheckRunService` class, separate from the `GitHubRepoService`. This internal change helps clarify what functionality is needed for the GitHub Checks functionality, as opposed to syncing data with GitHub repositories.