diff --git a/analytics/Makefile b/analytics/Makefile
index bbca61489..2cfb02512 100644
--- a/analytics/Makefile
+++ b/analytics/Makefile
@@ -210,9 +210,7 @@ percent-complete:
 	@echo "=> Running percent complete deliverable"
 	@echo "====================================================="
 	$(POETRY) analytics calculate deliverable_percent_complete \
-	--sprint-file $(SPRINT_FILE) \
-	--roadmap-file $(ROADMAP_FILE) \
-	--issue-file $(ISSUE_FILE) \
+	--issue-file $(DELIVERY_FILE) \
 	--output-dir $(OUTPUT_DIR) \
 	--include-status "In Progress" \
 	--include-status "Planning" \
diff --git a/analytics/src/analytics/cli.py b/analytics/src/analytics/cli.py
index b70950b26..8ec11b82e 100644
--- a/analytics/src/analytics/cli.py
+++ b/analytics/src/analytics/cli.py
@@ -9,7 +9,6 @@
 from slack_sdk import WebClient
 from sqlalchemy import text
 
-from analytics.datasets.deliverable_tasks import DeliverableTasks
 from analytics.datasets.issues import GitHubIssues
 from analytics.etl.github import GitHubProjectConfig, GitHubProjectETL
 from analytics.etl.utils import load_config
@@ -165,7 +164,6 @@ def calculate_sprint_burnup(
 
 @metrics_app.command(name="deliverable_percent_complete")
 def calculate_deliverable_percent_complete(
-    sprint_file: Annotated[str, SPRINT_FILE_ARG],
     issue_file: Annotated[str, ISSUE_FILE_ARG],
     # Typer uses the Unit enum to validate user inputs from the CLI
     # but the default arg must be a string or the CLI will throw an error
@@ -174,23 +172,10 @@ def calculate_deliverable_percent_complete(
     show_results: Annotated[bool, SHOW_RESULTS_ARG] = False,
     post_results: Annotated[bool, POST_RESULTS_ARG] = False,
     output_dir: Annotated[str, OUTPUT_DIR_ARG] = "data",
-    roadmap_file: Annotated[Optional[str], ROADMAP_FILE_ARG] = None,  # noqa: UP007
     include_status: Annotated[Optional[list[str]], STATUS_ARG] = None,  # noqa: UP007
 ) -> None:
     """Calculate percentage completion by deliverable."""
-    if roadmap_file:
-        # load the input data using the new join path with roadmap data
-        task_data = DeliverableTasks.load_from_json_files_with_roadmap_data(
-            sprint_file=sprint_file,
-            issue_file=issue_file,
-            roadmap_file=roadmap_file,
-        )
-    else:
-        # load the input data using the original join path without roadmap data
-        task_data = DeliverableTasks.load_from_json_files(
-            sprint_file=sprint_file,
-            issue_file=issue_file,
-        )
+    task_data = GitHubIssues.from_json(issue_file)
     # calculate percent complete
     metric = DeliverablePercentComplete(
         dataset=task_data,
diff --git a/analytics/src/analytics/datasets/issues.py b/analytics/src/analytics/datasets/issues.py
index 704fa4db5..1b206941e 100644
--- a/analytics/src/analytics/datasets/issues.py
+++ b/analytics/src/analytics/datasets/issues.py
@@ -4,7 +4,7 @@
 from enum import Enum
 
 import pandas as pd
-from pydantic import BaseModel, Field
+from pydantic import BaseModel, Field, computed_field
 
 from analytics.datasets.base import BaseDataset
 
@@ -26,6 +26,13 @@ class IssueType(Enum):
     NONE = None
 
 
+class IssueState(Enum):
+    """Whether the issue is open or closed."""
+
+    OPEN = "open"
+    CLOSED = "closed"
+
+
 class IssueMetadata(BaseModel):
     """Stores information about issue type and parent (if applicable)."""
 
@@ -58,9 +65,19 @@ class IssueMetadata(BaseModel):
     # Parent metadata -- attributes about parent issues populated via lookup
     deliverable_url: str | None = Field(default=None)
     deliverable_title: str | None = Field(default=None)
+    deliverable_status: str | None = Field(default=None)
     epic_url: str | None = Field(default=None)
     epic_title: str | None = Field(default=None)
 
+    # See https://docs.pydantic.dev/2.0/usage/computed_fields/
+    @computed_field  # type: ignore[misc]
+    @property
+    def issue_state(self) -> str:
+        """Whether the issue is open or closed."""
+        if self.issue_is_closed:
+            return IssueState.CLOSED.value
+        return IssueState.OPEN.value
+
 
 # ===============================================================
 # Dataset class
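A side note on the `computed_field` addition above: pydantic v2 serializes computed fields alongside declared ones, which is what lets the flattened export pick up `issue_state` without storing it as a column. A minimal, self-contained sketch of the pattern (the model here is a stripped-down stand-in for `IssueMetadata`, for illustration only):

```python
from pydantic import BaseModel, computed_field


class Issue(BaseModel):
    """Stripped-down stand-in for IssueMetadata, for illustration only."""

    issue_is_closed: bool = False

    @computed_field  # derived at serialization time, not a stored field
    @property
    def issue_state(self) -> str:
        return "closed" if self.issue_is_closed else "open"


print(Issue(issue_is_closed=True).model_dump())
# {'issue_is_closed': True, 'issue_state': 'closed'}
```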
diff --git a/analytics/src/analytics/etl/github.py b/analytics/src/analytics/etl/github.py
index 0cd48a845..3d5494c50 100644
--- a/analytics/src/analytics/etl/github.py
+++ b/analytics/src/analytics/etl/github.py
@@ -288,6 +288,7 @@ def flatten_issue_data(lookup: dict[str, IssueMetadata]) -> list[dict]:
             issue.deliverable_title = deliverable.issue_title
             issue.deliverable_url = deliverable.issue_url
             issue.deliverable_pillar = deliverable.deliverable_pillar
+            issue.deliverable_status = deliverable.issue_status
             # Set quad metadata
             issue.quad_id = deliverable.quad_id
             issue.quad_name = deliverable.quad_name
@@ -306,7 +307,7 @@ def flatten_issue_data(lookup: dict[str, IssueMetadata]) -> list[dict]:
             issue.epic_url = epic.issue_url
 
         # Add the issue to the results
-        result.append(issue.__dict__)
+        result.append(issue.model_dump())
 
     # Return the results
     return result
diff --git a/analytics/src/analytics/integrations/github/getRoadmapData.graphql b/analytics/src/analytics/integrations/github/getRoadmapData.graphql
index 866753215..5b4bd1aaf 100644
--- a/analytics/src/analytics/integrations/github/getRoadmapData.graphql
+++ b/analytics/src/analytics/integrations/github/getRoadmapData.graphql
@@ -27,6 +27,9 @@ query (
         pillar: fieldValueByName(name: $pillarField) {
           ...singleSelectContent
         }
+        status: fieldValueByName(name: "Status") {
+          ...singleSelectContent
+        }
       }
     }
   }
diff --git a/analytics/src/analytics/integrations/github/main.py b/analytics/src/analytics/integrations/github/main.py
index 1cf702c7a..7a5e4aa53 100644
--- a/analytics/src/analytics/integrations/github/main.py
+++ b/analytics/src/analytics/integrations/github/main.py
@@ -73,6 +73,7 @@ def export_sprint_data(
         issue_url: .content.url,
         issue_parent: .content.parent.url,
         issue_type: .content.issueType.name,
+        issue_status: .status.name,
         issue_is_closed: .content.closed,
         issue_opened_at: .content.createdAt,
         issue_closed_at: .content.closedAt,
@@ -146,6 +147,7 @@ def export_roadmap_data(
         issue_url: .content.url,
         issue_parent: .content.parent.url,
         issue_type: .content.issueType.name,
+        issue_status: .status.name,
         issue_is_closed: .content.closed,
         issue_opened_at: .content.createdAt,
         issue_closed_at: .content.closedAt,
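For orientation, one row produced by the jq-style field mapping above would look roughly like the dict below once the exported JSON is parsed. Every value here is invented for illustration, but the keys mirror the mapping, with `issue_status` newly populated from the project's "Status" single-select field:

```python
# Hypothetical exported row (all values invented); keys mirror the mapping above.
row = {
    "issue_title": "Example task",
    "issue_url": "https://github.com/HHS/example/issues/123",
    "issue_parent": None,
    "issue_type": "Task",
    "issue_status": "In Progress",  # new field exported in this change
    "issue_is_closed": False,
    "issue_opened_at": "2024-01-01T00:00:00Z",
    "issue_closed_at": None,
}
```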
"deliverable_title" - self.status_col = "status" + self.status_col = "issue_state" self.deliverable_status_col = "deliverable_status" self.unit = unit + self.unit_col = dataset.points_col if unit == Unit.points else unit.value self.statuses_to_include = statuses_to_include self.deliverable_data = self._isolate_deliverables_by_status() super().__init__(dataset) @@ -80,7 +81,7 @@ def get_stats(self) -> dict[str, Statistic]: """Calculate stats for this metric.""" df_src = self.deliverable_data # get the total number of issues and the number of issues with points per deliverable - is_pointed = df_src[Unit.points.value] >= 1 + is_pointed = df_src[self.dataset.points_col] >= 1 issues_total = df_src.value_counts(self.deliverable_col).to_frame() issues_pointed = ( df_src[is_pointed].value_counts(self.deliverable_col).to_frame() @@ -127,7 +128,7 @@ def _get_count_by_deliverable( """Get the count of issues (or points) by deliverable and status.""" # create local copies of the dataset and key column names df = self.deliverable_data.copy() - unit_col = self.unit.value + unit_col = self.unit_col key_cols = [self.deliverable_col, unit_col] # create a dummy column to sum per row if the unit is issues if self.unit == Unit.issues: diff --git a/analytics/tests/metrics/test_percent_complete.py b/analytics/tests/metrics/test_percent_complete.py index ea5cd0bc7..5a819706e 100644 --- a/analytics/tests/metrics/test_percent_complete.py +++ b/analytics/tests/metrics/test_percent_complete.py @@ -4,9 +4,9 @@ import pytest -from analytics.datasets.deliverable_tasks import DeliverableTasks +from analytics.datasets.issues import GitHubIssues, IssueMetadata, IssueType from analytics.metrics.percent_complete import DeliverablePercentComplete, Unit -from tests.conftest import MockSlackbot +from tests.conftest import MockSlackbot, DAY_0, DAY_1 def task_row( @@ -17,15 +17,21 @@ def task_row( status: str | None = "open", ) -> dict: """Create a sample row of the DeliverableTasks dataset.""" - return { - "deliverable_number": deliverable, - "deliverable_title": f"Deliverable {deliverable}", - "deliverable_status": deliverable_status, - "issue_number": task, - "issue_title": f"Task {task}" if task else None, - "points": points, - "status": status, - } + issue = IssueMetadata( + project_owner="HHS", + project_number=1, + issue_title=f"Task {task}", + issue_url=f"task{task}", + issue_type=IssueType.TASK.value, + issue_parent=None, + issue_points=points, + issue_is_closed=status == "closed", + issue_opened_at=DAY_0, + issue_closed_at=DAY_1 if status == "closed" else None, + deliverable_title=f"Deliverable {deliverable}", + deliverable_status=deliverable_status, + ) + return issue.model_dump() @pytest.fixture(name="percent_complete", scope="module") @@ -37,7 +43,7 @@ def sample_percent_complete() -> DeliverablePercentComplete: task_row(deliverable=1, task=2, status="closed"), task_row(deliverable=2, task=3, status="open"), ] - test_data = DeliverableTasks.from_dict(test_rows) + test_data = GitHubIssues.from_dict(test_rows) # return sprint burndown by points return DeliverablePercentComplete(test_data, unit=Unit.points) @@ -53,7 +59,7 @@ def test_percent_complete_based_on_task_count(self): task_row(deliverable=1, task=2, status="closed"), task_row(deliverable=2, task=3, status="open"), ] - test_data = DeliverableTasks.from_dict(test_rows) + test_data = GitHubIssues.from_dict(test_rows) # execution df = DeliverablePercentComplete(test_data, unit=Unit.issues).results df = df.set_index("deliverable_title") @@ -80,7 +86,7 @@ def 
diff --git a/analytics/tests/metrics/test_percent_complete.py b/analytics/tests/metrics/test_percent_complete.py
index ea5cd0bc7..5a819706e 100644
--- a/analytics/tests/metrics/test_percent_complete.py
+++ b/analytics/tests/metrics/test_percent_complete.py
@@ -4,9 +4,9 @@
 
 import pytest
 
-from analytics.datasets.deliverable_tasks import DeliverableTasks
+from analytics.datasets.issues import GitHubIssues, IssueMetadata, IssueType
 from analytics.metrics.percent_complete import DeliverablePercentComplete, Unit
-from tests.conftest import MockSlackbot
+from tests.conftest import MockSlackbot, DAY_0, DAY_1
 
 
 def task_row(
@@ -17,15 +17,21 @@ def task_row(
     deliverable: int,
     task: int | None,
     points: int | None = 1,
     deliverable_status: str | None = "In Progress",
     status: str | None = "open",
 ) -> dict:
     """Create a sample row of the DeliverableTasks dataset."""
-    return {
-        "deliverable_number": deliverable,
-        "deliverable_title": f"Deliverable {deliverable}",
-        "deliverable_status": deliverable_status,
-        "issue_number": task,
-        "issue_title": f"Task {task}" if task else None,
-        "points": points,
-        "status": status,
-    }
+    issue = IssueMetadata(
+        project_owner="HHS",
+        project_number=1,
+        issue_title=f"Task {task}",
+        issue_url=f"task{task}",
+        issue_type=IssueType.TASK.value,
+        issue_parent=None,
+        issue_points=points,
+        issue_is_closed=status == "closed",
+        issue_opened_at=DAY_0,
+        issue_closed_at=DAY_1 if status == "closed" else None,
+        deliverable_title=f"Deliverable {deliverable}",
+        deliverable_status=deliverable_status,
+    )
+    return issue.model_dump()
 
 
 @pytest.fixture(name="percent_complete", scope="module")
@@ -37,7 +43,7 @@ def sample_percent_complete() -> DeliverablePercentComplete:
         task_row(deliverable=1, task=2, status="closed"),
         task_row(deliverable=2, task=3, status="open"),
     ]
-    test_data = DeliverableTasks.from_dict(test_rows)
+    test_data = GitHubIssues.from_dict(test_rows)
     # return sprint burndown by points
     return DeliverablePercentComplete(test_data, unit=Unit.points)
@@ -53,7 +59,7 @@ def test_percent_complete_based_on_task_count(self):
             task_row(deliverable=1, task=2, status="closed"),
             task_row(deliverable=2, task=3, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         df = DeliverablePercentComplete(test_data, unit=Unit.issues).results
         df = df.set_index("deliverable_title")
@@ -80,7 +86,7 @@ def test_percent_complete_based_on_points(self):
             task_row(deliverable=1, task=2, points=3, status="closed"),
             task_row(deliverable=2, task=3, points=5, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         df = DeliverablePercentComplete(test_data, unit=Unit.points).results
         df = df.set_index("deliverable_title")
@@ -106,7 +112,7 @@ def test_show_0_pct_for_deliverables_without_tasks(self):
             task_row(deliverable=1, task=2, status="closed"),
             task_row(deliverable=2, task=None, status=None),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution - use tasks as the unit
         df = DeliverablePercentComplete(test_data, unit=Unit.issues).results
         df = df.set_index("deliverable_title")
@@ -132,7 +138,7 @@ def test_show_0_pct_for_deliverables_without_points(self):
             task_row(deliverable=1, task=2, points=2, status="closed"),
             task_row(deliverable=2, task=None, points=None, status=None),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution - use points as the unit
         df = DeliverablePercentComplete(test_data, unit=Unit.points).results
         df = df.set_index("deliverable_title")
@@ -164,7 +170,7 @@ class TestFilteringReportByDeliverableStatus:
     def test_filter_out_deliverables_with_excluded_status(self):
         """The results should exclude deliverables with a status that wasn't passed."""
         # setup - create test dataset
-        test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
+        test_data = GitHubIssues.from_dict(self.TEST_ROWS)
         # execution
         df = DeliverablePercentComplete(
             test_data,
@@ -180,7 +186,7 @@
     def test_invert_statuses_selected(self):
         """We should filter out the other deliverable if invert statuses selected."""
         # setup - create test dataset
-        test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
+        test_data = GitHubIssues.from_dict(self.TEST_ROWS)
         # execution
         df = DeliverablePercentComplete(
             test_data,
@@ -196,7 +202,7 @@
     def test_list_selected_statuses_in_slack_message(self):
         """If we filter on status, those statuses should be listed in the slack message."""
         # setup - create test dataset
-        test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
+        test_data = GitHubIssues.from_dict(self.TEST_ROWS)
         # execution
         metric = DeliverablePercentComplete(
             test_data,
@@ -211,7 +217,7 @@
     def test_stats_also_filter_out_deliverables_with_excluded_status(self):
         """Filtered deliverables should also be excluded from get_stats()."""
         # setup - create test dataset
-        test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
+        test_data = GitHubIssues.from_dict(self.TEST_ROWS)
         # execution
         metric = DeliverablePercentComplete(
             test_data,
@@ -236,7 +242,7 @@ def test_all_issues_are_pointed(self):
             task_row(deliverable=2, task=3, points=3, status="open"),
             task_row(deliverable=2, task=3, points=1, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         # validation
@@ -256,7 +262,7 @@ def test_some_issues_are_not_pointed(self):
             task_row(deliverable=2, task=3, points=3, status="open"),
             task_row(deliverable=2, task=3, points=None, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         # validation
@@ -275,7 +281,7 @@ def test_deliverables_without_tasks_have_0_pct_pointed(self):
             task_row(deliverable=1, task=2, points=1, status="closed"),
             task_row(deliverable=2, task=None, points=None, status=None),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         # validation
@@ -295,7 +301,7 @@ def test_slack_message_contains_right_number_of_lines(self):
             task_row(deliverable=2, task=2, points=1, status="closed"),
             task_row(deliverable=3, task=3, points=3, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         lines = output.format_slack_message().splitlines()
@@ -309,7 +315,7 @@ def test_title_includes_issues_when_unit_is_issue(self):
             task_row(deliverable=1, task=1, points=2, status="open"),
             task_row(deliverable=2, task=2, points=1, status=None),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         title = output.format_slack_message().splitlines()[0]
@@ -323,7 +329,7 @@ def test_title_includes_points_when_unit_is_points(self):
             task_row(deliverable=1, task=1, points=2, status="open"),
             task_row(deliverable=2, task=2, points=1, status=None),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.points)
         title = output.format_slack_message().splitlines()[0]
@@ -343,7 +349,7 @@ def test_plot_results_output_stored_in_chart_property(self):
             task_row(deliverable=2, task=3, points=3, status="open"),
             task_row(deliverable=2, task=3, points=None, status="open"),
         ]
-        test_data = DeliverableTasks.from_dict(test_rows)
+        test_data = GitHubIssues.from_dict(test_rows)
         # execution
         output = DeliverablePercentComplete(test_data, unit=Unit.issues)
         # validation - check that the chart attribute matches output of plot_results()
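One property worth noting in the rewritten `task_row()` helper above: because `issue_state` is a computed field, `model_dump()` emits it automatically, which is what keeps the metric's `status_col = "issue_state"` working against rows built this way. Roughly, inside this test module:

```python
# Illustrative only -- relies on the task_row() helper defined above.
row = task_row(deliverable=1, task=2, status="closed")
assert row["issue_state"] == "closed"       # computed from issue_is_closed
assert row["deliverable_title"] == "Deliverable 1"
```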
diff --git a/analytics/tests/test_cli.py b/analytics/tests/test_cli.py
index 0c323eb93..8c230aaae 100644
--- a/analytics/tests/test_cli.py
+++ b/analytics/tests/test_cli.py
@@ -40,8 +40,8 @@ def test_file_fixtures(tmp_path: Path) -> MockFiles:
         json_issue_row(issue=2, labels=["deliverable: 30k ft"]),
     ]
     delivery_data = [
-        issue(issue=1).__dict__,
-        issue(issue=2).__dict__,
+        issue(issue=1).model_dump(),
+        issue(issue=2).model_dump(),
     ]
     # write test data to json files
     write_test_data_to_file(issue_data, issue_file)
@@ -217,10 +217,8 @@ def test_calculate_deliverable_percent_complete(self, mock_files: MockFiles):
         command = [
             "calculate",
             "deliverable_percent_complete",
-            "--sprint-file",
-            str(mock_files.sprint_file),
             "--issue-file",
-            str(mock_files.issue_file),
+            str(mock_files.delivery_file),
         ]
         # execution
         result = runner.invoke(app, command)
@@ -238,10 +236,8 @@ def test_stdout_message_includes_points_if_no_unit_is_set(
         self,
         mock_files: MockFiles,
     ):
         command = [
             "calculate",
             "deliverable_percent_complete",
-            "--sprint-file",
-            str(mock_files.sprint_file),
             "--issue-file",
-            str(mock_files.issue_file),
+            str(mock_files.delivery_file),
             "--show-results",
         ]
         # execution
@@ -262,10 +258,8 @@ def test_stdout_message_includes_issues_if_unit_set_to_issues(
         self,
         mock_files: MockFiles,
     ):
         command = [
             "calculate",
             "deliverable_percent_complete",
-            "--sprint-file",
-            str(mock_files.sprint_file),
             "--issue-file",
-            str(mock_files.issue_file),
+            str(mock_files.delivery_file),
             "--unit",
             "issues",
             "--show-results",