Skip to content

Commit

Permalink
[Issue #2489] Updates DeliverablePercentComplete to use `GitHubIssues` dataset (#2710)
Browse files Browse the repository at this point in the history

Updates the percent complete by deliverable metric to use the new
`GitHubIssues` dataset:
- Adds a `deliverable_status` and `issue_closed` attributes to the
`GitHubIssues` dataset, needed to calculate percent complete by
deliverable
- Replaces `DeliverableTasks` with `GitHubIssues` as the dataset used to
calculate `DeliverablePercentComplete` metric
- Updates the entry point to calculate deliverable percent complete
  • Loading branch information
widal001 authored Nov 4, 2024
1 parent 96cc55e commit d69d56d
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 65 deletions.
4 changes: 1 addition & 3 deletions analytics/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,7 @@ percent-complete:
@echo "=> Running percent complete deliverable"
@echo "====================================================="
$(POETRY) analytics calculate deliverable_percent_complete \
--sprint-file $(SPRINT_FILE) \
--roadmap-file $(ROADMAP_FILE) \
--issue-file $(ISSUE_FILE) \
--issue-file $(DELIVERY_FILE) \
--output-dir $(OUTPUT_DIR) \
--include-status "In Progress" \
--include-status "Planning" \
Expand Down
17 changes: 1 addition & 16 deletions analytics/src/analytics/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from slack_sdk import WebClient
from sqlalchemy import text

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.issues import GitHubIssues
from analytics.etl.github import GitHubProjectConfig, GitHubProjectETL
from analytics.etl.utils import load_config
Expand Down Expand Up @@ -165,7 +164,6 @@ def calculate_sprint_burnup(

@metrics_app.command(name="deliverable_percent_complete")
def calculate_deliverable_percent_complete(
sprint_file: Annotated[str, SPRINT_FILE_ARG],
issue_file: Annotated[str, ISSUE_FILE_ARG],
# Typer uses the Unit enum to validate user inputs from the CLI
# but the default arg must be a string or the CLI will throw an error
Expand All @@ -174,23 +172,10 @@ def calculate_deliverable_percent_complete(
show_results: Annotated[bool, SHOW_RESULTS_ARG] = False,
post_results: Annotated[bool, POST_RESULTS_ARG] = False,
output_dir: Annotated[str, OUTPUT_DIR_ARG] = "data",
roadmap_file: Annotated[Optional[str], ROADMAP_FILE_ARG] = None, # noqa: UP007
include_status: Annotated[Optional[list[str]], STATUS_ARG] = None, # noqa: UP007
) -> None:
"""Calculate percentage completion by deliverable."""
if roadmap_file:
# load the input data using the new join path with roadmap data
task_data = DeliverableTasks.load_from_json_files_with_roadmap_data(
sprint_file=sprint_file,
issue_file=issue_file,
roadmap_file=roadmap_file,
)
else:
# load the input data using the original join path without roadmap data
task_data = DeliverableTasks.load_from_json_files(
sprint_file=sprint_file,
issue_file=issue_file,
)
task_data = GitHubIssues.from_json(issue_file)
# calculate percent complete
metric = DeliverablePercentComplete(
dataset=task_data,
Expand Down
19 changes: 18 additions & 1 deletion analytics/src/analytics/datasets/issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from enum import Enum

import pandas as pd
from pydantic import BaseModel, Field
from pydantic import BaseModel, Field, computed_field

from analytics.datasets.base import BaseDataset

Expand All @@ -26,6 +26,13 @@ class IssueType(Enum):
NONE = None


class IssueState(Enum):
    """Whether the issue is open or closed.

    Values mirror GitHub's issue ``state`` field so the serialized
    dataset matches what the GitHub API reports.
    """

    # Issue is still open (issue_is_closed is False)
    OPEN = "open"
    # Issue has been closed (issue_is_closed is True)
    CLOSED = "closed"


class IssueMetadata(BaseModel):
"""Stores information about issue type and parent (if applicable)."""

Expand Down Expand Up @@ -58,9 +65,19 @@ class IssueMetadata(BaseModel):
# Parent metadata -- attributes about parent issues populated via lookup
deliverable_url: str | None = Field(default=None)
deliverable_title: str | None = Field(default=None)
deliverable_status: str | None = Field(default=None)
epic_url: str | None = Field(default=None)
epic_title: str | None = Field(default=None)

# See https://docs.pydantic.dev/2.0/usage/computed_fields/
@computed_field  # type: ignore[misc]
@property
def issue_state(self) -> str:
    """Derive the open/closed state string from the issue_is_closed flag."""
    state = IssueState.CLOSED if self.issue_is_closed else IssueState.OPEN
    return state.value


# ===============================================================
# Dataset class
Expand Down
3 changes: 2 additions & 1 deletion analytics/src/analytics/etl/github.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,7 @@ def flatten_issue_data(lookup: dict[str, IssueMetadata]) -> list[dict]:
issue.deliverable_title = deliverable.issue_title
issue.deliverable_url = deliverable.issue_url
issue.deliverable_pillar = deliverable.deliverable_pillar
issue.deliverable_status = deliverable.issue_status
# Set quad metadata
issue.quad_id = deliverable.quad_id
issue.quad_name = deliverable.quad_name
Expand All @@ -306,7 +307,7 @@ def flatten_issue_data(lookup: dict[str, IssueMetadata]) -> list[dict]:
issue.epic_url = epic.issue_url

# Add the issue to the results
result.append(issue.__dict__)
result.append(issue.model_dump())

# Return the results
return result
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ query (
pillar: fieldValueByName(name: $pillarField) {
...singleSelectContent
}
status: fieldValueByName(name: "Status") {
...singleSelectContent
}
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions analytics/src/analytics/integrations/github/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def export_sprint_data(
issue_url: .content.url,
issue_parent: .content.parent.url,
issue_type: .content.issueType.name,
issue_status: .status.name,
issue_is_closed: .content.closed,
issue_opened_at: .content.createdAt,
issue_closed_at: .content.closedAt,
Expand Down Expand Up @@ -146,6 +147,7 @@ def export_roadmap_data(
issue_url: .content.url,
issue_parent: .content.parent.url,
issue_type: .content.issueType.name,
issue_status: .status.name,
issue_is_closed: .content.closed,
issue_opened_at: .content.createdAt,
issue_closed_at: .content.closedAt,
Expand Down
13 changes: 7 additions & 6 deletions analytics/src/analytics/metrics/percent_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@
import plotly.express as px
from plotly.graph_objects import Figure

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.issues import GitHubIssues
from analytics.metrics.base import BaseMetric, Statistic, Unit


class DeliverablePercentComplete(BaseMetric[DeliverableTasks]):
class DeliverablePercentComplete(BaseMetric[GitHubIssues]):
"""Calculate the percentage of issues or points completed per deliverable."""

def __init__(
self,
dataset: DeliverableTasks,
dataset: GitHubIssues,
unit: Unit,
statuses_to_include: list[str] | None = None,
) -> None:
"""Initialize the DeliverablePercentComplete metric."""
self.dataset = dataset
self.deliverable_col = "deliverable_title"
self.status_col = "status"
self.status_col = "issue_state"
self.deliverable_status_col = "deliverable_status"
self.unit = unit
self.unit_col = dataset.points_col if unit == Unit.points else unit.value
self.statuses_to_include = statuses_to_include
self.deliverable_data = self._isolate_deliverables_by_status()
super().__init__(dataset)
Expand Down Expand Up @@ -80,7 +81,7 @@ def get_stats(self) -> dict[str, Statistic]:
"""Calculate stats for this metric."""
df_src = self.deliverable_data
# get the total number of issues and the number of issues with points per deliverable
is_pointed = df_src[Unit.points.value] >= 1
is_pointed = df_src[self.dataset.points_col] >= 1
issues_total = df_src.value_counts(self.deliverable_col).to_frame()
issues_pointed = (
df_src[is_pointed].value_counts(self.deliverable_col).to_frame()
Expand Down Expand Up @@ -127,7 +128,7 @@ def _get_count_by_deliverable(
"""Get the count of issues (or points) by deliverable and status."""
# create local copies of the dataset and key column names
df = self.deliverable_data.copy()
unit_col = self.unit.value
unit_col = self.unit_col
key_cols = [self.deliverable_col, unit_col]
# create a dummy column to sum per row if the unit is issues
if self.unit == Unit.issues:
Expand Down
60 changes: 33 additions & 27 deletions analytics/tests/metrics/test_percent_complete.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

import pytest

from analytics.datasets.deliverable_tasks import DeliverableTasks
from analytics.datasets.issues import GitHubIssues, IssueMetadata, IssueType
from analytics.metrics.percent_complete import DeliverablePercentComplete, Unit
from tests.conftest import MockSlackbot
from tests.conftest import MockSlackbot, DAY_0, DAY_1


def task_row(
Expand All @@ -17,15 +17,21 @@ def task_row(
status: str | None = "open",
) -> dict:
"""Create a sample row of the DeliverableTasks dataset."""
return {
"deliverable_number": deliverable,
"deliverable_title": f"Deliverable {deliverable}",
"deliverable_status": deliverable_status,
"issue_number": task,
"issue_title": f"Task {task}" if task else None,
"points": points,
"status": status,
}
issue = IssueMetadata(
project_owner="HHS",
project_number=1,
issue_title=f"Task {task}",
issue_url=f"task{task}",
issue_type=IssueType.TASK.value,
issue_parent=None,
issue_points=points,
issue_is_closed=status == "closed",
issue_opened_at=DAY_0,
issue_closed_at=DAY_1 if status == "closed" else None,
deliverable_title=f"Deliverable {deliverable}",
deliverable_status=deliverable_status,
)
return issue.model_dump()


@pytest.fixture(name="percent_complete", scope="module")
Expand All @@ -37,7 +43,7 @@ def sample_percent_complete() -> DeliverablePercentComplete:
task_row(deliverable=1, task=2, status="closed"),
task_row(deliverable=2, task=3, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# return sprint burndown by points
return DeliverablePercentComplete(test_data, unit=Unit.points)

Expand All @@ -53,7 +59,7 @@ def test_percent_complete_based_on_task_count(self):
task_row(deliverable=1, task=2, status="closed"),
task_row(deliverable=2, task=3, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
df = DeliverablePercentComplete(test_data, unit=Unit.issues).results
df = df.set_index("deliverable_title")
Expand All @@ -80,7 +86,7 @@ def test_percent_complete_based_on_points(self):
task_row(deliverable=1, task=2, points=3, status="closed"),
task_row(deliverable=2, task=3, points=5, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
df = DeliverablePercentComplete(test_data, unit=Unit.points).results
df = df.set_index("deliverable_title")
Expand All @@ -106,7 +112,7 @@ def test_show_0_pct_for_deliverables_without_tasks(self):
task_row(deliverable=1, task=2, status="closed"),
task_row(deliverable=2, task=None, status=None),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution - use tasks as the unit
df = DeliverablePercentComplete(test_data, unit=Unit.issues).results
df = df.set_index("deliverable_title")
Expand All @@ -132,7 +138,7 @@ def test_show_0_pct_for_deliverables_without_points(self):
task_row(deliverable=1, task=2, points=2, status="closed"),
task_row(deliverable=2, task=None, points=None, status=None),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution - use points as the unit
df = DeliverablePercentComplete(test_data, unit=Unit.points).results
df = df.set_index("deliverable_title")
Expand Down Expand Up @@ -164,7 +170,7 @@ class TestFilteringReportByDeliverableStatus:
def test_filter_out_deliverables_with_excluded_status(self):
"""The results should exclude deliverables with a status that wasn't passed."""
# setup - create test dataset
test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
test_data = GitHubIssues.from_dict(self.TEST_ROWS)
# execution
df = DeliverablePercentComplete(
test_data,
Expand All @@ -180,7 +186,7 @@ def test_filter_out_deliverables_with_excluded_status(self):
def test_invert_statuses_selected(self):
"""We should filter out the other deliverable if invert statuses selected."""
# setup - create test dataset
test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
test_data = GitHubIssues.from_dict(self.TEST_ROWS)
# execution
df = DeliverablePercentComplete(
test_data,
Expand All @@ -196,7 +202,7 @@ def test_invert_statuses_selected(self):
def test_list_selected_statuses_in_slack_message(self):
"""If we filter on status, those statuses should be listed in the slack message."""
# setup - create test dataset
test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
test_data = GitHubIssues.from_dict(self.TEST_ROWS)
# execution
metric = DeliverablePercentComplete(
test_data,
Expand All @@ -211,7 +217,7 @@ def test_list_selected_statuses_in_slack_message(self):
def test_stats_also_filter_out_deliverables_with_excluded_status(self):
"""Filtered deliverables should also be excluded from get_stats()."""
# setup - create test dataset
test_data = DeliverableTasks.from_dict(self.TEST_ROWS)
test_data = GitHubIssues.from_dict(self.TEST_ROWS)
# execution
metric = DeliverablePercentComplete(
test_data,
Expand All @@ -236,7 +242,7 @@ def test_all_issues_are_pointed(self):
task_row(deliverable=2, task=3, points=3, status="open"),
task_row(deliverable=2, task=3, points=1, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
# validation
Expand All @@ -256,7 +262,7 @@ def test_some_issues_are_not_pointed(self):
task_row(deliverable=2, task=3, points=3, status="open"),
task_row(deliverable=2, task=3, points=None, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
# validation
Expand All @@ -275,7 +281,7 @@ def test_deliverables_without_tasks_have_0_pct_pointed(self):
task_row(deliverable=1, task=2, points=1, status="closed"),
task_row(deliverable=2, task=None, points=None, status=None),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
# validation
Expand All @@ -295,7 +301,7 @@ def test_slack_message_contains_right_number_of_lines(self):
task_row(deliverable=2, task=2, points=1, status="closed"),
task_row(deliverable=3, task=3, points=3, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
lines = output.format_slack_message().splitlines()
Expand All @@ -309,7 +315,7 @@ def test_title_includes_issues_when_unit_is_issue(self):
task_row(deliverable=1, task=1, points=2, status="open"),
task_row(deliverable=2, task=2, points=1, status=None),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
title = output.format_slack_message().splitlines()[0]
Expand All @@ -323,7 +329,7 @@ def test_title_includes_points_when_unit_is_points(self):
task_row(deliverable=1, task=1, points=2, status="open"),
task_row(deliverable=2, task=2, points=1, status=None),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.points)
title = output.format_slack_message().splitlines()[0]
Expand All @@ -343,7 +349,7 @@ def test_plot_results_output_stored_in_chart_property(self):
task_row(deliverable=2, task=3, points=3, status="open"),
task_row(deliverable=2, task=3, points=None, status="open"),
]
test_data = DeliverableTasks.from_dict(test_rows)
test_data = GitHubIssues.from_dict(test_rows)
# execution
output = DeliverablePercentComplete(test_data, unit=Unit.issues)
# validation - check that the chart attribute matches output of plot_results()
Expand Down
Loading

0 comments on commit d69d56d

Please sign in to comment.