Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Security vulnerabilities #57

Merged
merged 5 commits into from
Dec 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions metrics/app.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@
"command": "python -m metrics github",
"schedule": "@daily"
},
{
"command": "python -m metrics.github.security",
"schedule": "@daily"
},
{
"command": "python -m metrics slack tech-support",
"schedule": "@daily"
Expand Down
128 changes: 128 additions & 0 deletions metrics/github/security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import os

import requests
import structlog
from sqlalchemy import create_engine

from ..timescaledb import TimescaleDBWriter, drop_tables
from ..timescaledb.tables import GitHubVulnerabilities
from ..timescaledb.writer import TIMESCALEDB_URL
from ..tools import dates


# Module-level structured logger used by the functions below.
log = structlog.get_logger()

# Read eagerly: raises KeyError at import time if GITHUB_TOKEN is unset.
GITHUB_TOKEN = os.environ["GITHUB_TOKEN"]

# A single shared session so every request carries the same auth headers.
session = requests.Session()
# Merge into the session's existing headers rather than replacing them:
# plain assignment throws away the defaults requests installs (Accept,
# Accept-Encoding, Connection) and swaps its case-insensitive mapping for an
# ordinary dict.
session.headers.update(
    {
        "Authorization": f"bearer {GITHUB_TOKEN}",
        "User-Agent": "Bennett Metrics Testing",
    }
)


def make_request(query, variables, timeout=30):
    """POST a GraphQL query to the GitHub API and return the decoded JSON.

    Args:
        query: GraphQL document to execute.
        variables: Mapping of GraphQL variable names to their values.
        timeout: Seconds to wait on the connection/read before giving up.
            Without a timeout requests waits indefinitely, which would hang
            the scheduled job on a stalled connection.

    Returns:
        The response body decoded from JSON.

    Raises:
        requests.HTTPError: If GitHub answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within ``timeout``.
    """
    response = session.post(
        "https://api.github.com/graphql",
        json={"query": query, "variables": variables},
        timeout=timeout,
    )

    if not response.ok:
        # Log the response details before raise_for_status() discards them.
        log.info(response.headers)
        log.info(response.content)

    response.raise_for_status()
    return response.json()


def get_vulnerabilities(org):
    """Fetch every repository of ``org`` together with its vulnerability alerts.

    Repositories are requested 100 at a time and the connection cursor is
    followed until GitHub reports no further page, so organisations with more
    than 100 repositories are returned in full.

    Args:
        org: Login of the GitHub organisation to query.

    Returns:
        A list of repository nodes, each a dict with ``name``, ``archivedAt``
        and ``vulnerabilityAlerts`` (whose ``nodes`` hold ``number``,
        ``createdAt``, ``fixedAt`` and ``dismissedAt``).
    """
    query = """
    query vulnerabilities($org: String!, $cursor: String) {
      organization(login: $org) {
        repositories(first: 100, after: $cursor) {
          nodes {
            name
            archivedAt
            vulnerabilityAlerts(first: 100) {
              nodes {
                number
                createdAt
                fixedAt
                dismissedAt
              }
              pageInfo {
                hasNextPage endCursor
              }
            }
          }
          pageInfo {
            hasNextPage endCursor
          }
        }
      }
    }
    """
    repos = []
    cursor = None  # null cursor asks for the first page

    while True:
        response = make_request(query, {"org": org, "cursor": cursor})
        page = response["data"]["organization"]["repositories"]

        for repo in page["nodes"]:
            # Only the first 100 alerts per repository are requested; make
            # the truncation visible instead of silently under-counting.
            if repo["vulnerabilityAlerts"]["pageInfo"]["hasNextPage"]:
                log.warning(
                    "Repository has more than 100 vulnerability alerts; "
                    "only the first 100 are counted",
                    organisation=org,
                    repo=repo["name"],
                )
            repos.append(repo)

        if not page["pageInfo"]["hasNextPage"]:
            return repos
        cursor = page["pageInfo"]["endCursor"]


def date_before(date_string, target_date):
    """Tell whether an ISO timestamp falls on or before ``target_date``.

    Args:
        date_string: ISO-formatted timestamp, or a falsy value (``None`` or
            ``""``) when the event never happened.
        target_date: Date to compare against.

    Returns:
        ``False`` when ``date_string`` is falsy; otherwise whether the value
        parsed by ``dates.date_from_iso`` is ``<= target_date``.
    """
    if date_string:
        parsed = dates.date_from_iso(date_string)
        return parsed <= target_date

    # A missing timestamp can never be "before" anything.
    return False


def parse_vulnerabilities_for_date(vulns, repo, target_date, org):
    """Count a repository's open and closed alerts as of ``target_date``.

    An alert counts as closed if it was fixed or dismissed on or before the
    target date. Otherwise it counts as open if it had been created on or
    before that date. Alerts created later are not counted at all.

    Args:
        vulns: Iterable of alert dicts with ``createdAt``, ``fixedAt`` and
            ``dismissedAt`` keys.
        repo: Repository name, copied into the result.
        target_date: Date the counts are computed for.
        org: Organisation name, copied into the result.

    Returns:
        A dict with ``date``, ``closed``, ``open``, ``organisation`` and
        ``repo`` keys.
    """
    n_closed = 0
    n_open = 0

    for alert in vulns:
        # any() stops at the first hit, so dismissedAt is only consulted when
        # the alert was not already fixed by the target date.
        resolved = any(
            date_before(alert[key], target_date)
            for key in ("fixedAt", "dismissedAt")
        )
        if resolved:
            n_closed += 1
            continue

        if date_before(alert["createdAt"], target_date):
            n_open += 1

    summary = {"date": target_date, "closed": n_closed, "open": n_open}
    summary["organisation"] = org
    summary["repo"] = repo
    return summary


def parse_vulnerabilities(vulnerabilities, org):
    """Build one open/closed summary per repository per day.

    Archived repositories and repositories without alerts are skipped. For
    every other repository a summary is produced for each day yielded by
    ``dates.iter_days`` between the earliest and the latest alert creation
    date.

    Args:
        vulnerabilities: Iterable of repository dicts with ``name``,
            ``archivedAt`` and ``vulnerabilityAlerts`` keys.
        org: Organisation name, copied into every summary.

    Returns:
        A list of dicts as produced by ``parse_vulnerabilities_for_date``.
    """
    results = []

    for repo in vulnerabilities:
        repo_name = repo["name"]
        alerts = repo["vulnerabilityAlerts"]["nodes"]

        if repo["archivedAt"] or not alerts:
            continue

        # Use min/max rather than the first/last element: the query does not
        # request an ordering, so positional access would depend on whatever
        # order the API happens to return.
        created = [dates.date_from_iso(alert["createdAt"]) for alert in alerts]
        earliest_date = min(created)
        latest_date = max(created)

        # NOTE(review): the range ends at the newest alert's creation date,
        # so fixes/dismissals after that day are not reflected - confirm this
        # is intended.
        results.extend(
            parse_vulnerabilities_for_date(alerts, repo_name, day, org)
            for day in dates.iter_days(earliest_date, latest_date)
        )

    return results


def vulnerabilities(org):
    """Fetch, summarise and store the vulnerability counts for ``org``.

    Each per-day summary is written through a ``TimescaleDBWriter`` bound to
    the ``GitHubVulnerabilities`` table, with the summary's date as the first
    argument, ``value`` fixed at 0 and the remaining fields passed as keyword
    arguments.

    Args:
        org: Login of the GitHub organisation to process.
    """
    repos = get_vulnerabilities(org)
    summaries = parse_vulnerabilities(repos, org)

    with TimescaleDBWriter(GitHubVulnerabilities) as writer:
        for summary in summaries:
            # Everything except the date travels as keyword arguments.
            fields = {
                key: val for key, val in summary.items() if key != "date"
            }
            writer.write(summary["date"], value=0, **fields)


if __name__ == "__main__":  # pragma: no cover
    # Drop the existing table first, then repopulate it for every
    # organisation listed below.
    log.info("Dropping existing github_vulnerabilities table")
    with create_engine(TIMESCALEDB_URL).begin() as connection:
        drop_tables(connection, prefix="github_vulnerabilities")
    log.info("Dropped existing github_vulnerabilities table")

    for organisation in ("ebmdatalab", "opensafely-core"):
        vulnerabilities(organisation)
13 changes: 13 additions & 0 deletions metrics/timescaledb/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@
Column("repo", Text, primary_key=True),
)


# Schema for the "github_vulnerabilities" table: open/closed vulnerability
# alert counts for a repository at a point in time.
GitHubVulnerabilities = Table(
    "github_vulnerabilities",
    metadata,
    # "time" and "repo" together form the composite primary key.
    Column("time", TIMESTAMP(timezone=True), primary_key=True),
    # metrics/github/security.py always writes 0 here.
    Column("value", Integer),
    # Alert counts as of "time".
    Column("open", Integer),
    Column("closed", Integer),
    # NOTE(review): "organisation" is not part of the primary key, so rows
    # for same-named repos in different organisations would share a key -
    # confirm this is acceptable.
    Column("organisation", Text),
    Column("repo", Text, primary_key=True),
)


SlackTechSupport = Table(
"slack_tech_support",
metadata,
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ source = [
]

[tool.coverage.report]
fail_under = 70
fail_under = 76
skip_covered = true
show_missing = true

Expand Down
99 changes: 99 additions & 0 deletions tests/metrics/github/test_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
from datetime import date

from metrics.github import security


def fake_vulnerabilities(org):
    """Return a canned repository list shaped like the GitHub response.

    The fixture holds a single, non-archived ``opencodelists`` repository
    with seven alerts. ``org`` is accepted for signature parity with the real
    fetcher but is not used.
    """
    # (number, createdAt, fixedAt, dismissedAt)
    alert_rows = [
        (8, "2022-02-10T01:36:54Z", None, None),
        (23, "2022-10-18T17:20:30Z", "2022-10-24T14:27:29Z", None),
        (24, "2022-10-18T21:08:22Z", "2022-11-09T13:01:04Z", None),
        (25, "2022-11-01T17:52:53Z", "2022-11-07T15:14:37Z", None),
        (55, "2023-08-30T04:44:56Z", None, "2023-09-04T15:07:44Z"),
        (57, "2023-10-03T02:46:00Z", None, None),
        (64, "2023-10-26T15:02:17Z", None, None),
    ]
    alert_nodes = [
        {
            "number": number,
            "createdAt": created,
            "fixedAt": fixed,
            "dismissedAt": dismissed,
        }
        for number, created, fixed, dismissed in alert_rows
    ]

    return [
        {
            "name": "opencodelists",
            "archivedAt": None,
            "vulnerabilityAlerts": {"nodes": alert_nodes},
        }
    ]


def test_security_number_of_alerts_today():
    """By late November 2023 four alerts are closed and three remain open."""
    repo = fake_vulnerabilities("test-org")[0]
    nodes = repo["vulnerabilityAlerts"]["nodes"]

    summary = security.parse_vulnerabilities_for_date(
        nodes, "opencodelists", date(2023, 11, 28), "test-org"
    )

    assert str(summary["date"]) == "2023-11-28"
    assert summary["closed"] == 4
    assert summary["open"] == 3


def test_security_number_of_alerts_last_year():
    """On 2022-11-01 one alert is closed and three are open.

    The alert created on the target day itself counts as open.
    """
    repo = fake_vulnerabilities("test-org")[0]
    nodes = repo["vulnerabilityAlerts"]["nodes"]

    summary = security.parse_vulnerabilities_for_date(
        nodes, "opencodelists", date(2022, 11, 1), "test-org"
    )

    assert str(summary["date"]) == "2022-11-01"
    assert summary["closed"] == 1
    assert summary["open"] == 3


def test_security_parse_vulnerabilities_earliest_and_latest_date():
    """Summaries span every day from the first to the last alert creation."""
    repos = fake_vulnerabilities("test-org")
    summaries = security.parse_vulnerabilities(repos, "test-org")

    # 2022-02-10 through 2023-10-26, both ends included.
    assert len(summaries) == 624

    first = summaries[0]
    assert str(first["date"]) == "2022-02-10"
    assert first["closed"] == 0
    assert first["open"] == 1

    last = summaries[-1]
    assert str(last["date"]) == "2023-10-26"
    assert last["closed"] == 4
    assert last["open"] == 3