-
Notifications
You must be signed in to change notification settings - Fork 3
/
verification.py
68 lines (43 loc) · 2.13 KB
/
verification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import glob
import json
import pandas as pd
def get_daily_reports():
    """Load every stored daily report into one DataFrame, newest date first.

    Each file matching ``data/daily_reports/2*.json`` (relative to the current
    working directory) is parsed as JSON and becomes one row. The rows are
    returned sorted by their ``date`` column in descending order.
    """

    def _read_json(path):
        with open(path, "r") as handle:
            return json.load(handle)

    # Only file names beginning with "2" are picked up -- presumably the names
    # start with the year, which should have us covered for a millennium.
    newest_first_paths = sorted(
        glob.glob("data/daily_reports/2*.json"), reverse=True
    )
    documents = [_read_json(path) for path in newest_first_paths]

    reports = pd.DataFrame(documents)
    return reports.sort_values(by="date", ascending=False)
def check_document_for_outliers(df: pd.DataFrame, document: pd.DataFrame) -> list:
    """Return the names of columns whose value in ``document`` is an outlier.

    For every column of ``df`` the first value of that column in ``document``
    is compared against the column's mean in ``df`` plus/minus three standard
    deviations. Columns falling outside that band are printed and collected.

    Columns that raise ``KeyError`` or ``TypeError`` along the way (for
    example a column that ``document`` does not have) are skipped silently.
    Any other exception is printed and the column is reported as an outlier.

    :param df: Historical data used to compute mean and standard deviation.
    :param document: Frame whose first row holds the values to check.
    :return: List of flagged column names, in ``df`` column order.
    """
    flagged = []
    for name in df.columns:
        try:
            history = df[name]
            three_sigma = history.std() * 3
            center = history.mean()
            latest = document[name].values[0]

            lower_bound = center - three_sigma
            upper_bound = center + three_sigma
            # Written as two one-sided comparisons on purpose: a NaN bound or
            # a NaN value makes both False, so the column is not flagged.
            if lower_bound > latest or latest > upper_bound:
                print(f"{name}: {lower_bound} < {latest} > {upper_bound}")
                flagged.append(name)
        except (KeyError, TypeError):
            # Column cannot be checked (missing or not comparable); skip it.
            continue
        except Exception as problem:
            # Unexpected failure: report it and treat the column as suspect.
            print(problem)
            flagged.append(name)
    return flagged
def verify_latest_report(invalid_outliers: set, expected_keys: list) -> bool:
    """Return True if the latest report is complete and has no outliers we care about.

    The newest report (first row of ``get_daily_reports()``) is compared
    against up to 364 reports that precede it.

    :param invalid_outliers: Column names whose outlier status invalidates the
        report; outliers in other columns are ignored.
    :param expected_keys: Keys the latest report must contain.
    :return: False if there is no report, if any expected key is missing from
        (or null in) the latest report, or if any column in
        ``invalid_outliers`` is flagged as an outlier; True otherwise.
    """
    daily_report_df = get_daily_reports()

    # get_daily_reports() sorts newest first, so row 0 is the report under
    # test and rows 1..364 are the history it is judged against.
    latest_report_df = daily_report_df.iloc[0:1, :]
    past_reports_df = daily_report_df.iloc[1:365, :]

    if latest_report_df.empty:
        return False

    # The frame's columns are the union of the keys of *all* reports, so a
    # column existing does not prove the latest report has that key: a key
    # absent from the latest document shows up as a null cell. Require both
    # the column and a non-null value in the latest row.
    # NOTE(review): this also rejects an expected key that is present but
    # explicitly null in the latest JSON -- confirm that is acceptable.
    for key in expected_keys:
        if key not in latest_report_df.columns:
            return False
        if latest_report_df[key].isna().any():
            return False

    outliers = set(check_document_for_outliers(past_reports_df, latest_report_df))
    return outliers.isdisjoint(invalid_outliers)
if __name__ == "__main__":
    # Script entry point: when the newest report passes verification, reload
    # it from disk and hand it to write_document_to_file.
    from create_daily_ospool_report_json import (
        EXPECTED_KEYS,
        OUTLIERS_WE_CARE_ABOUT,
        write_document_to_file,
    )

    report_is_valid = verify_latest_report(OUTLIERS_WE_CARE_ABOUT, EXPECTED_KEYS)
    if report_is_valid:
        # The newest report is taken to be the last path in lexicographic
        # order of the matching file names.
        newest_report_path = sorted(glob.glob("data/daily_reports/2*.json"))[-1]
        with open(newest_report_path, "r") as report_file:
            latest_document = json.load(report_file)

        # NOTE(review): the meaning of the two positional True flags is defined
        # by write_document_to_file in create_daily_ospool_report_json --
        # confirm there.
        write_document_to_file(latest_document, True, True)