Skip to content

Commit

Permalink
SFR-2188: Removed Metrics_Type Column + Updated File Names (#400)
Browse files: browse the repository at this point in the history
* Removed metrics type column

* Removed comment
  • Loading branch information
fatimarahman authored Oct 16, 2024
1 parent 49c98e4 commit 2f35903
Show file tree
Hide file tree
Showing 8 changed files with 23 additions and 26 deletions.
4 changes: 2 additions & 2 deletions analytics/upress_reporting/counter_5_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,12 @@ def create_reports(self):

view_data_poller = InteractionEventPoller(date_range=self.reporting_period,
reporting_data=df,
file_id_regex=r"REST.GET.OBJECT manifests/(.*?json)\s",
file_id_regex=VIEW_FILE_ID_REGEX,
bucket_name=self.view_bucket,
interaction_type=InteractionType.VIEW)
download_data_poller = InteractionEventPoller(date_range=self.reporting_period,
reporting_data=df,
file_id_regex=r"REST.GET.OBJECT (.+pdf\s)",
file_id_regex=DOWNLOAD_FILE_ID_REGEX,
bucket_name=self.download_bucket,
interaction_type=InteractionType.DOWNLOAD)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,5 +25,4 @@ class InteractionEvent():
publication_year: Optional[str]
disciplines: Optional[str]
usage_type: str
interaction_type: Optional[str]
timestamp: Optional[str]
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,6 @@ def _match_log_info_with_drb_data(self, log_object) -> InteractionEvent | None:
publication_year=match_data["publication_year"],
disciplines=match_data["disciplines"],
usage_type=match_data["usage_type"],
interaction_type=self.interaction_type.value,
timestamp=match_time[0]
)

Expand Down
19 changes: 9 additions & 10 deletions analytics/upress_reporting/models/reports/counter_5_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,9 @@ def aggregate_interaction_events(self, events, reporting_data):
"Publication Year",
"Disciplines",
"Usage Type",
"Metric Type",
"Timestamp"
]

interaction_type = events[0].interaction_type
accessed_titles_df = self._create_events_df(events, columns)
accessed_titles_df["Timestamp"] = accessed_titles_df["Timestamp"].apply(
self._reformat_timestamp_data)
Expand All @@ -64,7 +62,7 @@ def aggregate_interaction_events(self, events, reporting_data):

zeroed_out_titles_df = self._format_zeroed_out_titles(
df=reporting_data, columns=columns,
monthly_columns=monthly_columns, interaction_type=interaction_type)
monthly_columns=monthly_columns)

merged_df = pandas.concat(
[accessed_titles_df, zeroed_out_titles_df], ignore_index=True)
Expand All @@ -87,11 +85,9 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):
"Publication Year",
"Disciplines",
"Usage Type",
"Metric Type",
"Timestamp"
]

interaction_type = events[0].interaction_type
accessed_titles_df = self._create_events_df(events=events,
columns=columns,
include_country=True)
Expand All @@ -117,7 +113,7 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):

zeroed_out_titles_df = self._format_zeroed_out_titles(
df=reporting_data, columns=columns,
monthly_columns=monthly_columns, interaction_type=interaction_type,
monthly_columns=monthly_columns,
include_country=True)

accessed_titles_df.loc[:,
Expand All @@ -133,7 +129,7 @@ def aggregate_interaction_events_by_country(self, events, reporting_data):

return (merged_df.columns.tolist(), merged_df.to_dict(orient="records"))

def build_header(self, report_name, report_description):
def build_header(self, report_name, report_description, metric_type):
"""TODO: Add further Record.source mappings to publishers as we advance
in project (ex. University of Louisiana, Lafayette)"""
publisher_mappings = {
Expand All @@ -144,13 +140,17 @@ def build_header(self, report_name, report_description):
"Report_ID": self.generate_report_id(),
"Report_Description": report_description,
"Publisher_Name": publisher_mappings.get(self.publisher, ""),
"Metric_Type": metric_type,
"Reporting_Period": self._format_reporting_period_to_string(),
"Created": self.created,
"Created_By": "NYPL",
}

def write_to_csv(self, file_name, header, column_names, data):
with open(file_name, 'w') as csv_file:
if "/" in file_name:
file_name = file_name.replace("/ ", "(") + ")"

with open(file_name+".csv", 'w') as csv_file:
writer = csv.writer(csv_file, delimiter="|",
quoting=csv.QUOTE_NONE)
for key, value in header.items():
Expand All @@ -161,7 +161,7 @@ def write_to_csv(self, file_name, header, column_names, data):
writer.writerow(title.values())

def _format_zeroed_out_titles(self, df, columns, monthly_columns,
interaction_type, include_country=False):
include_country=False):
unaccessed_titles = df.loc[df["accessed"] == False]
recarray = unaccessed_titles.to_records()

Expand All @@ -175,7 +175,6 @@ def _format_zeroed_out_titles(self, df, columns, monthly_columns,
publication_year=title.publication_year,
disciplines=title.disciplines,
usage_type=title.usage_type,
interaction_type=interaction_type,
timestamp=None) for title in recarray]

zeroed_out_df = self._create_events_df(zeroed_out_events, columns,
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/country_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,13 @@ def build_report(self, events, reporting_data):
print("Building country-level report...")

if len(events) > 0:
file_name = f"{self.publisher}_country_level_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title by Country",
report_description="Usage of your books on NYPL's Digital Research Books by country.")
report_description="Usage of your books on NYPL's Digital Research Books by country.",
metric_type="Views + Downloads")
columns, final_data = self.aggregate_interaction_events_by_country(events,
reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/downloads.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building downloads report...")

if len(events) > 0:
file_name = f"{self.publisher}_downloads_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title / Downloads",
report_description="Downloads of your books from NYPL's Digital Research Books by title.")
report_description="Downloads of your books from NYPL's Digital Research Books by title.",
metric_type="Downloads (loading of title contents)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/total_usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building total usage report...")

if len(events) > 0:
file_name = f"{self.publisher}_total_usage_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title",
report_description="Usage of your books on NYPL's Digital Research Books.")
report_description="Usage of your books on NYPL's Digital Research Books.",
metric_type="Views (clicks on title) + Downloads (loading of title contents)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down
6 changes: 3 additions & 3 deletions analytics/upress_reporting/models/reports/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ def build_report(self, events, reporting_data):
print("Building views report...")

if len(events) > 0:
file_name = f"{self.publisher}_views_report_{self.created}.csv"
header = self.build_header(report_name="NYPL DRB Total Item Requests by Title / Views",
report_description="Views of your books from NYPL's Digital Research Books by title.")
report_description="Views of your books from NYPL's Digital Research Books by title.",
metric_type="Views (clicks on title)")
columns, final_data = self.aggregate_interaction_events(events, reporting_data)

self.write_to_csv(file_name=file_name,
self.write_to_csv(file_name=header["Report_Name"],
header=header,
column_names=columns,
data=final_data)
Expand Down

0 comments on commit 2f35903

Please sign in to comment.