diff --git a/8Knot/cache_manager/db_init.py b/8Knot/cache_manager/db_init.py
index 5cff6fb3..dd2c2a83 100644
--- a/8Knot/cache_manager/db_init.py
+++ b/8Knot/cache_manager/db_init.py
@@ -134,11 +134,11 @@ def _create_application_tables() -> None:
cur.execute(
"""
CREATE UNLOGGED TABLE IF NOT EXISTS issues_query(
- repo_id int,
+ repo_id bigint,
repo_name text,
- issue int,
- issue_number int,
- gh_issue int,
+ issue bigint,
+ issue_number bigint,
+ gh_issue bigint,
reporter_id text,
issue_closer text,
created_at text,
diff --git a/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py b/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
index c2d36cca..46b70abb 100644
--- a/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
+++ b/8Knot/pages/chaoss/visualizations/contrib_importance_pie.py
@@ -35,20 +35,18 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"""
- For a given action type, visualizes the proportional share of the top k anonymous
+ AKA Bus factor. For a given action type, this visualizes the proportional share of the top k anonymous
contributors, aggregating the remaining contributors as "Other". Suppose Contributor A
opens the most PRs of all contributors, accounting for 1/5 of all PRs. If k = 1,
then the chart will have one slice for Contributor A accounting for 1/5 of the area,
- with the remaining 4/5 representing all other contributors. By default, contributors
- who have 'potential-bot-filter' in their login are filtered out. Optionally, contributors
- can be filtered out by their logins with custom keyword(s). Note: Some commits may have a
+ with the remaining 4/5 representing all other contributors. Note: Some commits may have a
Contributor ID of 'None' if there is no GitHub account is associated with the email that
the contributor committed as.
"""
),
],
id=f"popover-{PAGE}-{VIZ_ID}",
- target=f"popover-target-{PAGE}-{VIZ_ID}", # needs to be the same as dbc.Button id
+ target=f"popover-target-{PAGE}-{VIZ_ID}",
placement="top",
is_open=False,
),
@@ -134,31 +132,6 @@
],
align="center",
),
- dbc.Row(
- [
- dbc.Label(
- "Filter Out Contributors with Keyword(s) in Login:",
- html_for=f"patterns-{PAGE}-{VIZ_ID}",
- width="auto",
- ),
- dbc.Col(
- [
- dmc.MultiSelect(
- id=f"patterns-{PAGE}-{VIZ_ID}",
- placeholder="Bot filter values",
- data=[
- {"value": "bot", "label": "bot"},
- ],
- classNames={"values": "dmc-multiselect-custom"},
- creatable=True,
- searchable=True,
- ),
- ],
- className="me-2",
- ),
- ],
- align="center",
- ),
dbc.Row(
[
dbc.Col(
@@ -227,14 +200,13 @@ def graph_title(k, action_type):
Input("repo-choices", "data"),
Input(f"action-type-{PAGE}-{VIZ_ID}", "value"),
Input(f"top-k-contributors-{PAGE}-{VIZ_ID}", "value"),
- Input(f"patterns-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input("bot-switch", "value"),
],
background=True,
)
-def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date, end_date, bot_switch):
+def create_top_k_cntrbs_graph(repolist, action_type, top_k, start_date, end_date, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=ctq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
@@ -265,7 +237,7 @@ def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date
df = df[~df["cntrb_id"].isin(app.bots_list)]
# function for all data pre processing
- df = process_data(df, action_type, top_k, patterns, start_date, end_date)
+ df = process_data(df, action_type, top_k, start_date, end_date)
fig = create_figure(df, action_type)
@@ -273,7 +245,7 @@ def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date
return fig, False
-def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
+def process_data(df: pd.DataFrame, action_type, top_k, start_date, end_date):
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)
@@ -289,31 +261,23 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
# subset the df such that it only contains rows where the Action column value is the action type
df = df[df["Action"].str.contains(action_type)]
- # option to filter out potential bots
- if patterns:
- # remove rows where login column value contains any keyword in patterns
- patterns_mask = df["login"].str.contains("|".join(patterns), na=False)
- df = df[~patterns_mask]
+ # get the number of total contributions of the specific action type
+ t_sum = df.shape[0]
# count the number of contributions for each contributor
df = (df.groupby("cntrb_id")["Action"].count()).to_frame()
# sort rows according to amount of contributions from greatest to least
- df.sort_values(by="cntrb_id", ascending=False, inplace=True)
+ df.sort_values(by="Action", ascending=False, inplace=True)
+
df = df.reset_index()
# rename Action column to action_type
df = df.rename(columns={"Action": action_type})
- # get the number of total contributions
- t_sum = df[action_type].sum()
-
# index df to get first k rows
df = df.head(top_k)
- # convert cntrb_id from type UUID to String
- df["cntrb_id"] = df["cntrb_id"].apply(lambda x: str(x).split("-")[0])
-
# get the number of total top k contributions
df_sum = df[action_type].sum()
diff --git a/8Knot/pages/contributions/visualizations/pr_over_time.py b/8Knot/pages/contributions/visualizations/pr_over_time.py
index 255b896a..3dbcb813 100644
--- a/8Knot/pages/contributions/visualizations/pr_over_time.py
+++ b/8Knot/pages/contributions/visualizations/pr_over_time.py
@@ -233,7 +233,7 @@ def create_figure(
hovertemplate=hover + "
Created: %{y}
" + "",
offsetgroup=0,
marker=dict(color=color_seq[2]),
- name="created_at",
+ name="Opened",
)
fig.add_bar(
x=df_closed_merged["Date"],
diff --git a/8Knot/pages/contributors/visualizations/contrib_importance_pie.py b/8Knot/pages/contributors/visualizations/contrib_importance_pie.py
index 5952416b..40cca1a0 100644
--- a/8Knot/pages/contributors/visualizations/contrib_importance_pie.py
+++ b/8Knot/pages/contributors/visualizations/contrib_importance_pie.py
@@ -35,20 +35,18 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"""
- For a given action type, visualizes the proportional share of the top k anonymous
+ AKA Bus factor. For a given action type, this visualizes the proportional share of the top k anonymous
contributors, aggregating the remaining contributors as "Other". Suppose Contributor A
opens the most PRs of all contributors, accounting for 1/5 of all PRs. If k = 1,
then the chart will have one slice for Contributor A accounting for 1/5 of the area,
- with the remaining 4/5 representing all other contributors. By default, contributors
- who have 'potential-bot-filter' in their login are filtered out. Optionally, contributors
- can be filtered out by their logins with custom keyword(s). Note: Some commits may have a
+ with the remaining 4/5 representing all other contributors. Note: Some commits may have a
Contributor ID of 'None' if there is no Github account is associated with the email that
the contributor committed as.
"""
),
],
id=f"popover-{PAGE}-{VIZ_ID}",
- target=f"popover-target-{PAGE}-{VIZ_ID}", # needs to be the same as dbc.Button id
+ target=f"popover-target-{PAGE}-{VIZ_ID}",
placement="top",
is_open=False,
),
@@ -134,31 +132,6 @@
],
align="center",
),
- dbc.Row(
- [
- dbc.Label(
- "Filter Out Contributors with Keyword(s) in Login:",
- html_for=f"patterns-{PAGE}-{VIZ_ID}",
- width="auto",
- ),
- dbc.Col(
- [
- dmc.MultiSelect(
- id=f"patterns-{PAGE}-{VIZ_ID}",
- placeholder="Bot filter values",
- data=[
- {"value": "bot", "label": "bot"},
- ],
- classNames={"values": "dmc-multiselect-custom"},
- creatable=True,
- searchable=True,
- ),
- ],
- className="me-2",
- ),
- ],
- align="center",
- ),
dbc.Row(
[
dbc.Col(
@@ -227,14 +200,13 @@ def graph_title(k, action_type):
Input("repo-choices", "data"),
Input(f"action-type-{PAGE}-{VIZ_ID}", "value"),
Input(f"top-k-contributors-{PAGE}-{VIZ_ID}", "value"),
- Input(f"patterns-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input("bot-switch", "value"),
],
background=True,
)
-def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date, end_date, bot_switch):
+def create_top_k_cntrbs_graph(repolist, action_type, top_k, start_date, end_date, bot_switch):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=ctq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
@@ -266,7 +238,7 @@ def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date
return dash.no_update, True
# function for all data pre processing
- df = process_data(df, action_type, top_k, patterns, start_date, end_date)
+ df = process_data(df, action_type, top_k, start_date, end_date)
fig = create_figure(df, action_type)
@@ -274,7 +246,7 @@ def create_top_k_cntrbs_graph(repolist, action_type, top_k, patterns, start_date
return fig, False
-def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end_date):
+def process_data(df: pd.DataFrame, action_type, top_k, start_date, end_date):
# convert to datetime objects rather than strings
df["created_at"] = pd.to_datetime(df["created_at"], utc=True)
@@ -290,17 +262,15 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
# subset the df such that it only contains rows where the Action column value is the action type
df = df[df["Action"].str.contains(action_type)]
- # option to filter out potential bots
- if patterns:
- # remove rows where login column value contains any keyword in patterns
- patterns_mask = df["login"].str.contains("|".join(patterns), na=False)
- df = df[~patterns_mask]
+ # get the number of total contributions of the specific action type
+ t_sum = df.shape[0]
# count the number of contributions for each contributor
df = (df.groupby("cntrb_id")["Action"].count()).to_frame()
# sort rows according to amount of contributions from greatest to least
- df.sort_values(by="cntrb_id", ascending=False, inplace=True)
+ df.sort_values(by="Action", ascending=False, inplace=True)
+
df = df.reset_index()
# rename Action column to action_type
@@ -312,9 +282,6 @@ def process_data(df: pd.DataFrame, action_type, top_k, patterns, start_date, end
# index df to get first k rows
df = df.head(top_k)
- # convert cntrb_id from type UUID to String
- df["cntrb_id"] = df["cntrb_id"].apply(lambda x: str(x).split("-")[0])
-
# get the number of total top k contributions
df_sum = df[action_type].sum()
diff --git a/8Knot/pages/repo_overview/visualizations/package_version.py b/8Knot/pages/repo_overview/visualizations/package_version.py
index d4030752..6be983f9 100644
--- a/8Knot/pages/repo_overview/visualizations/package_version.py
+++ b/8Knot/pages/repo_overview/visualizations/package_version.py
@@ -114,7 +114,7 @@ def package_version_graph(repolist):
df = pd.DataFrame(df["dep_age"].value_counts().reset_index())
# graph generation
- fig = px.pie(df, names="index", values="dep_age", color_discrete_sequence=color_seq)
+ fig = px.pie(df, names="dep_age", values="count", color_discrete_sequence=color_seq)
fig.update_traces(
textposition="inside",
textinfo="percent+label",
diff --git a/8Knot/pages/visualization_template/viz_template.py b/8Knot/pages/visualization_template/viz_template.py
index 71c4d703..fd3eb093 100644
--- a/8Knot/pages/visualization_template/viz_template.py
+++ b/8Knot/pages/visualization_template/viz_template.py
@@ -13,6 +13,7 @@
import cache_manager.cache_facade as cf
from pages.utils.job_utils import nodata_graph
import time
+import app
"""
NOTE: VARIABLES TO CHANGE:
@@ -187,6 +188,7 @@ def toggle_popover(n, is_open):
[
Input("repo-choices", "data"),
Input(f"date-radio-{PAGE}-{VIZ_ID}", "value"),
+ # Input("bot-switch", "value"),
# add additional inputs here
],
background=True,
@@ -211,6 +213,13 @@ def NAME_OF_VISUALIZATION_graph(repolist, interval):
logging.warning(f"{VIZ_ID} - NO DATA AVAILABLE")
return nodata_graph
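+ # if the bot filter applies to this viz: uncomment the block below AND the "bot-switch" Input in the callback above, and add a bot_switch parameter to this function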
+ """
+ # remove bot data
+ if bot_switch:
+ df = df[~df["cntrb_id"].isin(app.bots_list)]
+ """
+
# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, interval)
diff --git a/8Knot/queries/query_template.py b/8Knot/queries/query_template.py
index c05d5bfb..6b4e62af 100644
--- a/8Knot/queries/query_template.py
+++ b/8Knot/queries/query_template.py
@@ -39,7 +39,7 @@
def NAME_query(self, repos):
"""
(Worker Query)
- Executes SQL query against Augur database for contributor data.
+ Executes SQL query against Augur database for GitHub data.
Args:
-----