Merge branch 'dev' into repo_info_page
cdolfi authored Jan 10, 2024
2 parents aef90ff + e080319 commit 0e6518c
Showing 12 changed files with 167 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/spellcheck.yaml
@@ -14,4 +14,4 @@ jobs:
- name: Code Checkout
uses: actions/checkout@v2
- name: Spellcheck
uses: rojopolis/spellcheck-github-actions@0.27.0
uses: rojopolis/spellcheck-github-actions@0.35.0
@@ -78,6 +78,19 @@
className="me-2",
width=2,
),
dbc.Col(
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
],
value=[""],
inline=True,
switch=True,
),
width=4,
),
],
align="center",
),
@@ -135,11 +148,12 @@ def toggle_popover(n, is_open):
Input(f"company-contributions-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input(f"email-filter-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_switch):
def compay_associated_activity_graph(repolist, num, start_date, end_date, email_filter, bot_switch):
"""Each contribution is associated with a contributor. That contributor can be associated with
more than one email address. Hence each contribution is associated with all of the emails that a contributor has historically used.
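
To make that fan-out concrete, here is a minimal, self-contained sketch (hypothetical data, not part of this commit; the cntrb_id and domains column names mirror the ones used in process_data below):

import pandas as pd

# contributor 42 has used two email domains, contributor 7 one;
# each contribution is credited once per associated domain
df = pd.DataFrame(
    {
        "cntrb_id": [42, 42, 7],
        "domains": ["gmail.com", "redhat.com", "gmail.com"],
    }
)

print(df.groupby("domains").size())
# gmail.com     2
# redhat.com    1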
@@ -177,15 +191,15 @@ def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_sw
df = df[~df["cntrb_id"].isin(app.bots_list)]

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, num, start_date, end_date)
df = process_data(df, num, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)

@@ -224,6 +238,16 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
.reset_index(drop=True)
)

# remove "Other" domain entries from the set
df = df[df.domains != "Other"]

# remove Gmail / GitHub-noreply entries if the corresponding filter is checked
if email_filter is not None:
if "gmail" in email_filter:
df = df[df.domains != "gmail.com"]
if "github" in email_filter:
df = df[df.domains != "users.noreply.github.com"]

return df
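
For reference, the email_filter argument arrives from the dbc.Checklist above as the list of selected option values — [""] when nothing is checked, up to ["gmail", "github"] with both switches on. A self-contained sketch of the exclusion logic on made-up rows:

import pandas as pd

df = pd.DataFrame(
    {
        "cntrb_id": [1, 2, 3, 4],
        "domains": ["gmail.com", "redhat.com", "users.noreply.github.com", "Other"],
    }
)

email_filter = ["gmail", "github"]  # as if both switches were toggled on

# mirror of the filtering above: drop "Other", then the checked domains
df = df[df.domains != "Other"]
if email_filter is not None:
    if "gmail" in email_filter:
        df = df[df.domains != "gmail.com"]
    if "github" in email_filter:
        df = df[df.domains != "users.noreply.github.com"]

print(df)  # only the redhat.com row remains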


@@ -33,7 +33,7 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"This graph counts the number of core contributions that COULD be linked to each company.\n\
The methodology behind this is to take each associated email to someones github account\n\
The methodology behind this is to take each email associated with someone's GitHub account\n\
and link the contributions to each, as it is unknown which entity the activity was done for.\n\
Then the graph groups contributions by contributors and filters by contributors that are core.\n\
Contributions required is the number of contributions necessary to be considered a core contributor\n\
@@ -103,6 +103,19 @@
),
width="auto",
),
dbc.Col(
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
],
value=[""],
inline=True,
switch=True,
),
width=4,
),
dbc.Col(
dbc.Button(
"About Graph",
@@ -146,11 +159,14 @@ def toggle_popover(n, is_open):
Input(f"contributors-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input(f"email-filter-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
def compay_associated_activity_graph(repolist, contributions, contributors, start_date, end_date, bot_switch):
def compay_associated_activity_graph(
repolist, contributions, contributors, start_date, end_date, email_filter, bot_switch
):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
@@ -175,15 +191,15 @@ def compay_associated_activity_graph(repolist, contributions, contributors, star
df = df[~df["cntrb_id"].isin(app.bots_list)]

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, contributions, contributors, start_date, end_date)
df = process_data(df, contributions, contributors, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date):
def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date, email_filter):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)

@@ -229,6 +245,16 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
.reset_index(drop=True)
)

# remove "Other" domain entries from the set
df = df[df.domains != "Other"]

# remove Gmail / GitHub-noreply entries if the corresponding filter is checked
if email_filter is not None:
if "gmail" in email_filter:
df = df[df.domains != "gmail.com"]
if "github" in email_filter:
df = df[df.domains != "users.noreply.github.com"]

return df


@@ -24,7 +24,7 @@
dbc.CardBody(
[
html.H3(
"Company Affiliation by Github Account Info",
"Company Affiliation by GitHub Account Info",
className="card-title",
style={"textAlign": "center"},
),
@@ -33,7 +33,7 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"""
Visualizes Github account institution affiliation.\n
Visualizes GitHub account institution affiliation.\n
Many individuals don't report an affiliated institution, but\n
this count may be considered an absolute lower-bound on affiliation.
"""
@@ -42,7 +42,7 @@
with the remaining 4/5 representing all other contributors. By default, contributors
who have 'potential-bot-filter' in their login are filtered out. Optionally, contributors
can be filtered out by their logins with custom keyword(s). Note: Some commits may have a
Contributor ID of 'None' if there is no Github account is associated with the email that
Contributor ID of 'None' if there is no GitHub account associated with the email that
the contributor committed as.
"""
),
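
The login-keyword filtering the popover describes happens outside this hunk; what follows is only a plausible sketch of that kind of filter (a login column is assumed from the popover's wording, and the keyword list and rows are invented):

import pandas as pd

df = pd.DataFrame({"login": ["alice", "renovate-bot", "potential-bot-filter-x", None]})

keywords = ["potential-bot-filter", "bot"]  # hypothetical user-supplied keywords
mask = df["login"].fillna("").str.contains("|".join(keywords))
df = df[~mask]

print(df)
# "alice" remains, as does the row with no login (cf. the 'None'
# Contributor ID note in the popover above)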
63 changes: 48 additions & 15 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
@@ -11,6 +11,8 @@
from queries.issues_query import issues_query as iq
import time
import cache_manager.cache_facade as cf
import datetime as dt
from dateutil.relativedelta import relativedelta

PAGE = "contributions"
VIZ_ID = "issues-over-time"
@@ -71,6 +73,24 @@
),
className="me-2",
),
],
align="center",
),
dbc.Row(
[
dbc.Col(
dcc.DatePickerRange(
id=f"date-picker-range-{PAGE}-{VIZ_ID}",
min_date_allowed=dt.date(2005, 1, 1),
max_date_allowed=dt.date.today(),
initial_visible_month=dt.date(dt.date.today().year, 1, 1),
start_date=dt.date(
dt.date.today().year - 2, dt.date.today().month, dt.date.today().day
),
clearable=True,
),
width="auto",
),
dbc.Col(
dbc.Button(
"About Graph",
@@ -83,6 +103,7 @@
),
],
align="center",
justify="between",
),
]
),
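
Worth noting about the new picker: dcc.DatePickerRange hands start_date/end_date to the callback as ISO "YYYY-MM-DD" strings (or None when cleared), and process_data below leans on pandas coercing those strings in comparisons. A quick sanity check of that behavior:

import pandas as pd

dates = pd.Series(pd.to_datetime(["2022-06-01", "2024-01-10"]))
start_date = "2023-01-01"  # the shape of value the picker emits

# pandas parses the ISO string before comparing
print(dates >= start_date)  # [False, True]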
@@ -110,10 +131,12 @@ def toggle_popover(n, is_open):
[
Input("repo-choices", "data"),
Input(f"date-interval-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
],
background=True,
)
def issues_over_time_graph(repolist, interval):
def issues_over_time_graph(repolist, interval, start_date, end_date):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=iq.__name__, repolist=repolist):
logging.warning(f"ISSUES OVER TIME - WAITING ON DATA TO BECOME AVAILABLE")
@@ -135,7 +158,7 @@ def issues_over_time_graph(repolist, interval):
return nodata_graph

# function for all data pre processing
df_created, df_closed, df_open = process_data(df, interval)
df_created, df_closed, df_open = process_data(df, interval, start_date, end_date)

fig = create_figure(df_created, df_closed, df_open, interval)

@@ -144,10 +167,10 @@
return fig


def process_data(df: pd.DataFrame, interval):
def process_data(df: pd.DataFrame, interval, start_date, end_date):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)
df["closed"] = pd.to_datetime(df["closed"], utc=True)
df["created"] = pd.to_datetime(df["created"], utc=False)
df["closed"] = pd.to_datetime(df["closed"], utc=False)

# order values chronologically by creation date
df = df.sort_values(by="created", axis=0, ascending=True)
@@ -175,19 +198,21 @@ def process_data(df: pd.DataFrame, interval):
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# formatting for graph generation
if interval == "M":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-%m-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-%m-01")
elif interval == "Y":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-01-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-01-01")

# first and last elements of the dataframe are the
# earliest and latest events respectively
earliest = df["created"].min()
latest = max(df["created"].max(), df["closed"].max())

# filter values based on the date picker; this needs to come after the open-issue setup for correct counting
if start_date is not None:
df_created = df_created[df_created.Date >= start_date]
df_closed = df_closed[df_closed.Date >= start_date]
earliest = start_date
if end_date is not None:
df_created = df_created[df_created.Date <= end_date]
df_closed = df_closed[df_closed.Date <= end_date]
latest = end_date

# beginning to the end of time by the specified interval
dates = pd.date_range(start=earliest, end=latest, freq="D", inclusive="both")

@@ -197,6 +222,14 @@
# applies function to get the number of open issues for each day
df_open["Open"] = df_open.apply(lambda row: get_open(df, row.Date), axis=1)

# formatting for graph generation
if interval == "M":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-%m-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-%m-01")
elif interval == "Y":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-01-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-01-01")

df_open["Date"] = df_open["Date"].dt.strftime("%Y-%m-%d")

return df_created, df_closed, df_open
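
get_open itself is defined elsewhere in this file and unchanged by this commit; the following is a plausible re-creation (an assumption, not the actual helper) that shows why the date filtering above can safely run first — each day's count depends only on the full df, not on the filtered frames:

import pandas as pd

def get_open(df: pd.DataFrame, date) -> int:
    """Hypothetical sketch: issues created on or before `date`
    and not yet closed by then are open on that day."""
    date = pd.to_datetime(date)
    created = df["created"] <= date
    still_open = df["closed"].isna() | (df["closed"] > date)
    return int((created & still_open).sum())

# toy usage
df = pd.DataFrame(
    {
        "created": pd.to_datetime(["2023-01-01", "2023-02-01"]),
        "closed": pd.to_datetime(["2023-01-15", None]),
    }
)
print(get_open(df, "2023-01-10"))  # 1 (first issue open, second not created yet)
print(get_open(df, "2023-03-01"))  # 1 (first closed, second still open)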
@@ -226,13 +259,13 @@ def create_figure(df_created: pd.DataFrame, df_closed: pd.DataFrame, df_open: pd
marker=dict(color=color_seq[4]),
name="Closed",
)
fig.update_xaxes(
"""fig.update_xaxes(
showgrid=True,
ticklabelmode="period",
dtick=period,
rangeslider_yaxis_rangemode="match",
range=x_r,
)
)"""
fig.update_layout(
xaxis_title=x_name,
yaxis_title="Number of Issues",
@@ -176,13 +176,13 @@ def process_data(df: pd.DataFrame, interval):
def create_figure(df: pd.DataFrame, interval):
column = "Weekday"
order = [
"Sunday",
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
if interval == "H":
column = "Hour"