Merge branch 'dev' into repo_info_page
cdolfi authored Jan 10, 2024
2 parents aef90ff + e080319 commit 0e6518c
Showing 12 changed files with 167 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/spellcheck.yaml
@@ -14,4 +14,4 @@ jobs:
- name: Code Checkout
uses: actions/checkout@v2
- name: Spellcheck
uses: rojopolis/spellcheck-github-actions@0.27.0
uses: rojopolis/spellcheck-github-actions@0.35.0
@@ -78,6 +78,19 @@
className="me-2",
width=2,
),
dbc.Col(
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
],
value=[""],
inline=True,
switch=True,
),
width=4,
),
],
align="center",
),
@@ -135,11 +148,12 @@ def toggle_popover(n, is_open):
Input(f"company-contributions-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input(f"email-filter-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_switch):
def compay_associated_activity_graph(repolist, num, start_date, end_date, email_filter, bot_switch):
"""Each contribution is associated with a contributor. That contributor can be associated with
more than one email address. Hence each contribution is associated with all of the emails that a contributor has historically used.
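
To make that fan-out concrete, here is a minimal, self-contained sketch (hypothetical data, not part of this commit; the cntrb_id and domains column names mirror the ones used in process_data below):

import pandas as pd

# contributor 42 has used two email domains, contributor 7 one;
# each contribution is credited once per associated domain
df = pd.DataFrame(
    {
        "cntrb_id": [42, 42, 7],
        "domains": ["gmail.com", "redhat.com", "gmail.com"],
    }
)

print(df.groupby("domains").size())
# gmail.com     2
# redhat.com    1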
@@ -177,15 +191,15 @@ def compay_associated_activity_graph(repolist, num, start_date, end_date, bot_sw
df = df[~df["cntrb_id"].isin(app.bots_list)]

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, num, start_date, end_date)
df = process_data(df, num, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, num, start_date, end_date):
def process_data(df: pd.DataFrame, num, start_date, end_date, email_filter):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)

@@ -224,6 +238,16 @@ def process_data(df: pd.DataFrame, num, start_date, end_date):
.reset_index(drop=True)
)

# remove "Other" domain entries from the set
df = df[df.domains != "Other"]

# remove Gmail / GitHub-noreply entries if the corresponding filter is checked
if email_filter is not None:
if "gmail" in email_filter:
df = df[df.domains != "gmail.com"]
if "github" in email_filter:
df = df[df.domains != "users.noreply.github.com"]

return df
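
For reference, the email_filter argument arrives from the dbc.Checklist above as the list of selected option values — [""] when nothing is checked, up to ["gmail", "github"] with both switches on. A self-contained sketch of the exclusion logic on made-up rows:

import pandas as pd

df = pd.DataFrame(
    {
        "cntrb_id": [1, 2, 3, 4],
        "domains": ["gmail.com", "redhat.com", "users.noreply.github.com", "Other"],
    }
)

email_filter = ["gmail", "github"]  # as if both switches were toggled on

# mirror of the filtering above: drop "Other", then the checked domains
df = df[df.domains != "Other"]
if email_filter is not None:
    if "gmail" in email_filter:
        df = df[df.domains != "gmail.com"]
    if "github" in email_filter:
        df = df[df.domains != "users.noreply.github.com"]

print(df)  # only the redhat.com row remains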


@@ -33,7 +33,7 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"This graph counts the number of core contributions that COULD be linked to each company.\n\
The methodology behind this is to take each associated email to someones github account\n\
The methodology behind this is to take each email associated with someone's GitHub account\n\
and link the contributions to each, as it is unknown which entity the activity was done for.\n\
Then the graph groups contributions by contributors and filters by contributors that are core.\n\
Contributions required is the number of contributions necessary to be considered a core contributor\n\
@@ -103,6 +103,19 @@
),
width="auto",
),
dbc.Col(
dbc.Checklist(
id=f"email-filter-{PAGE}-{VIZ_ID}",
options=[
{"label": "Exclude Gmail", "value": "gmail"},
{"label": "Exclude GitHub", "value": "github"},
],
value=[""],
inline=True,
switch=True,
),
width=4,
),
dbc.Col(
dbc.Button(
"About Graph",
@@ -146,11 +159,14 @@ def toggle_popover(n, is_open):
Input(f"contributors-required-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
Input(f"email-filter-{PAGE}-{VIZ_ID}", "value"),
Input("bot-switch", "value"),
],
background=True,
)
def compay_associated_activity_graph(repolist, contributions, contributors, start_date, end_date, bot_switch):
def compay_associated_activity_graph(
repolist, contributions, contributors, start_date, end_date, email_filter, bot_switch
):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=cmq.__name__, repolist=repolist):
logging.warning(f"{VIZ_ID}- WAITING ON DATA TO BECOME AVAILABLE")
@@ -175,15 +191,15 @@ def compay_associated_activity_graph(repolist, contributions, contributors, star
df = df[~df["cntrb_id"].isin(app.bots_list)]

# function for all data pre processing, COULD HAVE ADDITIONAL INPUTS AND OUTPUTS
df = process_data(df, contributions, contributors, start_date, end_date)
df = process_data(df, contributions, contributors, start_date, end_date, email_filter)

fig = create_figure(df)

logging.warning(f"{VIZ_ID} - END - {time.perf_counter() - start}")
return fig


def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date):
def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_date, email_filter):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)

@@ -229,6 +245,16 @@ def process_data(df: pd.DataFrame, contributions, contributors, start_date, end_
.reset_index(drop=True)
)

# remove "Other" domain entries from the set
df = df[df.domains != "Other"]

# remove Gmail / GitHub-noreply entries if the corresponding filter is checked
if email_filter is not None:
if "gmail" in email_filter:
df = df[df.domains != "gmail.com"]
if "github" in email_filter:
df = df[df.domains != "users.noreply.github.com"]

return df


@@ -24,7 +24,7 @@
dbc.CardBody(
[
html.H3(
"Company Affiliation by Github Account Info",
"Company Affiliation by GitHub Account Info",
className="card-title",
style={"textAlign": "center"},
),
@@ -33,7 +33,7 @@
dbc.PopoverHeader("Graph Info:"),
dbc.PopoverBody(
"""
Visualizes Github account institution affiliation.\n
Visualizes GitHub account institution affiliation.\n
Many individuals don't report an affiliated institution, but\n
this count may be considered an absolute lower-bound on affiliation.
"""
@@ -42,7 +42,7 @@
with the remaining 4/5 representing all other contributors. By default, contributors
who have 'potential-bot-filter' in their login are filtered out. Optionally, contributors
can be filtered out by their logins with custom keyword(s). Note: Some commits may have a
Contributor ID of 'None' if there is no Github account is associated with the email that
Contributor ID of 'None' if there is no GitHub account associated with the email that
the contributor committed as.
"""
),
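
The login-keyword filtering the popover describes happens outside this hunk; what follows is only a plausible sketch of that kind of filter (a login column is assumed from the popover's wording, and the keyword list and rows are invented):

import pandas as pd

df = pd.DataFrame({"login": ["alice", "renovate-bot", "potential-bot-filter-x", None]})

keywords = ["potential-bot-filter", "bot"]  # hypothetical user-supplied keywords
mask = df["login"].fillna("").str.contains("|".join(keywords))
df = df[~mask]

print(df)
# "alice" remains, as does the row with no login (cf. the 'None'
# Contributor ID note in the popover above)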
63 changes: 48 additions & 15 deletions 8Knot/pages/contributions/visualizations/issues_over_time.py
@@ -11,6 +11,8 @@
from queries.issues_query import issues_query as iq
import time
import cache_manager.cache_facade as cf
import datetime as dt
from dateutil.relativedelta import relativedelta

PAGE = "contributions"
VIZ_ID = "issues-over-time"
@@ -71,6 +73,24 @@
),
className="me-2",
),
],
align="center",
),
dbc.Row(
[
dbc.Col(
dcc.DatePickerRange(
id=f"date-picker-range-{PAGE}-{VIZ_ID}",
min_date_allowed=dt.date(2005, 1, 1),
max_date_allowed=dt.date.today(),
initial_visible_month=dt.date(dt.date.today().year, 1, 1),
start_date=dt.date(
dt.date.today().year - 2, dt.date.today().month, dt.date.today().day
),
clearable=True,
),
width="auto",
),
dbc.Col(
dbc.Button(
"About Graph",
@@ -83,6 +103,7 @@
),
],
align="center",
justify="between",
),
]
),
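
Worth noting about the new picker: dcc.DatePickerRange hands start_date/end_date to the callback as ISO "YYYY-MM-DD" strings (or None when cleared), and process_data below leans on pandas coercing those strings in comparisons. A quick sanity check of that behavior:

import pandas as pd

dates = pd.Series(pd.to_datetime(["2022-06-01", "2024-01-10"]))
start_date = "2023-01-01"  # the shape of value the picker emits

# pandas parses the ISO string before comparing
print(dates >= start_date)  # [False, True]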
@@ -110,10 +131,12 @@ def toggle_popover(n, is_open):
[
Input("repo-choices", "data"),
Input(f"date-interval-{PAGE}-{VIZ_ID}", "value"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "start_date"),
Input(f"date-picker-range-{PAGE}-{VIZ_ID}", "end_date"),
],
background=True,
)
def issues_over_time_graph(repolist, interval):
def issues_over_time_graph(repolist, interval, start_date, end_date):
# wait for data to asynchronously download and become available.
while not_cached := cf.get_uncached(func_name=iq.__name__, repolist=repolist):
logging.warning(f"ISSUES OVER TIME - WAITING ON DATA TO BECOME AVAILABLE")
@@ -135,7 +158,7 @@ def issues_over_time_graph(repolist, interval):
return nodata_graph

# function for all data pre processing
df_created, df_closed, df_open = process_data(df, interval)
df_created, df_closed, df_open = process_data(df, interval, start_date, end_date)

fig = create_figure(df_created, df_closed, df_open, interval)

@@ -144,10 +167,10 @@
return fig


def process_data(df: pd.DataFrame, interval):
def process_data(df: pd.DataFrame, interval, start_date, end_date):
# convert to datetime objects rather than strings
df["created"] = pd.to_datetime(df["created"], utc=True)
df["closed"] = pd.to_datetime(df["closed"], utc=True)
df["created"] = pd.to_datetime(df["created"], utc=False)
df["closed"] = pd.to_datetime(df["closed"], utc=False)

# order values chronologically by creation date
df = df.sort_values(by="created", axis=0, ascending=True)
@@ -175,19 +198,21 @@ def process_data(df: pd.DataFrame, interval):
df_closed = closed_range.to_frame().reset_index().rename(columns={"index": "Date"})
df_closed["Date"] = pd.to_datetime(df_closed["Date"].astype(str).str[:period_slice])

# formatting for graph generation
if interval == "M":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-%m-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-%m-01")
elif interval == "Y":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-01-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-01-01")

# first and last elements of the dataframe are the
# earliest and latest events respectively
earliest = df["created"].min()
latest = max(df["created"].max(), df["closed"].max())

# filter values based on the date picker; this needs to come after the open-issue setup for correct counting
if start_date is not None:
df_created = df_created[df_created.Date >= start_date]
df_closed = df_closed[df_closed.Date >= start_date]
earliest = start_date
if end_date is not None:
df_created = df_created[df_created.Date <= end_date]
df_closed = df_closed[df_closed.Date <= end_date]
latest = end_date

# beginning to the end of time by the specified interval
dates = pd.date_range(start=earliest, end=latest, freq="D", inclusive="both")

@@ -197,6 +222,14 @@
# applies function to get the number of open issues for each day
df_open["Open"] = df_open.apply(lambda row: get_open(df, row.Date), axis=1)

# formatting for graph generation
if interval == "M":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-%m-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-%m-01")
elif interval == "Y":
df_created["Date"] = df_created["Date"].dt.strftime("%Y-01-01")
df_closed["Date"] = df_closed["Date"].dt.strftime("%Y-01-01")

df_open["Date"] = df_open["Date"].dt.strftime("%Y-%m-%d")

return df_created, df_closed, df_open
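
get_open itself is defined elsewhere in this file and unchanged by this commit; the following is a plausible re-creation (an assumption, not the actual helper) that shows why the date filtering above can safely run first — each day's count depends only on the full df, not on the filtered frames:

import pandas as pd

def get_open(df: pd.DataFrame, date) -> int:
    """Hypothetical sketch: issues created on or before `date`
    and not yet closed by then are open on that day."""
    date = pd.to_datetime(date)
    created = df["created"] <= date
    still_open = df["closed"].isna() | (df["closed"] > date)
    return int((created & still_open).sum())

# toy usage
df = pd.DataFrame(
    {
        "created": pd.to_datetime(["2023-01-01", "2023-02-01"]),
        "closed": pd.to_datetime(["2023-01-15", None]),
    }
)
print(get_open(df, "2023-01-10"))  # 1 (first issue open, second not created yet)
print(get_open(df, "2023-03-01"))  # 1 (first closed, second still open)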
@@ -226,13 +259,13 @@ def create_figure(df_created: pd.DataFrame, df_closed: pd.DataFrame, df_open: pd
marker=dict(color=color_seq[4]),
name="Closed",
)
fig.update_xaxes(
"""fig.update_xaxes(
showgrid=True,
ticklabelmode="period",
dtick=period,
rangeslider_yaxis_rangemode="match",
range=x_r,
)
)"""
fig.update_layout(
xaxis_title=x_name,
yaxis_title="Number of Issues",
@@ -176,13 +176,13 @@ def process_data(df: pd.DataFrame, interval):
def create_figure(df: pd.DataFrame, interval):
column = "Weekday"
order = [
"Sunday",
"Monday",
"Tuesday",
"Wednesday",
"Thursday",
"Friday",
"Saturday",
"Sunday",
]
if interval == "H":
column = "Hour"