Skip to content

Commit

Permalink
Unify optional columns in survey and tracker
Browse files Browse the repository at this point in the history
  • Loading branch information
rantahar committed Oct 7, 2024
1 parent c78eb73 commit 9f3601d
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 9 deletions.
15 changes: 10 additions & 5 deletions niimpy/preprocessing/survey.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,8 @@ def clean_survey_column_names(df):
df : pandas.DataFrame
The DataFrame with cleaned column names.
"""
assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type"

for char in ['.', ',', ':', ';', '!', '?', '(', ')', '[', ']', '{', '}']:
df.columns = df.columns.str.replace(char, "")
for char in ['-', '_', '—']:
Expand Down Expand Up @@ -194,7 +196,7 @@ def convert_survey_to_numerical_answer(df, id_map, use_prefix=False):
df[col] = df[col].map(map)
return df

def survey_statistic(df, config):
def survey_statistic(df, config=None):
'''
Return statistics for a single survey question or a list of questions.
Assuming that each of the columns contains numerical values representing
Expand All @@ -205,7 +207,7 @@ def survey_statistic(df, config):
----------
df: pandas.DataFrame
Input data frame
config: dict
config: dict, optional
Dictionary containing optional arguments for the computation of survey
statistics
Expand All @@ -222,12 +224,15 @@ def survey_statistic(df, config):
dict: pandas.DataFrame
A dataframe containing summaries of each questionnaire.
'''
assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
if config is None:
config = {}
assert isinstance(config, dict), "config is not a dictionary"

columns = config.get('columns', None)
prefix = config.get('prefix', None)
resample_args = config.get('resample_args', {"rule":"1D"})

assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe."
if columns is not None:
assert type(columns) == str or type(columns) == list, "columns is not a string or a list of strings."
if prefix is not None:
Expand All @@ -244,7 +249,7 @@ def survey_statistic(df, config):
columns = [c for c in df.columns if c.startswith(prefix)]

if type(columns) == str:
columns = [columns]
columns = [columns]

def calculate_statistic(df):
result = {}
Expand Down Expand Up @@ -282,7 +287,7 @@ def sum_survey_scores(df, survey_prefix=None):
survey_score: pandas DataFrame
DataFrame containing the sum of each questionnaire marked with survey_prefix
"""

assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
assert type(survey_prefix) == str or type(survey_prefix) == list, "survey_prefix is not a string or a list of strings."

result = pd.DataFrame(df["user"])
Expand Down
15 changes: 11 additions & 4 deletions niimpy/preprocessing/tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
group_by_columns = ["user", "device"]


def step_summary(df, config={}):
def step_summary(df, config=None):
# value_col='values', user_id=None, start_date=None, end_date=None):
"""Return the summary of step count in a time range. The summary includes the following information
of step count per day: mean, standard deviation, min, max
Expand All @@ -13,7 +13,7 @@ def step_summary(df, config={}):
----------
df : Pandas Dataframe
Dataframe containing the hourly step count of an individual. The dataframe must have a datetime index.
config: dict
config: dict, optional
Dictionary keys containing optional arguments. These can be:
value_col: str.
Expand All @@ -33,6 +33,9 @@ def step_summary(df, config={}):

assert 'user' in df.columns, 'User column does not exist'
assert df.index.inferred_type == 'datetime64', "Dataframe must have a datetime index"
if config is None:
config = {}
assert isinstance(config, dict), "config is not a dictionary"

value_col = config.get("value_col", "values")
user_id = config.get("user_id", None)
Expand Down Expand Up @@ -73,7 +76,7 @@ def step_summary(df, config={}):
return summary_df


def tracker_step_distribution(steps_df, config={}):
def tracker_step_distribution(steps_df, config=None):
"""Return distribution of steps within a time range.
The number of steps is sampled according to the frequency rule in resample_args.
This is divided by the total number of steps in a larger time frame, given by
Expand All @@ -85,7 +88,7 @@ def tracker_step_distribution(steps_df, config={}):
----------
steps_df : Pandas Dataframe
Dataframe containing the step data of each individual.
config: dict
config: dict, optional
Dictionary keys containing optional arguments. These can be:
steps_column: str. Optional
Expand All @@ -100,6 +103,10 @@ def tracker_step_distribution(steps_df, config={}):
df: pandas DataFrame
A dataframe containing the distribution of step count.
"""
assert isinstance(steps_df, pd.DataFrame), "df_u is not a pandas dataframe"
if config is None:
config = {}
assert isinstance(config, dict), "config is not a dictionary"

steps_column = config.get("steps_column", "steps")
resample_args = config.get("resample_args", {'rule': 'h'})
Expand Down

0 comments on commit 9f3601d

Please sign in to comment.