Unify optional columns in survey and tracker

digitraceslab · Oct 7, 2024 · 9f3601d · 9f3601d
1 parent c78eb73
commit 9f3601d
Show file tree

Hide file tree

Showing 2 changed files with 21 additions and 9 deletions.
diff --git a/niimpy/preprocessing/survey.py b/niimpy/preprocessing/survey.py
@@ -134,6 +134,8 @@ def clean_survey_column_names(df):
         df : pandas.DataFrame
           The DataFrame with cleaned column names.
     """
+    assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type"
+
     for char in ['.', ',', ':', ';', '!', '?', '(', ')', '[', ']', '{', '}']:
         df.columns = df.columns.str.replace(char, "")
     for char in ['-', '_', '—']:
@@ -194,7 +196,7 @@ def convert_survey_to_numerical_answer(df, id_map, use_prefix=False):
             df[col] = df[col].map(map)
     return df
 
-def survey_statistic(df, config):
+def survey_statistic(df, config=None):
     '''
     Return statistics for a single survey question or a list of questions.
     Assuming that each of the columns contains numerical values representing
@@ -205,7 +207,7 @@ def survey_statistic(df, config):
     ----------
     df: pandas.DataFrame
         Input data frame
-    config: dict
+    config: dict, optional
         Dictionary containing optional arguments for the computation of survey
         statistics
 
@@ -222,12 +224,15 @@ def survey_statistic(df, config):
     dict: pandas.DataFrame
         A dataframe containing summaries of each questionnaire.
     '''
+    assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), "config is not a dictionary"
 
     columns = config.get('columns', None)
     prefix = config.get('prefix', None)
     resample_args = config.get('resample_args', {"rule":"1D"})    
 
-    assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe."
     if columns is not None:
         assert type(columns) == str or type(columns) == list, "columns is not a string or a list of strings."
     if prefix is not None:
@@ -244,7 +249,7 @@ def survey_statistic(df, config):
             columns = [c for c in df.columns if c.startswith(prefix)]
 
     if type(columns) == str:
-        columns = [columns] 
+        columns = [columns]
 
     def calculate_statistic(df):
         result = {}
@@ -282,7 +287,7 @@ def sum_survey_scores(df, survey_prefix=None):
     survey_score: pandas DataFrame
         DataFrame containing the sum of each questionnaire marked with survey_prefix
     """
-
+    assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe"
     assert type(survey_prefix) == str or type(survey_prefix) == list, "survey_prefix is not a string or a list of strings."
 
     result = pd.DataFrame(df["user"])

diff --git a/niimpy/preprocessing/tracker.py b/niimpy/preprocessing/tracker.py
@@ -4,7 +4,7 @@
 group_by_columns = ["user", "device"]
 
 
-def step_summary(df, config={}):
+def step_summary(df, config=None):
     # value_col='values', user_id=None, start_date=None, end_date=None):
     """Return the summary of step count in a time range. The summary includes the following information
     of step count per day: mean, standard deviation, min, max
@@ -13,7 +13,7 @@ def step_summary(df, config={}):
     ----------
     df : Pandas Dataframe
         Dataframe containing the hourly step count of an individual. The dataframe must have a datetime index.
-    config: dict
+    config: dict, optional
         Dictionary keys containing optional arguments. These can be:
 
         value_col: str.
@@ -33,6 +33,9 @@ def step_summary(df, config={}):
 
     assert 'user' in df.columns, 'User column does not exist'
     assert df.index.inferred_type == 'datetime64', "Dataframe must have a datetime index"
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), "config is not a dictionary"
 
     value_col = config.get("value_col", "values")
     user_id = config.get("user_id", None)
@@ -73,7 +76,7 @@ def step_summary(df, config={}):
     return summary_df
 
 
-def tracker_step_distribution(steps_df, config={}):
+def tracker_step_distribution(steps_df, config=None):
     """Return distribution of steps within a time range.
     The number of steps is sampled according to the frequency rule in resample_args.
     This is divided by the total number of steps in a larger time frame, given by
@@ -85,7 +88,7 @@ def tracker_step_distribution(steps_df, config={}):
     ----------
     steps_df : Pandas Dataframe
         Dataframe containing the step counts of each individual.
-    config: dict
+    config: dict, optional
         Dictionary keys containing optional arguments. These can be:
 
         steps_column: str. Optional
@@ -100,6 +103,10 @@ def tracker_step_distribution(steps_df, config={}):
     df: pandas DataFrame
         A dataframe containing the distribution of step count.
     """
+    assert isinstance(steps_df, pd.DataFrame), "df_u is not a pandas dataframe"
+    if config is None:
+        config = {}
+    assert isinstance(config, dict), "config is not a dictionary"
 
     steps_column = config.get("steps_column", "steps")
     resample_args = config.get("resample_args", {'rule': 'h'})