diff --git a/niimpy/preprocessing/survey.py b/niimpy/preprocessing/survey.py index de721c8a..72970c50 100644 --- a/niimpy/preprocessing/survey.py +++ b/niimpy/preprocessing/survey.py @@ -134,6 +134,8 @@ def clean_survey_column_names(df): df : pandas.DataFrame The DataFrame with cleaned column names. """ + assert isinstance(df, pd.DataFrame), "Please input data as a pandas DataFrame type" + for char in ['.', ',', ':', ';', '!', '?', '(', ')', '[', ']', '{', '}']: df.columns = df.columns.str.replace(char, "") for char in ['-', '_', '—']: @@ -194,7 +196,7 @@ def convert_survey_to_numerical_answer(df, id_map, use_prefix=False): df[col] = df[col].map(map) return df -def survey_statistic(df, config): +def survey_statistic(df, config=None): ''' Return statistics for a single survey question or a list of questions. Assuming that each of the columns contains numerical values representing @@ -205,7 +207,7 @@ def survey_statistic(df, config): ---------- df: pandas.DataFrame Input data frame - config: dict + config: dict, optional Dictionary keys containing optional arguments for the computation of screen information @@ -222,12 +224,15 @@ def survey_statistic(df, config): dict: pandas.DataFrame A dataframe containing summaries of each questionaire. ''' + assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe" + if config is None: + config = {} + assert isinstance(config, dict), "config is not a dictionary" columns = config.get('columns', None) prefix = config.get('prefix', None) resample_args = config.get('resample_args', {"rule":"1D"}) - assert isinstance(df, pd.DataFrame), "df is not a pandas dataframe." if columns is not None: assert type(columns) == str or type(columns) == list, "columns is not a string or a list of strings." if prefix is not None: @@ -244,7 +249,7 @@ def survey_statistic(df, config): columns = [c for c in df.columns if c.startswith(prefix)] if type(columns) == str: - columns = [columns] + columns = [columns] def calculate_statistic(df): result = {} @@ -282,7 +287,7 @@ def sum_survey_scores(df, survey_prefix=None): survey_score: pandas DataFrame DataFrame contains the sum of each questionnaires marked with survey_prefix """ - + assert isinstance(df, pd.DataFrame), "df_u is not a pandas dataframe" assert type(survey_prefix) == str or type(survey_prefix) == list, "survey_prefix is not a string or a list of strings." result = pd.DataFrame(df["user"]) diff --git a/niimpy/preprocessing/tracker.py b/niimpy/preprocessing/tracker.py index a59ca56b..1f9ba3f8 100644 --- a/niimpy/preprocessing/tracker.py +++ b/niimpy/preprocessing/tracker.py @@ -4,7 +4,7 @@ group_by_columns = ["user", "device"] -def step_summary(df, config={}): +def step_summary(df, config=None): # value_col='values', user_id=None, start_date=None, end_date=None): """Return the summary of step count in a time range. The summary includes the following information of step count per day: mean, standard deviation, min, max @@ -13,7 +13,7 @@ def step_summary(df, config={}): ---------- df : Pandas Dataframe Dataframe containing the hourly step count of an individual. The dataframe must be date time index. - config: dict + config: dict, optional Dictionary keys containing optional arguments. These can be: value_col: str. @@ -33,6 +33,9 @@ def step_summary(df, config={}): assert 'user' in df.columns, 'User column does not exist' assert df.index.inferred_type == 'datetime64', "Dataframe must have a datetime index" + if config is None: + config = {} + assert isinstance(config, dict), "config is not a dictionary" value_col = config.get("value_col", "values") user_id = config.get("user_id", None) @@ -73,7 +76,7 @@ def step_summary(df, config={}): return summary_df -def tracker_step_distribution(steps_df, config={}): +def tracker_step_distribution(steps_df, config=None): """Return distribution of steps within a time range. The number of step is sampled according to the frequency rule in resample_args. This is divided by the total number of steps in a larger time frame, given by @@ -85,7 +88,7 @@ def tracker_step_distribution(steps_df, config={}): ---------- steps_df : Pandas Dataframe Dataframe the step distribution of each individual. - config: dict + config: dict, optional Dictionary keys containing optional arguments. These can be: steps_column: str. Optional @@ -100,6 +103,10 @@ def tracker_step_distribution(steps_df, config={}): df: pandas DataFrame A dataframe containing the distribution of step count. """ + assert isinstance(steps_df, pd.DataFrame), "df_u is not a pandas dataframe" + if config is None: + config = {} + assert isinstance(config, dict), "config is not a dictionary" steps_column = config.get("steps_column", "steps") resample_args = config.get("resample_args", {'rule': 'h'})