Skip to content

Commit

Permalink
Move reading.read.read_csv and reading.read.read_sqlite to their own …
Browse files Browse the repository at this point in the history
…files
  • Loading branch information
rantahar committed Jul 9, 2024
1 parent 3d9191a commit 125171e
Show file tree
Hide file tree
Showing 10 changed files with 220 additions and 221 deletions.
4 changes: 2 additions & 2 deletions niimpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

from niimpy.reading.database import open, Data1, ALL
from niimpy.preprocessing.filter import filter_dataframe
from niimpy.reading.read import read_sqlite, read_sqlite_tables
from niimpy.reading.read import read_csv, read_csv_string
from niimpy.reading.sqlite import read_sqlite, read_sqlite_tables
from niimpy.reading.csv import read_csv, read_csv_string
from niimpy.preprocessing import sampledata
from niimpy.preprocessing import util

Expand Down
37 changes: 36 additions & 1 deletion niimpy/preprocessing/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,42 @@ def unlink_if_exists(x):
unlink_if_exists(SQLITE3_EXTENSIONS_FILENAME)


#TODO: rename to data.py
def read_preprocess(df, add_group=None):
    """Apply the standard reader arguments to a freshly read dataframe.

    This is a small post-processing filter shared by the file readers.
    It should be considered a private, unstable function.

    Parameters
    ----------
    df : pandas.DataFrame
        Input data frame.
    add_group : string, optional
        If given, a 'group' column is added with every value set to this
        identifier.

    Returns
    -------
    df : pandas.DataFrame
        The resulting dataframe (modified in-place if possible, but may
        also be a copy).
    """
    # Nothing was requested: hand the frame back untouched.
    if add_group is None:
        return df

    # A scalar assignment fills the whole column with the identifier.
    # Converting the column to a categorical dtype was tried here before
    # and is deliberately left disabled.
    df['group'] = add_group
    return df


def df_normalize(df, tz=None, old_tz=None):
"""Normalize a df (from sql) before presenting it to the user.
Expand Down
3 changes: 2 additions & 1 deletion niimpy/reading/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from . import mhealth
from . import read
from . import csv
from . import google_takeout
from . import sqlite
78 changes: 78 additions & 0 deletions niimpy/reading/csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
"""Read data from a CSV file
"""

import pandas as pd
import warnings

from niimpy.preprocessing import util


def read_csv(filename, read_csv_options=None, add_group=None,
             tz=None):
    """Read a DataFrame from a csv file.

    The file is parsed with `pandas.read_csv`, then processed with
    `niimpy.preprocessing.util.df_normalize` and
    `niimpy.preprocessing.util.read_preprocess`.

    Parameters
    ----------
    filename : str
        Filename of the csv file. It is handed unchanged to
        `pandas.read_csv`.
    read_csv_options : dict, optional
        Dictionary of extra keyword options for `pandas.read_csv`, if this
        is necessary for custom csv files. Defaults to no extra options.
    add_group : object, optional
        If given, add a 'group' column with all values set to this.
    tz : str, optional
        Timezone forwarded to `df_normalize`, e.g. ``'Europe/Helsinki'``.
        Leaving it as None is deprecated and emits a DeprecationWarning.

    Returns
    -------
    df : pandas.DataFrame
        The dataframe returned by `read_preprocess`.
    """
    if tz is None:
        # stacklevel=2 attributes the warning to the caller's line.
        warnings.warn(DeprecationWarning("From now on, you should explicitely specify timezone with e.g. tz='Europe/Helsinki'"), stacklevel=2)

    # Use None as the default instead of a shared mutable dict.
    if read_csv_options is None:
        read_csv_options = {}

    df = pd.read_csv(filename, **read_csv_options)

    # df_normalize sets the index to time values and does other time
    # conversions. It works in place, so its return value is not used.
    util.df_normalize(df, tz=tz)
    df = util.read_preprocess(df, add_group=add_group)
    return df


def read_csv_string(string, tz=None):
    """Parse a string containing CSV and return a dataframe.

    This should not be used for serious reading of CSV from disk, but
    can be useful for tests and examples. Various CSV reading options
    are turned on in order to be better for examples:

    - Allow comments in the CSV file (lines or trailing text after ``#``).
    - Remove the `datetime` column (redundant with `index` but some
      older functions break without it, so default readers need to leave
      it).

    Parameters
    ----------
    string : str
        String containing the CSV file contents.
    tz : str, optional
        Timezone passed on to `read_csv`. Leaving it as None emits a
        DeprecationWarning.

    Returns
    -------
    df : pandas.DataFrame
    """
    import io

    if tz is None:
        # NOTE(review): read_csv below receives the same tz=None and has its
        # own identical check, so a second DeprecationWarning is emitted.
        warnings.warn(DeprecationWarning("From now on, you should explicitely specify timezone with e.g. tz='Europe/Helsinki'"), stacklevel=2)

    # Wrap the text in a file-like object so the regular reader can be used.
    csv_buffer = io.StringIO(string)
    example_options = {'comment': '#'}
    df = read_csv(csv_buffer, tz=tz, read_csv_options=example_options)

    # Drop the redundant column in place when the reader left it behind.
    if 'datetime' in df.columns:
        del df['datetime']
    return df
212 changes: 0 additions & 212 deletions niimpy/reading/read.py

This file was deleted.

Loading

0 comments on commit 125171e

Please sign in to comment.