TYP: make the type annotations of read_csv & read_table discoverable (#…
topper-123 authored Jun 25, 2020
1 parent e23bd26 commit a7d96fa
Showing 3 changed files with 246 additions and 158 deletions.
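For context, here is a condensed sketch (toy names such as ``read_csv_old`` and ``read_csv_new``, not the pandas source) of the two patterns this diff swaps between: previously the public readers were module-level variables bound to a shared inner ``parser_f`` produced by a ``_make_parser_function`` factory; after this commit each reader is an ordinary ``def``, so its signature and any type annotations on it sit where ``inspect``, IDEs and documentation tooling can find them.

import inspect

# Toy versions of the two patterns (not the pandas source).

# Before: a factory returns a shared inner function and renames it afterwards.
# Renaming only changes the ``__name__`` attribute, not the code object or the
# qualified name, so tooling still sees the generic wrapper.
def _make_parser_function(name, default_sep=","):
    def parser_f(filepath_or_buffer, sep=default_sep, delimiter=None):
        return filepath_or_buffer, sep, delimiter

    parser_f.__name__ = name
    return parser_f


read_csv_old = _make_parser_function("read_csv", default_sep=",")


# After: an ordinary ``def`` whose parameters are spelled out at module level,
# so the signature (and any annotations placed on it) is directly discoverable.
def read_csv_new(filepath_or_buffer, sep=",", delimiter=None):
    return filepath_or_buffer, sep, delimiter


print(read_csv_old.__qualname__)        # _make_parser_function.<locals>.parser_f
print(read_csv_new.__qualname__)        # read_csv_new
print(inspect.signature(read_csv_new))  # (filepath_or_buffer, sep=',', delimiter=None)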
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.1.0.rst
@@ -1025,6 +1025,7 @@ I/O
- Bug in :meth:`~SQLDatabase.execute` was raising a ``ProgrammingError`` for some DB-API drivers when the SQL statement contained the `%` character and no parameters were present (:issue:`34211`)
- Bug in :meth:`~pandas.io.stata.StataReader` which resulted in categorical variables with different dtypes when reading data using an iterator. (:issue:`31544`)
- :meth:`HDFStore.keys` has now an optional `include` parameter that allows the retrieval of all native HDF5 table names (:issue:`29916`)
- `TypeError` exceptions raised by :meth:`read_csv` and :meth:`read_table` were showing as ``parser_f`` when an unexpected keyword argument was passed (:issue:`25648`)
- Bug in :meth:`read_excel` for ODS files removes 0.0 values (:issue:`27222`)
- Bug in :meth:`ujson.encode` was raising an `OverflowError` with numbers larger than sys.maxsize (:issue:`34395`)

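The ``parser_f`` entry above (GH25648) concerns the text of ``TypeError`` messages: CPython derives the function name in "unexpected keyword argument" errors from the function's code object, which the ``parser_f.__name__ = name`` rename never touched. A standalone sketch of the before/after behaviour, again with toy names rather than the pandas source:

# Before: the factory-built reader reports the internal wrapper name.
def _make_parser_function(name):
    def parser_f(filepath_or_buffer, sep=","):
        return filepath_or_buffer, sep

    parser_f.__name__ = name  # renames the attribute, not the code object
    return parser_f


read_csv_factory = _make_parser_function("read_csv")

try:
    read_csv_factory("data.csv", bogus=True)
except TypeError as exc:
    # On Python of that era the message reads roughly:
    #   parser_f() got an unexpected keyword argument 'bogus'
    print(exc)


# After: an explicit definition puts the public name in the message instead.
def read_csv_explicit(filepath_or_buffer, sep=","):
    return filepath_or_buffer, sep


try:
    read_csv_explicit("data.csv", bogus=True)
except TypeError as exc:
    # read_csv_explicit() got an unexpected keyword argument 'bogus'
    print(exc)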
369 changes: 211 additions & 158 deletions pandas/io/parsers.py
@@ -530,176 +530,229 @@ def _read(filepath_or_buffer: FilePathOrBuffer, kwds):
_deprecated_args: Set[str] = set()


def _make_parser_function(name, default_sep=","):
def parser_f(
filepath_or_buffer: FilePathOrBuffer,
sep=default_sep,
delimiter=None,
# Column and Index Locations and Names
header="infer",
names=None,
index_col=None,
usecols=None,
squeeze=False,
prefix=None,
mangle_dupe_cols=True,
# General Parsing Configuration
dtype=None,
engine=None,
converters=None,
true_values=None,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=0,
nrows=None,
# NA and Missing Data Handling
na_values=None,
keep_default_na=True,
na_filter=True,
verbose=False,
skip_blank_lines=True,
# Datetime Handling
parse_dates=False,
infer_datetime_format=False,
keep_date_col=False,
date_parser=None,
dayfirst=False,
cache_dates=True,
# Iteration
iterator=False,
chunksize=None,
# Quoting, Compression, and File Format
compression="infer",
thousands=None,
decimal: str = ".",
lineterminator=None,
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
doublequote=True,
escapechar=None,
comment=None,
encoding=None,
dialect=None,
# Error Handling
error_bad_lines=True,
warn_bad_lines=True,
# Internal
delim_whitespace=False,
low_memory=_c_parser_defaults["low_memory"],
memory_map=False,
float_precision=None,
):

# gh-23761
#
# When a dialect is passed, it overrides any of the overlapping
# parameters passed in directly. We don't want to warn if the
# default parameters were passed in (since it probably means
# that the user didn't pass them in explicitly in the first place).
#
# "delimiter" is the annoying corner case because we alias it to
# "sep" before doing comparison to the dialect values later on.
# Thus, we need a flag to indicate that we need to "override"
# the comparison to dialect values by checking if default values
# for BOTH "delimiter" and "sep" were provided.
if dialect is not None:
sep_override = delimiter is None and sep == default_sep
kwds = dict(sep_override=sep_override)
else:
kwds = dict()

# Alias sep -> delimiter.
if delimiter is None:
delimiter = sep

if delim_whitespace and delimiter != default_sep:
raise ValueError(
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)
@Appender(
_doc_read_csv_and_table.format(
func_name="read_csv",
summary="Read a comma-separated values (csv) file into DataFrame.",
_default_sep="','",
)
)
def read_csv(
filepath_or_buffer: FilePathOrBuffer,
sep=",",
delimiter=None,
# Column and Index Locations and Names
header="infer",
names=None,
index_col=None,
usecols=None,
squeeze=False,
prefix=None,
mangle_dupe_cols=True,
# General Parsing Configuration
dtype=None,
engine=None,
converters=None,
true_values=None,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=0,
nrows=None,
# NA and Missing Data Handling
na_values=None,
keep_default_na=True,
na_filter=True,
verbose=False,
skip_blank_lines=True,
# Datetime Handling
parse_dates=False,
infer_datetime_format=False,
keep_date_col=False,
date_parser=None,
dayfirst=False,
cache_dates=True,
# Iteration
iterator=False,
chunksize=None,
# Quoting, Compression, and File Format
compression="infer",
thousands=None,
decimal: str = ".",
lineterminator=None,
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
doublequote=True,
escapechar=None,
comment=None,
encoding=None,
dialect=None,
# Error Handling
error_bad_lines=True,
warn_bad_lines=True,
# Internal
delim_whitespace=False,
low_memory=_c_parser_defaults["low_memory"],
memory_map=False,
float_precision=None,
):
# gh-23761
#
# When a dialect is passed, it overrides any of the overlapping
# parameters passed in directly. We don't want to warn if the
# default parameters were passed in (since it probably means
# that the user didn't pass them in explicitly in the first place).
#
# "delimiter" is the annoying corner case because we alias it to
# "sep" before doing comparison to the dialect values later on.
# Thus, we need a flag to indicate that we need to "override"
# the comparison to dialect values by checking if default values
# for BOTH "delimiter" and "sep" were provided.
default_sep = ","

if dialect is not None:
sep_override = delimiter is None and sep == default_sep
kwds = dict(sep_override=sep_override)
else:
kwds = dict()

if engine is not None:
engine_specified = True
else:
engine = "c"
engine_specified = False
# Alias sep -> delimiter.
if delimiter is None:
delimiter = sep

kwds.update(
delimiter=delimiter,
engine=engine,
dialect=dialect,
compression=compression,
engine_specified=engine_specified,
doublequote=doublequote,
escapechar=escapechar,
quotechar=quotechar,
quoting=quoting,
skipinitialspace=skipinitialspace,
lineterminator=lineterminator,
header=header,
index_col=index_col,
names=names,
prefix=prefix,
skiprows=skiprows,
skipfooter=skipfooter,
na_values=na_values,
true_values=true_values,
false_values=false_values,
keep_default_na=keep_default_na,
thousands=thousands,
comment=comment,
decimal=decimal,
parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst,
date_parser=date_parser,
cache_dates=cache_dates,
nrows=nrows,
iterator=iterator,
chunksize=chunksize,
converters=converters,
dtype=dtype,
usecols=usecols,
verbose=verbose,
encoding=encoding,
squeeze=squeeze,
memory_map=memory_map,
float_precision=float_precision,
na_filter=na_filter,
delim_whitespace=delim_whitespace,
warn_bad_lines=warn_bad_lines,
error_bad_lines=error_bad_lines,
low_memory=low_memory,
mangle_dupe_cols=mangle_dupe_cols,
infer_datetime_format=infer_datetime_format,
skip_blank_lines=skip_blank_lines,
if delim_whitespace and delimiter != default_sep:
raise ValueError(
"Specified a delimiter with both sep and "
"delim_whitespace=True; you can only specify one."
)

return _read(filepath_or_buffer, kwds)

parser_f.__name__ = name

return parser_f
if engine is not None:
engine_specified = True
else:
engine = "c"
engine_specified = False

kwds.update(
delimiter=delimiter,
engine=engine,
dialect=dialect,
compression=compression,
engine_specified=engine_specified,
doublequote=doublequote,
escapechar=escapechar,
quotechar=quotechar,
quoting=quoting,
skipinitialspace=skipinitialspace,
lineterminator=lineterminator,
header=header,
index_col=index_col,
names=names,
prefix=prefix,
skiprows=skiprows,
skipfooter=skipfooter,
na_values=na_values,
true_values=true_values,
false_values=false_values,
keep_default_na=keep_default_na,
thousands=thousands,
comment=comment,
decimal=decimal,
parse_dates=parse_dates,
keep_date_col=keep_date_col,
dayfirst=dayfirst,
date_parser=date_parser,
cache_dates=cache_dates,
nrows=nrows,
iterator=iterator,
chunksize=chunksize,
converters=converters,
dtype=dtype,
usecols=usecols,
verbose=verbose,
encoding=encoding,
squeeze=squeeze,
memory_map=memory_map,
float_precision=float_precision,
na_filter=na_filter,
delim_whitespace=delim_whitespace,
warn_bad_lines=warn_bad_lines,
error_bad_lines=error_bad_lines,
low_memory=low_memory,
mangle_dupe_cols=mangle_dupe_cols,
infer_datetime_format=infer_datetime_format,
skip_blank_lines=skip_blank_lines,
)

return _read(filepath_or_buffer, kwds)

read_csv = _make_parser_function("read_csv", default_sep=",")
read_csv = Appender(
_doc_read_csv_and_table.format(
func_name="read_csv",
summary="Read a comma-separated values (csv) file into DataFrame.",
_default_sep="','",
)
)(read_csv)

read_table = _make_parser_function("read_table", default_sep="\t")
read_table = Appender(
@Appender(
_doc_read_csv_and_table.format(
func_name="read_table",
summary="Read general delimited file into DataFrame.",
_default_sep=r"'\\t' (tab-stop)",
)
)(read_table)
)
def read_table(
filepath_or_buffer: FilePathOrBuffer,
sep="\t",
delimiter=None,
# Column and Index Locations and Names
header="infer",
names=None,
index_col=None,
usecols=None,
squeeze=False,
prefix=None,
mangle_dupe_cols=True,
# General Parsing Configuration
dtype=None,
engine=None,
converters=None,
true_values=None,
false_values=None,
skipinitialspace=False,
skiprows=None,
skipfooter=0,
nrows=None,
# NA and Missing Data Handling
na_values=None,
keep_default_na=True,
na_filter=True,
verbose=False,
skip_blank_lines=True,
# Datetime Handling
parse_dates=False,
infer_datetime_format=False,
keep_date_col=False,
date_parser=None,
dayfirst=False,
cache_dates=True,
# Iteration
iterator=False,
chunksize=None,
# Quoting, Compression, and File Format
compression="infer",
thousands=None,
decimal: str = ".",
lineterminator=None,
quotechar='"',
quoting=csv.QUOTE_MINIMAL,
doublequote=True,
escapechar=None,
comment=None,
encoding=None,
dialect=None,
# Error Handling
error_bad_lines=True,
warn_bad_lines=True,
# Internal
delim_whitespace=False,
low_memory=_c_parser_defaults["low_memory"],
memory_map=False,
float_precision=None,
):
return read_csv(**locals())


def read_fwf(
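Two details of the new definitions above are worth noting: the shared docstring is produced by formatting a single ``_doc_read_csv_and_table`` template and applying pandas' ``Appender`` decorator, and ``read_table`` forwards its entire signature to ``read_csv`` with ``return read_csv(**locals())``. Below is a self-contained sketch of the same devices; ``_doc_template`` and ``_append_doc`` are hypothetical stand-ins, not pandas APIs.

# Toy stand-ins for the devices used in the diff above.
_doc_template = """{summary}

Parameters
----------
sep : str, default {_default_sep}
    Delimiter to use.
"""


def _append_doc(text):
    # Hypothetical simplification of pandas' ``Appender`` decorator: it just
    # appends the formatted template to the function's docstring.
    def decorator(func):
        func.__doc__ = (func.__doc__ or "") + text
        return func

    return decorator


@_append_doc(
    _doc_template.format(
        summary="Read a comma-separated values (csv) file.", _default_sep="','"
    )
)
def read_csv(filepath_or_buffer, sep=",", delimiter=None):
    return "csv", filepath_or_buffer, sep, delimiter


@_append_doc(
    _doc_template.format(
        summary="Read a general delimited file.", _default_sep=r"'\t' (tab-stop)"
    )
)
def read_table(filepath_or_buffer, sep="\t", delimiter=None):
    # ``locals()`` holds every argument by name at this point, so the call
    # forwards the full signature without restating it.
    return read_csv(**locals())


print(read_table("data.txt"))            # ('csv', 'data.txt', '\t', None)
print(read_csv.__doc__.splitlines()[0])  # Read a comma-separated values (csv) file.

The delegation via ``locals()`` keeps the two signatures from drifting apart while avoiding a second copy of the argument handling, which is the same trade-off the real ``read_table`` makes in the diff.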
