diff --git a/pandas/_typing.py b/pandas/_typing.py new file mode 100644 index 0000000000000..5225be33d7604 --- /dev/null +++ b/pandas/_typing.py @@ -0,0 +1,4 @@ +from pathlib import Path +from typing import IO, AnyStr, Union + +FilePathOrBuffer = Union[str, Path, IO[AnyStr]] diff --git a/pandas/io/gcs.py b/pandas/io/gcs.py index aa1cb648f05d1..89dade27ad543 100644 --- a/pandas/io/gcs.py +++ b/pandas/io/gcs.py @@ -12,5 +12,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, mode = 'rb' fs = gcsfs.GCSFileSystem() - filepath_or_buffer = fs.open(filepath_or_buffer, mode) + filepath_or_buffer = fs.open( + filepath_or_buffer, mode) # type: gcsfs.GCSFile return filepath_or_buffer, None, compression, True diff --git a/pandas/io/parsers.py b/pandas/io/parsers.py index 06f326d0a0850..f402116d8c4a3 100755 --- a/pandas/io/parsers.py +++ b/pandas/io/parsers.py @@ -30,6 +30,7 @@ from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.missing import isna +from pandas._typing import FilePathOrBuffer from pandas.core import algorithms from pandas.core.arrays import Categorical from pandas.core.frame import DataFrame @@ -400,7 +401,7 @@ def _validate_names(names): return names -def _read(filepath_or_buffer, kwds): +def _read(filepath_or_buffer: FilePathOrBuffer, kwds): """Generic reader of line files.""" encoding = kwds.get('encoding', None) if encoding is not None: @@ -409,7 +410,12 @@ def _read(filepath_or_buffer, kwds): compression = kwds.get('compression', 'infer') compression = _infer_compression(filepath_or_buffer, compression) - filepath_or_buffer, _, compression, should_close = get_filepath_or_buffer( + + # TODO: get_filepath_or_buffer could return + # Union[FilePathOrBuffer, s3fs.S3File, gcsfs.GCSFile] + # though mypy handling of conditional imports is difficult. + # See https://github.com/python/mypy/issues/1297 + fp_or_buf, _, compression, should_close = get_filepath_or_buffer( filepath_or_buffer, encoding, compression) kwds['compression'] = compression @@ -426,7 +432,7 @@ def _read(filepath_or_buffer, kwds): _validate_names(kwds.get("names", None)) # Create the parser. - parser = TextFileReader(filepath_or_buffer, **kwds) + parser = TextFileReader(fp_or_buf, **kwds) if chunksize or iterator: return parser @@ -438,7 +444,7 @@ def _read(filepath_or_buffer, kwds): if should_close: try: - filepath_or_buffer.close() + fp_or_buf.close() except ValueError: pass @@ -533,7 +539,7 @@ def _make_parser_function(name, default_sep=','): else: sep = default_sep - def parser_f(filepath_or_buffer, + def parser_f(filepath_or_buffer: FilePathOrBuffer, sep=sep, delimiter=None, @@ -725,8 +731,11 @@ def parser_f(filepath_or_buffer, )(read_table) -def read_fwf(filepath_or_buffer, colspecs='infer', widths=None, - infer_nrows=100, **kwds): +def read_fwf(filepath_or_buffer: FilePathOrBuffer, + colspecs='infer', + widths=None, + infer_nrows=100, + **kwds): r""" Read a table of fixed-width formatted lines into DataFrame. diff --git a/pandas/io/s3.py b/pandas/io/s3.py index 6129f0dabe027..f127bb4c8094c 100644 --- a/pandas/io/s3.py +++ b/pandas/io/s3.py @@ -31,5 +31,6 @@ def get_filepath_or_buffer(filepath_or_buffer, encoding=None, # A NoCredentialsError is raised if you don't have creds # for that bucket. fs = s3fs.S3FileSystem(anon=True) - filepath_or_buffer = fs.open(_strip_schema(filepath_or_buffer), mode) + filepath_or_buffer = fs.open( + _strip_schema(filepath_or_buffer), mode) # type: s3fs.S3File return filepath_or_buffer, None, compression, True