Skip to content
This repository has been archived by the owner on Sep 9, 2020. It is now read-only.

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
falkerson committed Jul 19, 2019
1 parent 7679fcd commit f0be1b8
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 22 deletions.
40 changes: 18 additions & 22 deletions datarobot_batch_scoring/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -434,25 +434,6 @@ def sniff_dialect(sample, sep, skip_dialect, ui):
return dialect


def get_opener_and_mode(is_gz, text=False):
    """Choose the callable and mode string used to open a dataset file.

    Parameters
    ----------
    is_gz : bool
        Truthy when the dataset should be opened as a gzip file.
    text : bool
        Truthy to request a text-mode handle, falsy for a binary one.

    Returns
    -------
    tuple
        ``(opener, mode)``; the caller invokes ``opener(path, mode, ...)``.
    """
    if not is_gz:
        if not six.PY2:
            return (open, 'rt' if text else 'rb')
        if not text:
            # Python 2, non-text request: builtin open with the 'rU' mode.
            return (open, 'rU')
        # Python 2, text request: use io.open rather than the builtin open.
        from io import open as io_open
        return (io_open, 'r')

    if six.PY2:
        # gzip_with_encoding is a helper defined elsewhere in this module.
        return (gzip_with_encoding, 'r' if text else 'rb')
    return (gzip.open, 'rt' if text else 'rb')


def investigate_encoding_and_dialect(dataset, sep, ui, fast=False,
encoding=None, skip_dialect=False,
output_delimiter=None):
Expand All @@ -465,7 +446,7 @@ def investigate_encoding_and_dialect(dataset, sep, ui, fast=False,
else:
sample_size = DETECT_SAMPLE_SIZE_SLOW
is_gz = dataset.endswith('.gz')
opener, mode = get_opener_and_mode(is_gz)
opener, mode = _get_opener_and_mode(is_gz)
with opener(dataset, mode) as dfile:
sample = dfile.read(sample_size)

Expand All @@ -479,7 +460,7 @@ def investigate_encoding_and_dialect(dataset, sep, ui, fast=False,
encoding = encoding.lower()
sample[:1000].decode(encoding) # Fail here if the encoding is invalid

opener, mode = get_opener_and_mode(is_gz, text=True)
opener, mode = _get_opener_and_mode(is_gz, text=True)
with opener(dataset, mode, encoding=encoding) as dfile:
sample = dfile.read(sample_size)

Expand Down Expand Up @@ -539,7 +520,7 @@ def auto_sampler(dataset, encoding, ui):

sample_size = AUTO_SAMPLE_SIZE
is_gz = dataset.endswith('.gz')
opener, mode = get_opener_and_mode(is_gz, text=True)
opener, mode = _get_opener_and_mode(is_gz, text=True)
with opener(dataset, mode, encoding=encoding) as dfile:
sample = dfile.read(sample_size)

Expand Down Expand Up @@ -612,3 +593,18 @@ def peek_row(dataset, delimiter, ui, fast_mode, encoding):
except StopIteration:
raise ValueError('Cannot peek first row from {}'.format(dataset))
return batch


def _get_opener_and_mode(is_gz, text=False):
    """Choose the callable and mode string used to open a dataset file.

    Parameters
    ----------
    is_gz : bool
        Truthy when the dataset should be opened as a gzip file.
    text : bool
        Truthy to request a text-mode handle, falsy for a binary one.

    Returns
    -------
    tuple
        ``(opener, mode)``; the caller invokes ``opener(path, mode, ...)``.
    """
    running_py2 = six.PY2

    if not is_gz:
        plain_mode = 'rt' if text else 'rb'
        if not running_py2:
            return (open, plain_mode)
        # On Python 2, io.open is used in place of the builtin open.
        from io import open as io_open
        return (io_open, plain_mode)

    if running_py2:
        # gzip_with_encoding is a helper defined elsewhere in this module.
        return (gzip_with_encoding, 'r' if text else 'rb')
    return (gzip.open, 'rt' if text else 'rb')
Loading

0 comments on commit f0be1b8

Please sign in to comment.