From d6d320c7e3acb8499d23158cc1c528239dccebfb Mon Sep 17 00:00:00 2001
From: Briton Barker
Date: Tue, 7 Feb 2017 16:14:12 -0800
Subject: [PATCH] change inspect to print, add get_inspect to get object, make
 more unicode friendly; add ability to skip dicom and graph integration tests

---
 integration-tests/clean.sh                   |  3 +++
 integration-tests/runtests.sh                | 13 +++++++++--
 integration-tests/tests/doctgen.py           | 11 +++++++++-
 integration-tests/tests/test__smoke.py       |  2 +-
 python/sparktk/atable.py                     | 13 +++++++++--
 .../sparktk/frame/constructors/import_csv.py |  2 +-
 python/sparktk/frame/frame.py                |  2 +-
 python/sparktk/frame/ops/inspect.py          | 22 +++++++++++++++++++
 python/sparktk/graph/graph.py                |  4 ++--
 9 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/integration-tests/clean.sh b/integration-tests/clean.sh
index 2721f2de..1b9e5ae7 100755
--- a/integration-tests/clean.sh
+++ b/integration-tests/clean.sh
@@ -28,3 +28,6 @@ rm -f tests/test_docs_generated.py
 
 echo "$NAME rm -r tests/sandbox/*"
 rm -rf tests/sandbox/*
+
+echo "$NAME rm -r tests/.coverage*"
+rm -f tests/.coverage*

diff --git a/integration-tests/runtests.sh b/integration-tests/runtests.sh
index 0c103c0b..88b0cf79 100755
--- a/integration-tests/runtests.sh
+++ b/integration-tests/runtests.sh
@@ -34,13 +34,22 @@ if [ -z "$HADOOP_CONF_DIR" ]; then
 fi
 echo $NAME HADOOP_CONF_DIR=$HADOOP_CONF_DIR
 
+doctgen_args=""
+other_args=""
+if [ "$1" = "--skiplong" ]; then
+    doctgen_args="--skiplong"
+    shift
+    other_args="--ignore=test_dicom.py --ignore=test_graph.py"
+fi
+
 echo "$NAME Calling clean.sh"
 ./clean.sh
 
 cd tests
+
 echo "$NAME Generating the doctests test file"
-python2.7 doctgen.py
+python2.7 doctgen.py $doctgen_args
 GEN_DOCTESTS_SUCCESS=$?
 if [[ $GEN_DOCTESTS_SUCCESS != 0 ]]
 then
@@ -55,4 +64,4 @@ fi
 #python2.7 -m pytest -k test_kmeans  # example to run individual test
 #python2.7 -m pytest -k test_docs_python_sparktk_frame_ops_drop_columns_py  # example to run individual doc test
 export COVERAGE_FILE=$DIR/../coverage/integration_test_coverage.dat
-py.test --cov-config=$DIR/pycoverage.ini --cov=$DIR/../python --cov-report=html:$DIR/../coverage/pytest_integration $@
+py.test --cov-config=$DIR/pycoverage.ini --cov=$DIR/../python --cov-report=html:$DIR/../coverage/pytest_integration $other_args $@

diff --git a/integration-tests/tests/doctgen.py b/integration-tests/tests/doctgen.py
index 65148d20..e40df061 100644
--- a/integration-tests/tests/doctgen.py
+++ b/integration-tests/tests/doctgen.py
@@ -107,7 +107,7 @@ def _trim_test_path(path):
 def filter_exemptions(paths):
     """returns the given paths with the exemptions removed"""
     chop = len(path_to_frameops) + 1  # the + 1 is for the extra forward slash
-    filtered_paths = [p for p in paths if p[chop:] not in exemptions]
+    filtered_paths = [p for p in paths if p and p[chop:] not in exemptions]
     return filtered_paths
 
 
@@ -229,6 +229,15 @@ def main():
     else:
         print "[%s] Removed pre-existing .pyc file %s" % (this_script_name, pyc)
 
+    if '--skiplong' in sys.argv:
+        print "[%s] --skiplong argument found, skipping dicom, graph tests, maybe more" % this_script_name
+        global path_to_dicom, path_to_dicomops, path_to_graph, path_to_graphops
+        path_to_dicom = ''
+        path_to_dicomops = ''
+        path_to_graph = ''
+        path_to_graphops = ''
+
+
     # Python flatmap --> [item for list in listoflists for item in list]
     test_paths = [test_path for folder_path in [path_to_frameops,
                                                 path_to_framecons,

diff --git a/integration-tests/tests/test__smoke.py b/integration-tests/tests/test__smoke.py
index 9cb754cb..5d88e0f5 100644
--- a/integration-tests/tests/test__smoke.py
+++ b/integration-tests/tests/test__smoke.py
@@ -69,7 +69,7 @@ def test_back_and_forth_py_scala(tc):
     f.bin_column("a", [5, 8, 10.0, 30.0, 50, 80])  # python
     f.filter(lambda row: row.a > 5)
 
-    results = str(f.inspect())
+    results = str(f.get_inspect())
     expected = """[#]  a  b    c   a_binned
 ============================
 [0]  6  six  10         0

diff --git a/python/sparktk/atable.py b/python/sparktk/atable.py
index 12ffa6f0..7c32c244 100644
--- a/python/sparktk/atable.py
+++ b/python/sparktk/atable.py
@@ -233,6 +233,9 @@ def __init__(self, rows, schema, offset, format_settings=None):
     def __repr__(self):
         return self._repr()
 
+    def __unicode__(self):
+        return unicode(self._repr())
+
     def _repr_wrap(self):
         """print rows in a 'clumps' style"""
         row_index_str_format = '[%s]' + ' ' * spaces_between_cols
@@ -315,7 +318,9 @@ def _get_value_formatter(self, data_type):
 
     @staticmethod
     def _get_wrap_entry(data, size, formatter, relative_column_index, extra_tuples):
-        entry = unicode(formatter(data)).encode('utf-8')
+        if isinstance(data, str):
+            data = data.decode('utf-8')
+        entry = unicode(formatter(data))
         if isinstance(data, basestring):
             lines = entry.splitlines()
             if len(lines) > 1:
@@ -427,7 +432,11 @@ def _get_col_sizes(rows, row_index, row_count, header_sizes, formatters):
         row = rows[r]
         for c in xrange(len(sizes)):
             value = row[c]
-            entry = unicode(formatters[c](value))
+            entry = formatters[c](value)
+            if isinstance(entry, str):
+                entry = entry.decode('utf-8')
+            else:
+                entry = unicode(entry)
             lines = entry.splitlines()
             max = 0
             for line in lines:

diff --git a/python/sparktk/frame/constructors/import_csv.py b/python/sparktk/frame/constructors/import_csv.py
index 0efd3151..931a06d8 100644
--- a/python/sparktk/frame/constructors/import_csv.py
+++ b/python/sparktk/frame/constructors/import_csv.py
@@ -91,7 +91,7 @@ def import_csv(path, delimiter=",", header=False, schema=None, datetime_format="
 
         >>> frame = tc.frame.import_csv(file_path, schema=schema, header=False)
         -etc-
 
-        >>> frame.inspect()
+        >>> print unicode(frame.get_inspect()).encode('utf-8')  # because this file is UTF-8 and this docstring is str
         [#]  a  b  c
         ============
         [0]  à  ë  ñ

diff --git a/python/sparktk/frame/frame.py b/python/sparktk/frame/frame.py
index 704cccb9..0dabda28 100644
--- a/python/sparktk/frame/frame.py
+++ b/python/sparktk/frame/frame.py
@@ -384,7 +384,7 @@ def column_names(self):
     from sparktk.frame.ops.flatten_columns import flatten_columns
     from sparktk.frame.ops.group_by import group_by
     from sparktk.frame.ops.histogram import histogram
-    from sparktk.frame.ops.inspect import inspect
+    from sparktk.frame.ops.inspect import inspect, get_inspect
     from sparktk.frame.ops.join_cross import join_cross
     from sparktk.frame.ops.join_inner import join_inner
     from sparktk.frame.ops.join_left import join_left

diff --git a/python/sparktk/frame/ops/inspect.py b/python/sparktk/frame/ops/inspect.py
index b0829854..add311aa 100644
--- a/python/sparktk/frame/ops/inspect.py
+++ b/python/sparktk/frame/ops/inspect.py
@@ -119,6 +119,28 @@ def inspect(self,
     with_types False
 
     """
+    atable = self.get_inspect(n=n,
+                              offset=offset,
+                              columns=columns,
+                              wrap=wrap,
+                              truncate=truncate,
+                              round=round,
+                              width=width,
+                              margin=margin,
+                              with_types=with_types)
+    print unicode(atable)
+
+
+def get_inspect(self,
+                n=10,
+                offset=0,
+                columns=None,
+                wrap=inspect_settings._unspecified,
+                truncate=inspect_settings._unspecified,
+                round=inspect_settings._unspecified,
+                width=inspect_settings._unspecified,
+                margin=inspect_settings._unspecified,
+                with_types=inspect_settings._unspecified):
     from sparktk.frame.ops.take import take_rich
     format_settings = inspect_settings.copy(wrap, truncate, round, width, margin, with_types)
     result = take_rich(self, n, offset, columns)

diff --git a/python/sparktk/graph/graph.py b/python/sparktk/graph/graph.py
index 17421e96..7c81ce26 100644
--- a/python/sparktk/graph/graph.py
+++ b/python/sparktk/graph/graph.py
@@ -155,9 +155,9 @@ class Graph(object):
 
         >>> assert str(graph) == str(example)
 
-        >>> assert str(example.create_vertices_frame().inspect(20)) == str(vertices.inspect(20))
+        >>> assert str(example.create_vertices_frame().get_inspect(20)) == str(vertices.get_inspect(20))
 
-        >>> assert str(example.create_edges_frame().inspect(20)) == str(edges.inspect(20))
+        >>> assert str(example.create_edges_frame().get_inspect(20)) == str(edges.get_inspect(20))
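
Reviewer note (illustrative only, not part of the patch): a minimal sketch of how the reworked API is meant to be used after this change, assuming a spark-tk context `tc` is available; the frame contents and the `tc.frame.create` call below are made up for illustration. `inspect()` now prints the rendered table and returns nothing, while `get_inspect()` returns the ATable object so callers can capture the text themselves, which is why the doctests in graph.py and test__smoke.py switch to `str(frame.get_inspect(...))`:

    >>> frame = tc.frame.create([[1, "a"], [2, "b"]], [("x", int), ("y", str)])
    >>> frame.inspect()               # prints the table; nothing useful to capture
    >>> table = frame.get_inspect()   # returns the ATable object instead of printing
    >>> text = unicode(table)         # unicode rendering via the new ATable.__unicode__
    >>> print text.encode('utf-8')    # encode explicitly when writing to a byte stream

Splitting the two operations avoids the old pattern where inspect() was both a side effect and a return value; code that compared tables as strings only needs the one-word rename to get_inspect().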
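
A second illustrative note on the atable.py changes (a generic Python 2 sketch of the failure mode being fixed, not code from this repo): calling unicode() directly on a UTF-8 encoded str implicitly decodes it with the ASCII codec and raises UnicodeDecodeError on non-ASCII bytes, which is why _get_wrap_entry and _get_col_sizes now decode str values explicitly before formatting:

    >>> s = '\xc3\xa0 \xc3\xab \xc3\xb1'      # "à ë ñ" as UTF-8 bytes, e.g. data read from a CSV file
    >>> unicode(s)                            # implicit ASCII decode fails on byte 0xc3
    Traceback (most recent call last):
        ...
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
    >>> unicode(s.decode('utf-8'))            # decode explicitly first, as the patch now does
    u'\xe0 \xeb \xf1'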