From d6d320c7e3acb8499d23158cc1c528239dccebfb Mon Sep 17 00:00:00 2001
From: Briton Barker
Date: Tue, 7 Feb 2017 16:14:12 -0800
Subject: [PATCH] change inspect to print, add get_inspect to get object, make
 more unicode friendly; add ability to skip dicom and graph integration tests

---
 integration-tests/clean.sh                   |  3 +++
 integration-tests/runtests.sh                | 13 +++++++++--
 integration-tests/tests/doctgen.py           | 11 +++++++++-
 integration-tests/tests/test__smoke.py       |  2 +-
 python/sparktk/atable.py                     | 13 +++++++++--
 .../sparktk/frame/constructors/import_csv.py |  2 +-
 python/sparktk/frame/frame.py                |  2 +-
 python/sparktk/frame/ops/inspect.py          | 22 +++++++++++++++++++
 python/sparktk/graph/graph.py                |  4 ++--
 9 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/integration-tests/clean.sh b/integration-tests/clean.sh
index 2721f2de..1b9e5ae7 100755
--- a/integration-tests/clean.sh
+++ b/integration-tests/clean.sh
@@ -28,3 +28,6 @@ rm -f tests/test_docs_generated.py
 
 echo "$NAME rm -r tests/sandbox/*"
 rm -rf tests/sandbox/*
+
+echo "$NAME rm -r tests/.coverage*"
+rm -f tests/.coverage*

diff --git a/integration-tests/runtests.sh b/integration-tests/runtests.sh
index 0c103c0b..88b0cf79 100755
--- a/integration-tests/runtests.sh
+++ b/integration-tests/runtests.sh
@@ -34,13 +34,22 @@ if [ -z "$HADOOP_CONF_DIR" ]; then
 fi
 echo $NAME HADOOP_CONF_DIR=$HADOOP_CONF_DIR
 
+doctgen_args=""
+other_args=""
+if [ "$1" = "--skiplong" ]; then
+    doctgen_args="--skiplong"
+    shift
+    other_args="--ignore=test_dicom.py --ignore=test_graph.py"
+fi
+
 echo "$NAME Calling clean.sh"
 ./clean.sh
 
 cd tests
+
 echo "$NAME Generating the doctests test file"
-python2.7 doctgen.py
+python2.7 doctgen.py $doctgen_args
 GEN_DOCTESTS_SUCCESS=$?
 if [[ $GEN_DOCTESTS_SUCCESS != 0 ]]
 then
@@ -55,4 +64,4 @@ fi
 #python2.7 -m pytest -k test_kmeans  # example to run individual test
 #python2.7 -m pytest -k test_docs_python_sparktk_frame_ops_drop_columns_py  # example to run individual doc test
 export COVERAGE_FILE=$DIR/../coverage/integration_test_coverage.dat
-py.test --cov-config=$DIR/pycoverage.ini --cov=$DIR/../python --cov-report=html:$DIR/../coverage/pytest_integration $@
+py.test --cov-config=$DIR/pycoverage.ini --cov=$DIR/../python --cov-report=html:$DIR/../coverage/pytest_integration $other_args $@

diff --git a/integration-tests/tests/doctgen.py b/integration-tests/tests/doctgen.py
index 65148d20..e40df061 100644
--- a/integration-tests/tests/doctgen.py
+++ b/integration-tests/tests/doctgen.py
@@ -107,7 +107,7 @@ def _trim_test_path(path):
 def filter_exemptions(paths):
     """returns the given paths with the exemptions removed"""
     chop = len(path_to_frameops) + 1  # the + 1 is for the extra forward slash
-    filtered_paths = [p for p in paths if p[chop:] not in exemptions]
+    filtered_paths = [p for p in paths if p and p[chop:] not in exemptions]
     return filtered_paths
 
 
@@ -229,6 +229,15 @@ def main():
     else:
         print "[%s] Removed pre-existing .pyc file %s" % (this_script_name, pyc)
 
+    if '--skiplong' in sys.argv:
+        print "[%s] --skiplong argument found, skipping dicom, graph tests, maybe more" % this_script_name
+        global path_to_dicom, path_to_dicomops, path_to_graph, path_to_graphops
+        path_to_dicom = ''
+        path_to_dicomops = ''
+        path_to_graph = ''
+        path_to_graphops = ''
+
+
     # Python flatmap --> [item for list in listoflists for item in list]
     test_paths = [test_path for folder_path in [path_to_frameops,
                                                 path_to_framecons,

diff --git a/integration-tests/tests/test__smoke.py b/integration-tests/tests/test__smoke.py
index 9cb754cb..5d88e0f5 100644
--- a/integration-tests/tests/test__smoke.py
+++ b/integration-tests/tests/test__smoke.py
@@ -69,7 +69,7 @@ def test_back_and_forth_py_scala(tc):
     f.bin_column("a", [5, 8, 10.0, 30.0, 50, 80])  # python
     f.filter(lambda row: row.a > 5)
 
-    results = str(f.inspect())
+    results = str(f.get_inspect())
     expected = """[#]  a  b    c   a_binned
 ============================
 [0]  6  six  10         0

diff --git a/python/sparktk/atable.py b/python/sparktk/atable.py
index 12ffa6f0..7c32c244 100644
--- a/python/sparktk/atable.py
+++ b/python/sparktk/atable.py
@@ -233,6 +233,9 @@ def __init__(self, rows, schema, offset, format_settings=None):
     def __repr__(self):
         return self._repr()
 
+    def __unicode__(self):
+        return unicode(self._repr())
+
     def _repr_wrap(self):
         """print rows in a 'clumps' style"""
         row_index_str_format = '[%s]' + ' ' * spaces_between_cols
@@ -315,7 +318,9 @@ def _get_value_formatter(self, data_type):
 
     @staticmethod
     def _get_wrap_entry(data, size, formatter, relative_column_index, extra_tuples):
-        entry = unicode(formatter(data)).encode('utf-8')
+        if isinstance(data, str):
+            data = data.decode('utf-8')
+        entry = unicode(formatter(data))
         if isinstance(data, basestring):
             lines = entry.splitlines()
             if len(lines) > 1:
@@ -427,7 +432,11 @@ def _get_col_sizes(rows, row_index, row_count, header_sizes, formatters):
         row = rows[r]
         for c in xrange(len(sizes)):
             value = row[c]
-            entry = unicode(formatters[c](value))
+            entry = formatters[c](value)
+            if isinstance(entry, str):
+                entry = entry.decode('utf-8')
+            else:
+                entry = unicode(entry)
             lines = entry.splitlines()
             max = 0
             for line in lines:

diff --git a/python/sparktk/frame/constructors/import_csv.py b/python/sparktk/frame/constructors/import_csv.py
index 0efd3151..931a06d8 100644
--- a/python/sparktk/frame/constructors/import_csv.py
+++ b/python/sparktk/frame/constructors/import_csv.py
@@ -91,7 +91,7 @@ def import_csv(path, delimiter=",", header=False, schema=None, datetime_format="
 
         >>> frame = tc.frame.import_csv(file_path, schema=schema, header=False)
         -etc-
 
-        >>> frame.inspect()
+        >>> print unicode(frame.get_inspect()).encode('utf-8')  # because this file is UTF-8 and this docstring is str
         [#]  a  b  c
         ============
         [0]  à  ë  ñ

diff --git a/python/sparktk/frame/frame.py b/python/sparktk/frame/frame.py
index 704cccb9..0dabda28 100644
--- a/python/sparktk/frame/frame.py
+++ b/python/sparktk/frame/frame.py
@@ -384,7 +384,7 @@ def column_names(self):
     from sparktk.frame.ops.flatten_columns import flatten_columns
     from sparktk.frame.ops.group_by import group_by
     from sparktk.frame.ops.histogram import histogram
-    from sparktk.frame.ops.inspect import inspect
+    from sparktk.frame.ops.inspect import inspect, get_inspect
     from sparktk.frame.ops.join_cross import join_cross
     from sparktk.frame.ops.join_inner import join_inner
     from sparktk.frame.ops.join_left import join_left

diff --git a/python/sparktk/frame/ops/inspect.py b/python/sparktk/frame/ops/inspect.py
index b0829854..add311aa 100644
--- a/python/sparktk/frame/ops/inspect.py
+++ b/python/sparktk/frame/ops/inspect.py
@@ -119,6 +119,28 @@ def inspect(self,
     with_types False
 
     """
+    atable = self.get_inspect(n=n,
+                              offset=offset,
+                              columns=columns,
+                              wrap=wrap,
+                              truncate=truncate,
+                              round=round,
+                              width=width,
+                              margin=margin,
+                              with_types=with_types)
+    print unicode(atable)
+
+
+def get_inspect(self,
+                n=10,
+                offset=0,
+                columns=None,
+                wrap=inspect_settings._unspecified,
+                truncate=inspect_settings._unspecified,
+                round=inspect_settings._unspecified,
+                width=inspect_settings._unspecified,
+                margin=inspect_settings._unspecified,
+                with_types=inspect_settings._unspecified):
     from sparktk.frame.ops.take import take_rich
     format_settings = inspect_settings.copy(wrap, truncate, round, width, margin, with_types)
     result = take_rich(self, n, offset, columns)

diff --git a/python/sparktk/graph/graph.py b/python/sparktk/graph/graph.py
index 17421e96..7c81ce26 100644
--- a/python/sparktk/graph/graph.py
+++ b/python/sparktk/graph/graph.py
@@ -155,9 +155,9 @@ class Graph(object):
 
         >>> assert str(graph) == str(example)
 
-        >>> assert str(example.create_vertices_frame().inspect(20)) == str(vertices.inspect(20))
+        >>> assert str(example.create_vertices_frame().get_inspect(20)) == str(vertices.get_inspect(20))
 
-        >>> assert str(example.create_edges_frame().inspect(20)) == str(edges.inspect(20))
+        >>> assert str(example.create_edges_frame().get_inspect(20)) == str(edges.get_inspect(20))
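
Reviewer note (illustrative only, not part of the patch): a minimal sketch of how the reworked API is meant to be used after this change, assuming a spark-tk context `tc` is available; the frame contents and the `tc.frame.create` call below are made up for illustration. `inspect()` now prints the rendered table and returns nothing, while `get_inspect()` returns the ATable object so callers can capture the text themselves, which is why the doctests in graph.py and test__smoke.py switch to `str(frame.get_inspect(...))`:

    >>> frame = tc.frame.create([[1, "a"], [2, "b"]], [("x", int), ("y", str)])
    >>> frame.inspect()               # prints the table; nothing useful to capture
    >>> table = frame.get_inspect()   # returns the ATable object instead of printing
    >>> text = unicode(table)         # unicode rendering via the new ATable.__unicode__
    >>> print text.encode('utf-8')    # encode explicitly when writing to a byte stream

Splitting the two operations avoids the old pattern where inspect() was both a side effect and a return value; code that compared tables as strings only needs the one-word rename to get_inspect().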
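
A second illustrative note on the atable.py changes (a generic Python 2 sketch of the failure mode being fixed, not code from this repo): calling unicode() directly on a UTF-8 encoded str implicitly decodes it with the ASCII codec and raises UnicodeDecodeError on non-ASCII bytes, which is why _get_wrap_entry and _get_col_sizes now decode str values explicitly before formatting:

    >>> s = '\xc3\xa0 \xc3\xab \xc3\xb1'      # "à ë ñ" as UTF-8 bytes, e.g. data read from a CSV file
    >>> unicode(s)                            # implicit ASCII decode fails on byte 0xc3
    Traceback (most recent call last):
        ...
    UnicodeDecodeError: 'ascii' codec can't decode byte 0xc3 in position 0: ordinal not in range(128)
    >>> unicode(s.decode('utf-8'))            # decode explicitly first, as the patch now does
    u'\xe0 \xeb \xf1'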