closes #14, crash on non-utf8 py files, merge into master

tarpas · Jan 6, 2018 · 30d96ea · 30d96ea
1 parent 31fe409
commit 30d96ea
Show file tree

Hide file tree

Showing 10 changed files with 92 additions and 25 deletions.
diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
     name='pytest-testmon',
     description='take TDD to a new level with py.test and testmon',
     long_description=''.join(open('README.rst').readlines()),
-    version='0.9.6',
+    version='0.9.7',
     license='MIT',
     platforms=['linux', 'osx', 'win32'],
     packages=['testmon'],

diff --git a/test/samples/2lines.py b/test/samples/2lines.py
@@ -0,0 +1,2 @@
+# -*- coding: cp1250 -*-
+#2ndline
diff --git a/test/samples/__init__.py b/test/samples/__init__.py
diff --git a/test/samples/empty.py b/test/samples/empty.py
diff --git a/test/samples/print1250r.py b/test/samples/print1250r.py
@@ -0,0 +1,3 @@
+# -*- coding: cp1250 -*-
+
+print("š")
diff --git a/test/test_core.py b/test/test_core.py
@@ -1,10 +1,9 @@
 import pytest
 from collections import namedtuple
 
-from testmon.process_code import Module
+from testmon.process_code import Module, read_file_with_checksum
 from test.test_process_code import CodeSample
-from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected, \
-    read_file_with_checksum
+from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected
 
 pytest_plugins = "pytester",
 

diff --git a/test/test_process_code.py b/test/test_process_code.py
@@ -1,13 +1,61 @@
+#  -- coding:utf8 --
+
 from test.coveragepy.coveragetest import CoverageTest
 
 import pytest
-from testmon.process_code import Block, Module, checksum_coverage
+from testmon.process_code import Block, Module, checksum_coverage, read_file_with_checksum, process_encoding
+try:
+    from StringIO import StringIO as MemFile
+except ImportError:
+    from io import BytesIO as MemFile
 
 
 def parse(source_code, file_name='a.py'):
     return Module(source_code=source_code, file_name=file_name).blocks
 
 
+def test_detect_encoding1():
+    """A coding declaration on the second line is reported by the second call."""
+    lines = []
+    output = MemFile(b'#first comment\n#  -- coding: abcd --')
+    # First line carries no declaration: it is buffered and None comes back.
+    assert process_encoding(lines, output) is None
+    assert lines == [b'#first comment\n']
+    # Second line carries the declaration: it is consumed but not buffered.
+    assert process_encoding(lines, output) == 'abcd'
+    assert lines == [b'#first comment\n']
+
+
+def test_detect_encoding2():
+    """Lines without a coding declaration are buffered one per call."""
+    lines = []
+    output = MemFile(b'1\n2\n')
+    assert process_encoding(lines, output) is None
+    assert lines == [b'1\n']
+    assert process_encoding(lines, output) is None
+    assert lines == [b'1\n', b'2\n']
+
+
+def test_detect_encoding3():
+    """A declaration on the first line of a real file is detected, not buffered."""
+    with open('test/samples/print1250r.py', 'rb') as f:
+        lines = []
+        # Without `assert` this comparison would be evaluated and discarded.
+        assert process_encoding(lines, f) == 'cp1250'
+        assert lines == []
+
+
+def test_read_file_with_checksum():
+    """The cp1250 sample is decoded into text that contains the 'š' character."""
+    source, _checksum = read_file_with_checksum('test/samples/print1250r.py')
+    assert u'š' in source
+
+
+def test_read_empty_file_with_checksum():
+    """An empty file yields an empty source string."""
+    source, _checksum = read_file_with_checksum('test/samples/empty.py')
+    assert source == ''
+
+
+def test_read_2lines_file_with_checksum():
+    """The coding line is left out of the returned source; only line two remains."""
+    source, _checksum = read_file_with_checksum('test/samples/2lines.py')
+    assert source == '#2ndline'
+
+
+def test_module_with_1250():
+    """The string literal of the cp1250 sample shows up in the first block's dump."""
+    code_repr = Module(None, 'test/samples/print1250r.py').blocks[0].code
+    # Two spellings of the literal are accepted (escaped UTF-8 bytes or the
+    # character itself) -- presumably to cover different interpreter versions.
+    assert "Str('\\xc5\\xa1')" in code_repr or "Str('š')" in code_repr
+
+
 class TestSourceIntoBlocks(object):
 
     def test_empty(self):
@@ -253,7 +301,6 @@ def test_classes(self):
         assert module1.blocks[1] != module2.blocks[1]
         assert module1.blocks[2] == module2.blocks[2]
 
-
     def test_classes_header(self):
         module1 = Module(code_samples['classes'].source_code)
         module2 = Module(code_samples['classes_c'].source_code)
@@ -277,5 +324,5 @@ def test_easy(self):
         for name, mod_cov in code_samples.items():
             if mod_cov.expected_coverage:
                 self.check_coverage(mod_cov.source_code,
-                                    cov_data = mod_cov.expected_coverage,
+                                    cov_data=mod_cov.expected_coverage,
                                     msg="This is for code_sample['{}']".format(name))
diff --git a/test/test_testmon.py b/test/test_testmon.py
@@ -2,6 +2,7 @@
 import sys
 
 import pytest
+import testmon.process_code
 from test.coveragepy import coveragetest
 from testmon.process_code import Module, checksum_coverage
 from testmon.testmon_core import eval_variant

diff --git a/testmon/process_code.py b/testmon/process_code.py
@@ -1,8 +1,13 @@
 import ast
+import hashlib
 import textwrap
 import zlib
 import os
 
+import re
+
+# Encoding declaration as specified by PEP 263: it only counts inside a comment
+# line.  Raw bytes literal so that the regex escapes are not treated as
+# (invalid) string escapes; anchoring on '#' keeps ordinary code such as
+# `def f(encoding=None):` from being mistaken for a declaration.
+coding_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)')
+
 
 class Block():
     def __init__(self, start, end, code=0, name=''):
@@ -44,15 +49,14 @@ def __init__(self, source_code=None, file_name='<unknown>', rootdir=''):
         self.blocks = []
         self.counter = 0
         if source_code is None:
-            with open(os.path.join(rootdir, file_name)) as f:
-                source_code = f.read()
+            source_code, _ = read_file_with_checksum(os.path.join(rootdir, file_name))
         else:
             source_code = textwrap.dedent(source_code)
         lines = source_code.splitlines()
         try:
             tree = ast.parse(source_code, file_name)
             self.dump_and_block(tree, len(lines), name=file_name)
-        except SyntaxError:
+        except SyntaxError as e:
             pass
 
     def dump_and_block(self, node, end, name='unknown', into_block=False):
@@ -122,3 +126,27 @@ def checksum_coverage(blocks, lines):
             break
 
     return result
+
+
+def process_encoding(lines, afile):
+    """Read one line from afile and look for an encoding declaration in it.
+
+    When the line matches coding_re, the declared encoding name is returned
+    as text and the line itself is consumed without being stored.  Otherwise
+    the raw line is appended to lines and None is returned.
+    """
+    raw_line = afile.readline()
+    found = coding_re.search(raw_line)
+    if not found:
+        lines.append(raw_line)
+        return None
+    return found.group(1).decode('ascii')
+
+
+def read_file_with_checksum(absfilename):
+    """Return (decoded source, sha1 hex digest) for the file at absfilename.
+
+    The file is read as bytes.  An encoding declaration is looked for on the
+    first two lines only; the line carrying it is left out of both the
+    returned source and the checksum.  Without a declaration utf8 is used.
+    """
+    head = []
+    encoding = None
+    with open(absfilename, 'rb') as afile:
+        # Stop at the first declaration; never inspect more than two lines.
+        for _ in range(2):
+            encoding = process_encoding(head, afile)
+            if encoding:
+                break
+        rest = afile.read()
+    source = b''.join(head) + rest
+    checksum = hashlib.sha1(source).hexdigest()
+    return source.decode(encoding or 'utf8'), checksum
diff --git a/testmon/testmon_core.py b/testmon/testmon_core.py
@@ -11,13 +11,13 @@
 import sqlite3
 import sys
 import textwrap
-import zlib
 
 import coverage
 
-from testmon.process_code import checksum_coverage
+from testmon.process_code import checksum_coverage, read_file_with_checksum
 from testmon.process_code import Module
 
+
 if sys.version_info > (3,):
     buffer = memoryview
     encode = lambda x: bytes(x, 'utf_8')
@@ -148,19 +148,6 @@ def get_variant_inifile(inifile):
     return eval_variant(run_variant_expression)
 
 
-def read_file_with_checksum(absfilename):
-    hasher = hashlib.sha1()
-    try:
-        with open(absfilename) as afile:
-            source = afile.read()
-    except UnicodeDecodeError:
-        raise Exception("""You are hitting https://github.com/tarpas/pytest-testmon/issues/14"""
-        """ when reading {}""".format(absfilename))
-
-    hasher.update(encode(source))
-    return source, hasher.hexdigest()
-
-
 def parse_file(filename, rootdir, source_code):
     return Module(source_code=source_code, file_name=filename, rootdir=rootdir)