diff --git a/test/print1250r.py b/test/print1250r.py deleted file mode 100644 index fb0cdbf..0000000 --- a/test/print1250r.py +++ /dev/null @@ -1,4 +0,0 @@ -# anything -# -- coding:cp1250 -- - -print("š") diff --git a/test/samples/__init__.py b/test/samples/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/samples/print1250r.py b/test/samples/print1250r.py new file mode 100644 index 0000000..e352dea --- /dev/null +++ b/test/samples/print1250r.py @@ -0,0 +1,3 @@ +# -*- coding: cp1250 -*- + +print("š") diff --git a/test/test_core.py b/test/test_core.py index 30518bb..ac73282 100644 --- a/test/test_core.py +++ b/test/test_core.py @@ -1,10 +1,9 @@ import pytest from collections import namedtuple -from testmon.process_code import Module +from testmon.process_code import Module, read_file_with_checksum from test.test_process_code import CodeSample -from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected, \ - read_file_with_checksum +from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected pytest_plugins = "pytester", diff --git a/test/test_process_code.py b/test/test_process_code.py index 3cf7a94..1987f1d 100644 --- a/test/test_process_code.py +++ b/test/test_process_code.py @@ -1,13 +1,53 @@ +# -- coding:utf8 -- + from test.coveragepy.coveragetest import CoverageTest import pytest -from testmon.process_code import Block, Module, checksum_coverage +from testmon.process_code import Block, Module, checksum_coverage, read_file_with_checksum, process_encoding +try: + from StringIO import StringIO as MemFile +except ImportError: + from io import BytesIO as MemFile def parse(source_code, file_name='a.py'): return Module(source_code=source_code, file_name=file_name).blocks +def test_detect_encoding1(): + lines = [] + output = MemFile(b'#first comment\n# -- coding: abcd --') + assert process_encoding(lines, output) == None + assert lines == [b'#first comment\n'] + assert process_encoding(lines, output) == 'abcd' + assert lines == [b'#first comment\n'] + + +def test_detect_encoding2(): + lines = [] + output = MemFile(b'1\n2\n') + assert process_encoding(lines, output) == None + assert lines == [b'1\n'] + assert process_encoding(lines, output) == None + assert lines == [b'1\n', b'2\n'] + + +def test_detect_encoding2(): + with open('test/samples/print1250r.py', 'rb') as f: + lines = [] + process_encoding(lines, f) == 'cp1250' + assert lines == [] + + +def test_read_file_with_checksum(): + assert u'Å¡' in read_file_with_checksum('test/samples/print1250r.py')[0] + + +def test_module_with_1250(): + code_repr = Module(None, 'test/samples/print1250r.py').blocks[0].code + assert "Str('\\xc5\\xa1')" in code_repr or "Str('Å¡')" in Module(None, 'test/samples/print1250r.py').blocks[0].code + + class TestSourceIntoBlocks(object): def test_empty(self): @@ -253,7 +293,6 @@ def test_classes(self): assert module1.blocks[1] != module2.blocks[1] assert module1.blocks[2] == module2.blocks[2] - def test_classes_header(self): module1 = Module(code_samples['classes'].source_code) module2 = Module(code_samples['classes_c'].source_code) @@ -277,5 +316,5 @@ def test_easy(self): for name, mod_cov in code_samples.items(): if mod_cov.expected_coverage: self.check_coverage(mod_cov.source_code, - cov_data = mod_cov.expected_coverage, + cov_data=mod_cov.expected_coverage, msg="This is for code_sample['{}']".format(name)) diff --git a/test/test_testmon.py b/test/test_testmon.py index c418959..7230644 100755 --- a/test/test_testmon.py +++ b/test/test_testmon.py @@ -2,6 +2,7 @@ import sys import pytest +import testmon.process_code from test.coveragepy import coveragetest from testmon.process_code import Module, checksum_coverage from testmon.testmon_core import eval_variant @@ -103,11 +104,6 @@ def func(): assert {os.path.relpath(a.strpath, testdir.tmpdir.strpath): checksum_coverage(Module(file_name=a.strpath).blocks, [2])} == deps -def test_detect_encoding(): - from testmon import testmon_core - with open('test/print1250r.py', 'rb') as f: - testmon_core.detect_encoding(f.readline() + f.readline()) == 'cp1250' - @pytest.mark.xfail def test_testmon_recursive(testdir, monkeypatch): diff --git a/testmon/process_code.py b/testmon/process_code.py index d55421a..d824318 100644 --- a/testmon/process_code.py +++ b/testmon/process_code.py @@ -1,8 +1,13 @@ import ast +import hashlib import textwrap import zlib import os +import re + +coding_re = re.compile(b'coding[=:]\s*([-\w.]+)') + class Block(): def __init__(self, start, end, code=0, name=''): @@ -44,15 +49,14 @@ def __init__(self, source_code=None, file_name='', rootdir=''): self.blocks = [] self.counter = 0 if source_code is None: - with open(os.path.join(rootdir, file_name)) as f: - source_code = f.read() + source_code, _ = read_file_with_checksum(os.path.join(rootdir, file_name)) else: source_code = textwrap.dedent(source_code) lines = source_code.splitlines() try: tree = ast.parse(source_code, file_name) self.dump_and_block(tree, len(lines), name=file_name) - except SyntaxError: + except SyntaxError as e: pass def dump_and_block(self, node, end, name='unknown', into_block=False): @@ -122,3 +126,27 @@ def checksum_coverage(blocks, lines): break return result + + +def process_encoding(lines, afile): + line = afile.readline() + match = coding_re.search(line) + if match: + return match.group(1).decode('ascii') + else: + lines.append(line) + return None + + +def read_file_with_checksum(absfilename): + hasher = hashlib.sha1() + with open(absfilename, 'rb') as afile: + lines = [] + encoding = process_encoding(lines, afile) + if not encoding: + encoding = process_encoding(lines, afile) + if not encoding: + encoding = 'utf8' + source = b''.join(lines) + afile.read() + hasher.update(source) + return source.decode(encoding), hasher.hexdigest() diff --git a/testmon/testmon_core.py b/testmon/testmon_core.py index 35b463d..ba28ce7 100644 --- a/testmon/testmon_core.py +++ b/testmon/testmon_core.py @@ -14,12 +14,9 @@ import coverage -from testmon.process_code import checksum_coverage +from testmon.process_code import checksum_coverage, read_file_with_checksum from testmon.process_code import Module -import codecs -import re -coding_re = re.compile(b'coding[=:]\s*([-\w.]+)') if sys.version_info > (3,): buffer = memoryview @@ -151,24 +148,6 @@ def get_variant_inifile(inifile): return eval_variant(run_variant_expression) -def read_file_with_checksum(absfilename): - hasher = hashlib.sha1() - with open(absfilename, 'rb') as afile: - source = b''.join([afile.readline(), afile.readline()]) - encoding = detect_encoding(source) - source = source + afile.read() - hasher.update(source) - return source.decode(encoding), hasher.hexdigest() - - -def detect_encoding(beginning): - result = coding_re.search(beginning) - if result: - return result.group(1).decode('ascii') - else: - return 'utf-8' - - def parse_file(filename, rootdir, source_code): return Module(source_code=source_code, file_name=filename, rootdir=rootdir)