Skip to content

Commit

Permalink
closes #14, crash on non-utf8 py files, could actually work now
Browse files Browse the repository at this point in the history
  • Loading branch information
tarpas committed Jan 5, 2018
1 parent ba69b81 commit 1e6a280
Show file tree
Hide file tree
Showing 8 changed files with 80 additions and 40 deletions.
4 changes: 0 additions & 4 deletions test/print1250r.py

This file was deleted.

Empty file added test/samples/__init__.py
Empty file.
3 changes: 3 additions & 0 deletions test/samples/print1250r.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# -*- coding: cp1250 -*-

print("š")
5 changes: 2 additions & 3 deletions test/test_core.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
import pytest
from collections import namedtuple

from testmon.process_code import Module
from testmon.process_code import Module, read_file_with_checksum
from test.test_process_code import CodeSample
from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected, \
read_file_with_checksum
from testmon.testmon_core import TestmonData as CoreTestmonData, SourceTree, flip_dictionary, unaffected

pytest_plugins = "pytester",

Expand Down
45 changes: 42 additions & 3 deletions test/test_process_code.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,53 @@
# -- coding:utf8 --

from test.coveragepy.coveragetest import CoverageTest

import pytest
from testmon.process_code import Block, Module, checksum_coverage
from testmon.process_code import Block, Module, checksum_coverage, read_file_with_checksum, process_encoding
try:
from StringIO import StringIO as MemFile
except ImportError:
from io import BytesIO as MemFile


def parse(source_code, file_name='a.py'):
    """Build a ``Module`` from ``source_code`` and return its ``blocks``."""
    module = Module(source_code=source_code, file_name=file_name)
    return module.blocks


def test_detect_encoding1():
    """A coding declaration on the second line is reported by the second call."""
    lines = []
    output = MemFile(b'#first comment\n# -- coding: abcd --')
    # The first line has no declaration: it is collected and None is returned.
    assert process_encoding(lines, output) is None
    assert lines == [b'#first comment\n']
    # The declaration line is reported but is not appended to ``lines``.
    assert process_encoding(lines, output) == 'abcd'
    assert lines == [b'#first comment\n']


def test_detect_encoding2():
    """Lines without a coding declaration are collected one per call."""
    lines = []
    output = MemFile(b'1\n2\n')
    assert process_encoding(lines, output) is None
    assert lines == [b'1\n']
    assert process_encoding(lines, output) is None
    assert lines == [b'1\n', b'2\n']


def test_detect_encoding3():
    """The cp1250 sample declares its encoding on the very first line.

    Named ``...3`` so that it no longer shadows the in-memory
    ``test_detect_encoding2`` defined earlier in this module (a second
    definition with the same name replaces the first, which then never runs).
    """
    with open('test/samples/print1250r.py', 'rb') as f:
        lines = []
        # The comparison must be asserted, otherwise its result is discarded.
        assert process_encoding(lines, f) == 'cp1250'
        # A matching declaration line is not collected.
        assert lines == []


def test_read_file_with_checksum():
    """The cp1250 sample decodes to text containing the expected character."""
    source, _checksum = read_file_with_checksum('test/samples/print1250r.py')
    assert u'š' in source


def test_module_with_1250():
    """A module read from the cp1250 sample exposes the decoded string literal."""
    code_repr = Module(None, 'test/samples/print1250r.py').blocks[0].code
    # Two spellings of the dumped literal are accepted (escaped UTF-8 bytes or
    # the character itself) -- presumably one per Python major version; confirm.
    # The already computed ``code_repr`` is reused instead of parsing the file again.
    assert "Str('\\xc5\\xa1')" in code_repr or "Str('š')" in code_repr


class TestSourceIntoBlocks(object):

def test_empty(self):
Expand Down Expand Up @@ -253,7 +293,6 @@ def test_classes(self):
assert module1.blocks[1] != module2.blocks[1]
assert module1.blocks[2] == module2.blocks[2]


def test_classes_header(self):
module1 = Module(code_samples['classes'].source_code)
module2 = Module(code_samples['classes_c'].source_code)
Expand All @@ -277,5 +316,5 @@ def test_easy(self):
for name, mod_cov in code_samples.items():
if mod_cov.expected_coverage:
self.check_coverage(mod_cov.source_code,
cov_data = mod_cov.expected_coverage,
cov_data=mod_cov.expected_coverage,
msg="This is for code_sample['{}']".format(name))
6 changes: 1 addition & 5 deletions test/test_testmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import sys

import pytest
import testmon.process_code
from test.coveragepy import coveragetest
from testmon.process_code import Module, checksum_coverage
from testmon.testmon_core import eval_variant
Expand Down Expand Up @@ -103,11 +104,6 @@ def func():
assert {os.path.relpath(a.strpath, testdir.tmpdir.strpath):
checksum_coverage(Module(file_name=a.strpath).blocks, [2])} == deps

def test_detect_encoding():
    """The first two lines of the cp1250 sample declare the ``cp1250`` encoding."""
    from testmon import testmon_core
    with open('test/print1250r.py', 'rb') as f:
        beginning = f.readline() + f.readline()
    # The comparison has to be asserted; as a bare expression it checked nothing.
    assert testmon_core.detect_encoding(beginning) == 'cp1250'


@pytest.mark.xfail
def test_testmon_recursive(testdir, monkeypatch):
Expand Down
34 changes: 31 additions & 3 deletions testmon/process_code.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import ast
import hashlib
import textwrap
import zlib
import os

import re

# Matches a PEP 263 source-encoding declaration in a single line of bytes and
# captures the encoding name in group 1.
# - Raw literal: ``\w`` is not a valid escape in a plain bytes literal.
# - Anchored to a comment line: ordinary code such as ``def f(encoding=None):``
#   must not be mistaken for a declaration (it would yield the bogus encoding
#   ``None`` and make the later ``decode`` call fail).
coding_re = re.compile(br'^[ \t\f]*#.*?coding[:=][ \t]*([-\w.]+)')


class Block():
def __init__(self, start, end, code=0, name=''):
Expand Down Expand Up @@ -44,15 +49,14 @@ def __init__(self, source_code=None, file_name='<unknown>', rootdir=''):
self.blocks = []
self.counter = 0
if source_code is None:
with open(os.path.join(rootdir, file_name)) as f:
source_code = f.read()
source_code, _ = read_file_with_checksum(os.path.join(rootdir, file_name))
else:
source_code = textwrap.dedent(source_code)
lines = source_code.splitlines()
try:
tree = ast.parse(source_code, file_name)
self.dump_and_block(tree, len(lines), name=file_name)
except SyntaxError:
except SyntaxError as e:
pass

def dump_and_block(self, node, end, name='unknown', into_block=False):
Expand Down Expand Up @@ -122,3 +126,27 @@ def checksum_coverage(blocks, lines):
break

return result


def process_encoding(lines, afile):
    """Read one line from ``afile`` and look for a coding declaration in it.

    When the line matches ``coding_re`` the declared encoding name is returned
    as text and the line is NOT added to ``lines``. Otherwise the line is
    appended to ``lines`` and ``None`` is returned.
    """
    current = afile.readline()
    found = coding_re.search(current)
    if found is None:
        lines.append(current)
        return None
    return found.group(1).decode('ascii')


def read_file_with_checksum(absfilename):
    """Read a source file, decode it and compute a SHA-1 checksum.

    The first two lines are passed through ``process_encoding`` to look for a
    coding declaration; ``utf8`` is used when neither line declares one.

    ``process_encoding`` does not keep the declaration line, so a bare newline
    is put in its place. Without it every following line of the returned
    source would sit one line higher than in the file on disk.

    Returns a ``(decoded_source, sha1_hexdigest)`` tuple; the digest is
    computed over exactly the bytes that are decoded.
    """
    hasher = hashlib.sha1()
    with open(absfilename, 'rb') as afile:
        lines = []
        encoding = None
        for _ in range(2):
            encoding = process_encoding(lines, afile)
            if encoding:
                # Placeholder for the dropped declaration line (keeps numbering).
                lines.append(b'\n')
                break
        source = b''.join(lines) + afile.read()
    hasher.update(source)
    return source.decode(encoding or 'utf8'), hasher.hexdigest()
23 changes: 1 addition & 22 deletions testmon/testmon_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,9 @@

import coverage

from testmon.process_code import checksum_coverage
from testmon.process_code import checksum_coverage, read_file_with_checksum
from testmon.process_code import Module
import codecs
import re

# Finds a source-encoding declaration in the leading bytes of a file; group 1
# is the encoding name. Raw literal: ``\s`` and ``\w`` are not valid escapes
# in a plain bytes literal (the compiled pattern is unchanged).
coding_re = re.compile(br'coding[=:]\s*([-\w.]+)')

if sys.version_info > (3,):
buffer = memoryview
Expand Down Expand Up @@ -151,24 +148,6 @@ def get_variant_inifile(inifile):
return eval_variant(run_variant_expression)


def read_file_with_checksum(absfilename):
    """Return ``(decoded_source, sha1_hexdigest)`` for the file at ``absfilename``.

    The encoding is taken from the first two lines via ``detect_encoding``;
    the digest is computed over the raw bytes of the whole file.
    """
    with open(absfilename, 'rb') as afile:
        head = afile.readline() + afile.readline()
        encoding = detect_encoding(head)
        source = head + afile.read()
    digest = hashlib.sha1(source).hexdigest()
    return source.decode(encoding), digest


def detect_encoding(beginning):
    """Return the encoding declared in ``beginning`` (bytes), or ``'utf-8'``.

    The declaration is located with ``coding_re``; when nothing matches the
    default ``'utf-8'`` is returned.
    """
    declared = coding_re.search(beginning)
    if declared is None:
        return 'utf-8'
    return declared.group(1).decode('ascii')


def parse_file(filename, rootdir, source_code):
    """Wrap ``source_code`` in a ``Module`` for ``filename`` under ``rootdir``."""
    module = Module(source_code=source_code, file_name=filename, rootdir=rootdir)
    return module

Expand Down

0 comments on commit 1e6a280

Please sign in to comment.