Skip to content

Commit

Permalink
#60 'a or b c' expression now raises an exception
Browse files Browse the repository at this point in the history
 * add check in BooleanAlgebra.parse to raise a ParseError when two bare
   symbols follow each other without any keyword or operation
 * add optional tracing of what happens when the AST is built
   with a TRACE_PARSE top level flag to help with AST parsing
   debugging
 * add corresponding tests and improve ParseError tests to check for
   the error_code in tests

Signed-off-by: Philippe Ombredanne <[email protected]>
  • Loading branch information
pombredanne committed Feb 9, 2017
1 parent 619b14c commit 810357b
Show file tree
Hide file tree
Showing 2 changed files with 103 additions and 12 deletions.
59 changes: 56 additions & 3 deletions boolean/boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function

import inspect
import itertools
Expand All @@ -31,6 +32,8 @@
except NameError:
basestring = str # Python 3

# Set to True to enable tracing for parsing
TRACE_PARSE = False

# Token types for standard operators and parens
TOKEN_AND = 1
Expand Down Expand Up @@ -59,12 +62,14 @@
PARSE_UNBALANCED_CLOSING_PARENS = 2
PARSE_INVALID_EXPRESSION = 3
PARSE_INVALID_NESTING = 4
PARSE_INVALID_SYMBOL_SEQUENCE = 5

# Messages keyed by parse error code. Each code appears exactly once: the
# stale PARSE_INVALID_NESTING entry that lacked a trailing comma is dropped
# so the literal is valid syntax.
PARSE_ERRORS = {
    PARSE_UNKNOWN_TOKEN: 'Unknown token',
    PARSE_UNBALANCED_CLOSING_PARENS: 'Unbalanced parenthesis',
    PARSE_INVALID_EXPRESSION: 'Invalid expression',
    PARSE_INVALID_NESTING: 'Invalid expression nesting such as (AND xx)',
    PARSE_INVALID_SYMBOL_SEQUENCE: 'Invalid symbols sequence such as (A B)',
}


Expand Down Expand Up @@ -197,27 +202,54 @@ def parse(self, expr, simplify=False):
else:
tokenized = iter(expr)

if TRACE_PARSE:
tokenized = list(tokenized)
print('tokens:')
map(print, tokenized)
tokenized = iter(tokenized)

ast = [None, None]

def is_sym(_t):
return _t == TOKEN_SYMBOL or isinstance(_t, Symbol)

prev = None
for tok in tokenized:
if TRACE_PARSE: print('\nprocessing token:', repr(tok))
token, tokstr, position = tok

if prev:
prev_token, _, _ = prev
if is_sym(prev_token) and is_sym(token):
raise ParseError(token, tokstr, position, PARSE_INVALID_SYMBOL_SEQUENCE)

if token == TOKEN_SYMBOL:
ast.append(self.Symbol(tokstr))
if TRACE_PARSE: print(' ast: token == TOKEN_SYMBOL: append new symbol', repr(ast))

elif isinstance(token, Symbol):
ast.append(token)
if TRACE_PARSE: print(' ast: isinstance(token, Symbol): append existing symbol', repr(ast))

elif token == TOKEN_TRUE:
ast.append(self.TRUE)
if TRACE_PARSE: print('ast4:', repr(ast))

elif token == TOKEN_FALSE:
ast.append(self.FALSE)
if TRACE_PARSE: print('ast5:', repr(ast))

elif token == TOKEN_NOT:
ast = [ast, self.NOT]
if TRACE_PARSE: print('ast6:', repr(ast))

elif token == TOKEN_AND:
ast = self._start_operation(ast, self.AND, precedence)
if TRACE_PARSE: print(' ast: token == TOKEN_AND: start_operation', repr(ast))

elif token == TOKEN_OR:
ast = self._start_operation(ast, self.OR, precedence)
if TRACE_PARSE: print(' ast: token == TOKEN_OR: start_operation', repr(ast))

elif token == TOKEN_LPAR:
if prev:
Expand All @@ -227,13 +259,16 @@ def parse(self, expr, simplify=False):
if ptoktype not in (TOKEN_NOT, TOKEN_AND, TOKEN_OR, TOKEN_LPAR):
raise ParseError(token, tokstr, position, PARSE_INVALID_NESTING)
ast = [ast, TOKEN_LPAR]

elif token == TOKEN_RPAR:
while True:
if ast[0] is None:
raise ParseError(token, tokstr, position, PARSE_UNBALANCED_CLOSING_PARENS)
if ast[1] is TOKEN_LPAR:
ast[0].append(ast[2])
if TRACE_PARSE: print('ast9:', repr(ast))
ast = ast[0]
if TRACE_PARSE: print('ast10:', repr(ast))
break

if isinstance(ast[1], int):
Expand All @@ -246,10 +281,12 @@ def parse(self, expr, simplify=False):

subex = ast[1](*ast[2:])
ast[0].append(subex)
if TRACE_PARSE: print('ast11:', repr(ast))
ast = ast[0]
if TRACE_PARSE: print('ast12:', repr(ast))
else:
raise ParseError(token, tokstr, position, PARSE_UNKNOWN_TOKEN)

prev = tok

try:
Expand All @@ -259,16 +296,21 @@ def parse(self, expr, simplify=False):
if len(ast) != 3:
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)
parsed = ast[2]
if TRACE_PARSE: print('parsed1:', repr(parsed))
else:
parsed = ast[1](*ast[2:])
if TRACE_PARSE: print('parsed2:', repr(parsed))
break
else:
subex = ast[1](*ast[2:])
ast[0].append(subex)
if TRACE_PARSE: print('ast13:', repr(ast))
ast = ast[0]
if TRACE_PARSE: print('ast14:', repr(ast))
except TypeError:
raise ParseError(error_code=PARSE_INVALID_EXPRESSION)

if TRACE_PARSE: print('parsed3:', repr(parsed))
if simplify:
return parsed.simplify()
return parsed
def _start_operation(self, ast, operation, precedence):
    """
    Return an AST where all operations of lower precedence are finalized.

    `ast` is a nested list shaped like [parent, operator, operand, ...]
    where `parent` is the enclosing list of the same shape, or None at the
    top level. `operation` is the operator about to be started and
    `precedence` is a mapping indexed by operators that yields comparable
    precedence values.

    Raise a ParseError with the PARSE_INVALID_NESTING error code when the
    current operator of `ast` has to be finalized but is not a Function
    subclass.
    """
    if TRACE_PARSE:
        print(' start_operation: ast, operation, precedence', repr(ast), repr(operation), repr(precedence))
    op_prec = precedence[operation]
    while True:
        if ast[1] is None:  # [None, None, x]
            if TRACE_PARSE:
                print(' start_op: ast[1] is None:', repr(ast))
            ast[1] = operation
            if TRACE_PARSE:
                print(' --> start_op: ast[1] is None:', repr(ast))
            return ast

        prec = precedence[ast[1]]
        if prec > op_prec:  # op=&, [ast, |, x, y] -> [[ast, |, x], &, y]
            if TRACE_PARSE:
                print(' start_op: prec > op_prec:', repr(ast))
            # The last operand is moved from the current node into a new
            # child node that holds the new operation.
            ast = [ast, operation, ast.pop(-1)]
            if TRACE_PARSE:
                print(' --> start_op: prec > op_prec:', repr(ast))
            return ast

        if prec == op_prec:  # op=&, [ast, &, x] -> [ast, &, x]
            if TRACE_PARSE:
                print(' start_op: prec == op_prec:', repr(ast))
            return ast

        if not (inspect.isclass(ast[1]) and issubclass(ast[1], Function)):
            # the top ast node should be a function subclass at this stage
            raise ParseError(error_code=PARSE_INVALID_NESTING)

        if ast[0] is None:  # op=|, [None, &, x, y] -> [None, |, x&y]
            if TRACE_PARSE:
                print(' start_op: ast[0] is None:', repr(ast))
            subexp = ast[1](*ast[2:])
            # Build the result first so that it can be traced before it
            # is returned.
            new_ast = [ast[0], operation, subexp]
            if TRACE_PARSE:
                print(' --> start_op: ast[0] is None:', repr(new_ast))
            return new_ast

        # op=|, [[ast, &, x], ~, y] -> [ast, &, x, ~y]
        if TRACE_PARSE:
            print(' start_op: else:', repr(ast))
        # Finalize the current node into its parent, then loop again on
        # the parent.
        ast[0].append(ast[1](*ast[2:]))
        ast = ast[0]
        if TRACE_PARSE:
            print(' --> start_op: else:', repr(ast))

def tokenize(self, expr):
"""
Expand Down
56 changes: 47 additions & 9 deletions boolean/test_boolean.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,18 @@
Released under revised BSD license.
"""

from __future__ import absolute_import, unicode_literals

import unittest
from unittest.case import expectedFailure
from __future__ import absolute_import
from __future__ import unicode_literals
from __future__ import print_function
from boolean.boolean import PARSE_UNKNOWN_TOKEN

try:
basestring # Python 2
except NameError:
basestring = str # Python 3

import unittest
from unittest.case import expectedFailure

from boolean import BooleanAlgebra
from boolean import ParseError
Expand All @@ -29,6 +31,9 @@
from boolean import TOKEN_SYMBOL
from boolean import TOKEN_LPAR
from boolean import TOKEN_RPAR
from boolean.boolean import PARSE_INVALID_SYMBOL_SEQUENCE
from boolean.boolean import PARSE_INVALID_EXPRESSION
from boolean.boolean import PARSE_INVALID_NESTING


class BooleanAlgebraTestCase(unittest.TestCase):
Expand Down Expand Up @@ -281,13 +286,43 @@ def test_parse_raise_ParseError(self):
]

for expr in invalid_expressions:
print(expr)
try:
algebra.parse(expr)
self.fail("Exception should be raised when parsing '%s'" % expr)
except ParseError:
pass
except ParseError as pe:
assert pe.error_code == PARSE_UNKNOWN_TOKEN

def test_parse_side_by_side_symbols_should_raise_exception_but_not(self):
    """
    Parsing 'a or b c' must raise a ParseError whose error code is
    PARSE_INVALID_SYMBOL_SEQUENCE: the symbols 'b' and 'c' follow each
    other without any operator in between.
    """
    algebra = BooleanAlgebra()
    expr_str = 'a or b c'
    try:
        algebra.parse(expr_str)
    except ParseError as pe:
        self.assertEqual(PARSE_INVALID_SYMBOL_SEQUENCE, pe.error_code)
    else:
        # Without this branch the test would pass when nothing is raised.
        self.fail("Exception should be raised when parsing '%s'" % expr_str)

def test_parse_side_by_side_symbols_should_raise_exception_but_not2(self):
    """
    Check that a ParseError raised while parsing '(a or b) c' carries the
    PARSE_INVALID_EXPRESSION error code.

    NOTE(review): this test passes silently when parse() raises nothing;
    confirm whether a missing exception should be reported as a failure.
    """
    parser = BooleanAlgebra()
    try:
        parser.parse('(a or b) c')
    except ParseError as error:
        assert error.error_code == PARSE_INVALID_EXPRESSION

def test_parse_side_by_side_symbols_raise_exception(self):
    """
    Parsing 'a b' must raise a ParseError whose error code is
    PARSE_INVALID_SYMBOL_SEQUENCE: two bare symbols follow each other.
    """
    algebra = BooleanAlgebra()
    expr_str = 'a b'
    try:
        algebra.parse(expr_str)
    except ParseError as pe:
        self.assertEqual(PARSE_INVALID_SYMBOL_SEQUENCE, pe.error_code)
    else:
        # Without this branch the test would pass when nothing is raised.
        self.fail("Exception should be raised when parsing '%s'" % expr_str)

def test_parse_side_by_side_symbols_with_parens_raise_exception(self):
    """
    Check that a ParseError raised while parsing '(a) (b)' carries the
    PARSE_INVALID_NESTING error code.

    NOTE(review): this test passes silently when parse() raises nothing;
    confirm whether a missing exception should be reported as a failure.
    """
    parser = BooleanAlgebra()
    try:
        parser.parse('(a) (b)')
    except ParseError as error:
        assert error.error_code == PARSE_INVALID_NESTING

class BaseElementTestCase(unittest.TestCase):

Expand Down Expand Up @@ -783,8 +818,11 @@ def test_simplify_complex_expression_parsed_then_simplified(self):

def test_parse_invalid_nested_and_should_raise_a_proper_exception(self):
    """
    Parsing 'a (and b)' must raise a ParseError whose error code is
    PARSE_INVALID_NESTING.
    """
    algebra = BooleanAlgebra()
    expr = '''a (and b)'''
    try:
        algebra.parse(expr)
    except ParseError as pe:
        self.assertEqual(PARSE_INVALID_NESTING, pe.error_code)
    else:
        # Keeps the guarantee that parsing must raise, so a missing
        # exception is reported as a failure.
        self.fail("Exception should be raised when parsing '%s'" % expr)

def test_subtract(self):
parse = BooleanAlgebra().parse
Expand Down

0 comments on commit 810357b

Please sign in to comment.