diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..6a9f55d --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,40 @@ +# Changelog + +All notable changes to this project will be documented in this file. + +The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), +and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). + +## [0.0.8] - 2024-08-03 + +### Added + +- Support for a `strict` option on `JSONParser` (`JSONParser(strict=False)`; defaults to `True`), as requested in [this issue](https://github.com/iw4p/partialjson/issues/5). +- Test cases for `parser_strict` and `parser_non_strict` to handle incomplete and complete JSON strings with newline characters. +- Example usage of both strict and non-strict parsers in `example.py` and the unit tests. +- Unit tests for various number, string, boolean, array, and object parsing scenarios. + +### Changed + +- Updated incomplete number parsing logic to ensure better error handling and test coverage. + +### Fixed + +- Fixed issue with parsing incomplete floating-point numbers where the parser incorrectly returned an error. +- Corrected string parsing logic to properly handle escape characters in strict mode. + +## [0.0.2] - 2023-11-24 + +### Added + +### Changed + +### Fixed + +- JSON format + +## [0.0.1] - 2023-11-24 + +### Added + +- Initial implementation of `JSONParser` with support for strict mode only. 
diff --git a/example.py b/example.py index 38cf450..3316622 100644 --- a/example.py +++ b/example.py @@ -1,7 +1,18 @@ from partialjson.json_parser import JSONParser import time, sys -parser = JSONParser() +parser_strict = JSONParser() +parser_non_strict = JSONParser(strict=False) + +print("###### Strict Mode == True (Default) ######") +print(parser_strict.parse('{"x": "1st line\\n2nd line').get('x')) +print(parser_strict.parse('{"x": "1st line\\n2nd line"').get('x')) +print(parser_strict.parse('{"x": "1st line\\n2nd line"}').get('x')) +print("###### Strict Mode == False ######") +print(parser_non_strict.parse('{"x": "1st line\\n2nd line').get('x')) +print(parser_non_strict.parse('{"x": "1st line\\n2nd line"').get('x')) +print(parser_non_strict.parse('{"x": "1st line\\n2nd line"}').get('x')) + incomplete_json = """ @@ -111,6 +122,6 @@ for char in incomplete_json.strip(): json += char print(f'\nIncomplete or streaming json:\n{json}') - print(f'Final and usable JSON without crashing:\n{parser.parse(json)}') + print(f'Final and usable JSON without crashing:\n{parser_strict.parse(json)}') sys.stdout.flush() time.sleep(0.01) \ No newline at end of file diff --git a/partialjson/__init__.py b/partialjson/__init__.py index df4c1a0..4c3d40c 100644 --- a/partialjson/__init__.py +++ b/partialjson/__init__.py @@ -5,7 +5,7 @@ """ from .json_parser import JSONParser -__version__ = "0.0.7" +__version__ = "0.0.8" __author__ = 'Nima Akbarzadeh' __author_email__ = "iw4p@protonmail.com" __license__ = "MIT" diff --git a/partialjson/json_parser.py b/partialjson/json_parser.py index 4390b80..097c3c7 100644 --- a/partialjson/json_parser.py +++ b/partialjson/json_parser.py @@ -1,7 +1,8 @@ import json class JSONParser: - def __init__(self): + def __init__(self, strict=True): + self.strict = strict self.parsers = { ' ': self.parse_space, '\r': self.parse_space, @@ -14,7 +15,6 @@ def __init__(self): 'f': self.parse_false, 'n': self.parse_null } - # Adding parsers for numbers for c in 
'0123456789.-': self.parsers[c] = self.parse_number @@ -33,7 +33,7 @@ def parse(self, s): self.last_parse_reminding = reminding if self.on_extra_token and reminding: self.on_extra_token(s, data, reminding) - return json.loads(json.dumps(data)) + return data else: return json.loads("{}") @@ -75,19 +75,16 @@ def parse_object(self, s, e): key, s = self.parse_any(s, e) s = s.strip() - # Handle case where object ends after a key if not s or s[0] == '}': acc[key] = None break - # Expecting a colon after the key if s[0] != ':': raise e # or handle this scenario as per your requirement s = s[1:] # skip ':' s = s.strip() - # Handle case where value is missing or incomplete if not s or s[0] in ',}': acc[key] = None if s.startswith(','): @@ -107,10 +104,15 @@ def parse_string(self, s, e): while end != -1 and s[end - 1] == '\\': # Handle escaped quotes end = s.find('"', end + 1) if end == -1: - # Return the incomplete string without the opening quote - return s[1:], "" + # Incomplete string: handle it based on strict mode + if not self.strict: + return s[1:], "" + else: + return json.loads(f'"{s[1:]}"'), "" str_val = s[:end + 1] s = s[end + 1:] + if not self.strict: + return str_val[1:-1], s # Remove surrounding quotes for strict mode return json.loads(str_val), s def parse_number(self, s, e): @@ -143,5 +145,4 @@ def parse_false(self, s, e): def parse_null(self, s, e): if s.startswith('n'): return None, s[4:] - raise e - + raise e \ No newline at end of file diff --git a/test.py b/test.py index 638ee81..900a348 100644 --- a/test.py +++ b/test.py @@ -3,82 +3,103 @@ class TestJSONParser(unittest.TestCase): def setUp(self): - self.parser = JSONParser() + self.parser_strict = JSONParser(strict=True) + self.parser_non_strict = JSONParser(strict=False) + # Test for parser_strict + def test_parser_strict_incomplete_object(self): + with self.assertRaises(Exception): + self.parser_strict.parse('{"x": "1st line\\n2nd line', '{"x": "1st line\\n2nd line"}') + + def 
test_parser_strict_incomplete_string(self): + with self.assertRaises(Exception): + self.parser_strict.parse('{"x": "1st line\\n2nd line"', '{"x": "1st line\\n2nd line"}') + + def test_parser_strict_complete_string(self): + self.assertEqual(self.parser_strict.parse('{"x": "1st line\\n2nd line"}').get('x'), "1st line\n2nd line") + + def test_parser_strict_incomplete_object(self): + self.assertEqual(self.parser_strict.parse('{"x": "1st line\\n2nd line').get('x'), "1st line\n2nd line") + + def test_parser_strict_incomplete_string(self): + self.assertEqual(self.parser_strict.parse('{"x": "1st line\\n2nd line"').get('x'), "1st line\n2nd line") + + # Test for parser_non_strict + def test_parser_non_strict_complete_string(self): + self.assertEqual(self.parser_non_strict.parse('{"x": "1st line\\n2nd line"}').get('x'), "1st line\n2nd line") + + # Existing tests can remain unchanged... # Number Tests def test_positive_integer(self): - self.assertEqual(self.parser.parse("42"), 42) + self.assertEqual(self.parser_strict.parse("42"), 42) def test_negative_integer(self): - self.assertEqual(self.parser.parse("-42"), -42) + self.assertEqual(self.parser_strict.parse("-42"), -42) def test_positive_float(self): - self.assertEqual(self.parser.parse("12.34"), 12.34) + self.assertEqual(self.parser_strict.parse("12.34"), 12.34) def test_negative_float(self): - self.assertEqual(self.parser.parse("-12.34"), -12.34) + self.assertEqual(self.parser_strict.parse("-12.34"), -12.34) def test_incomplete_positive_float(self): - self.assertEqual(self.parser.parse("12."), 12) + self.assertEqual(self.parser_strict.parse("12."), 12) def test_incomplete_negative_float(self): - self.assertEqual(self.parser.parse("-12."), -12) - - # def test_incomplete_negative_integer(self): - # self.assertEqual(self.parser.parse("-"), -0) + self.assertEqual(self.parser_strict.parse("-12."), -12) def test_invalid_number(self): with self.assertRaises(Exception): - self.parser.parse("1.2.3.4") + 
self.parser_strict.parse("1.2.3.4") # String Tests def test_string(self): - self.assertEqual(self.parser.parse('"I am text"'), 'I am text') - self.assertEqual(self.parser.parse('"I\'m text"'), "I'm text") - self.assertEqual(self.parser.parse('"I\\"m text"'), 'I"m text') + self.assertEqual(self.parser_strict.parse('"I am text"'), 'I am text') + self.assertEqual(self.parser_strict.parse('"I\'m text"'), "I'm text") + self.assertEqual(self.parser_strict.parse('"I\\"m text"'), 'I"m text') def test_incomplete_string(self): with self.assertRaises(Exception): - self.parser.parse('"I am text') - self.parser.parse('"I\'m text') - self.parser.parse('"I\\"m text') + self.parser_strict.parse('"I am text', 'I am text') + self.parser_strict.parse('"I\'m text', 'I\'m text') + self.parser_strict.parse('"I\\"m text', 'I\\m text') # Boolean Tests def test_boolean(self): - self.assertEqual(self.parser.parse("true"), True) - self.assertEqual(self.parser.parse("false"), False) + self.assertEqual(self.parser_strict.parse("true"), True) + self.assertEqual(self.parser_strict.parse("false"), False) # Array Tests def test_empty_array(self): - self.assertEqual(self.parser.parse("[]"), []) + self.assertEqual(self.parser_strict.parse("[]"), []) def test_number_array(self): - self.assertEqual(self.parser.parse("[1,2,3]"), [1, 2, 3]) + self.assertEqual(self.parser_strict.parse("[1,2,3]"), [1, 2, 3]) def test_incomplete_array(self): - self.assertEqual(self.parser.parse("[1,2,3"), [1, 2, 3]) - self.assertEqual(self.parser.parse("[1,2,"), [1, 2]) - self.assertEqual(self.parser.parse("[1,2"), [1, 2]) - self.assertEqual(self.parser.parse("[1,"), [1]) - self.assertEqual(self.parser.parse("[1"), [1]) - self.assertEqual(self.parser.parse("["), []) + self.assertEqual(self.parser_strict.parse("[1,2,3"), [1, 2, 3]) + self.assertEqual(self.parser_strict.parse("[1,2,"), [1, 2]) + self.assertEqual(self.parser_strict.parse("[1,2"), [1, 2]) + self.assertEqual(self.parser_strict.parse("[1,"), [1]) + 
self.assertEqual(self.parser_strict.parse("[1"), [1]) + self.assertEqual(self.parser_strict.parse("["), []) # Object Tests def test_simple_object(self): o = {"a": "apple", "b": "banana"} - self.assertEqual(self.parser.parse('{"a":"apple","b":"banana"}'), o) - self.assertEqual(self.parser.parse('{"a": "apple","b": "banana"}'), o) - self.assertEqual(self.parser.parse('{"a" : "apple", "b" : "banana"}'), o) + self.assertEqual(self.parser_strict.parse('{"a":"apple","b":"banana"}'), o) + self.assertEqual(self.parser_strict.parse('{"a": "apple","b": "banana"}'), o) + self.assertEqual(self.parser_strict.parse('{"a" : "apple", "b" : "banana"}'), o) # Invalid Inputs def test_invalid_input(self): with self.assertRaises(Exception): - self.parser.parse(":atom") + self.parser_strict.parse(":atom") # Extra Space def test_extra_space(self): - self.assertEqual(self.parser.parse(" [1] "), [1]) - self.assertEqual(self.parser.parse(" [1 "), [1]) + self.assertEqual(self.parser_strict.parse(" [1] "), [1]) + self.assertEqual(self.parser_strict.parse(" [1 "), [1]) if __name__ == '__main__': unittest.main()