Skip to content

Commit

Permalink
Raise error for escaping surrogates
Browse files Browse the repository at this point in the history
  • Loading branch information
nineteendo committed Jul 17, 2024
1 parent c08332c commit 843e0a9
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 12 deletions.
23 changes: 19 additions & 4 deletions src/jsonyx/_accelerator.c
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ ascii_escape_unichar(Py_UCS4 c, unsigned char *output, Py_ssize_t chars)
c = Py_UNICODE_LOW_SURROGATE(c);
output[chars++] = '\\';
}
/* Reject lone surrogates (U+D800..U+DFFF).
 *
 * The range test needs two comparisons joined by &&: in C,
 * `0xd800 <= c <= 0xdfff` parses as `(0xd800 <= c) <= 0xdfff`, which is
 * always true and would make every \u escape fail.
 *
 * This must also be an `else` branch: the code just above replaces `c`
 * with Py_UNICODE_LOW_SURROGATE(c), so an unconditional check would
 * reject the second half of every valid surrogate pair.
 * NOTE(review): assumes the `}` directly above closes the
 * `if (c >= 0x10000)` surrogate-pair branch -- confirm. */
else if (0xd800 <= c && c <= 0xdfff) {
    PyErr_Format(PyExc_ValueError,
                 "Surrogate '\\u%x' can not be escaped", c);
    /* A negative length tells the caller that an exception is set. */
    return -1;
}
output[chars++] = 'u';
output[chars++] = Py_hexdigits[(c >> 12) & 0xf];
output[chars++] = Py_hexdigits[(c >> 8) & 0xf];
Expand Down Expand Up @@ -234,6 +239,9 @@ ascii_escape_unicode(PyObject *pystr)
}
else {
chars = ascii_escape_unichar(c, output, chars);
if (chars < 0) {
return NULL;
}
}
}
output[chars++] = '"';
Expand Down Expand Up @@ -341,7 +349,7 @@ static void
raise_errmsg(const char *msg, PyObject *filename, PyObject *s, Py_ssize_t end)
{
/* Use JSONSyntaxError exception to raise a nice looking SyntaxError subclass */
PyObject *JSONSyntaxError = _PyImport_GetModuleAttrString("jsonyx.scanner",
PyObject *JSONSyntaxError = _PyImport_GetModuleAttrString("jsonyx",
"JSONSyntaxError");
if (JSONSyntaxError == NULL) {
return;
Expand Down Expand Up @@ -929,12 +937,15 @@ _match_number_unicode(PyScannerObject *s, PyObject *pyfilename, PyObject *pystr,
for (i = 0; i < n; i++) {
buf[i] = (char) PyUnicode_READ(kind, str, i + start);
}
if (is_float)
if (is_float) {
    rval = PyFloat_FromString(numstr);
    /* PyFloat_FromString() returns NULL on failure, so only look at the
     * value when the conversion succeeded. A NULL rval is left untouched
     * for the code after this branch to handle.
     * When NaN/Infinity are disallowed, a literal that overflows to a
     * non-finite double is reported as a syntax error at `start`. */
    if (rval != NULL && !s->allow_nan
        && !isfinite(PyFloat_AS_DOUBLE(rval)))
    {
        Py_DECREF(numstr);
        Py_DECREF(rval);
        raise_errmsg("Infinity is not allowed", pyfilename, pystr, start);
        return NULL;
    }
}
else
rval = PyLong_FromString(buf, NULL, 10);
Py_DECREF(numstr);
Expand Down Expand Up @@ -1073,10 +1084,10 @@ scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
PyObject *pyfilename;
PyObject *pystr;
PyObject *rval;
Py_ssize_t idx = -1;
Py_ssize_t idx = 0;
Py_ssize_t next_idx = -1;
static char *kwlist[] = {"filename", "string", NULL};
if (!PyArg_ParseTupleAndKeywords(args, kwds, "UUn:scan_once", kwlist, &pyfilename, &pystr) ||
if (!PyArg_ParseTupleAndKeywords(args, kwds, "UU:scan_once", kwlist, &pyfilename, &pystr) ||
_skip_comments(self, pyfilename, pystr, &idx))
{
return NULL;
Expand All @@ -1087,6 +1098,10 @@ scanner_call(PyScannerObject *self, PyObject *args, PyObject *kwds)
}
rval = scan_once_unicode(self, memo, pyfilename, pystr, idx, &next_idx);
Py_DECREF(memo);
if (rval == NULL) {
return NULL;
}
idx = next_idx;
if (_skip_comments(self, pyfilename, pystr, &idx)) {
return NULL;
}
Expand Down
3 changes: 1 addition & 2 deletions src/jsonyx/_accelerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,11 @@
"encode_basestring_ascii",
"make_encoder",
"make_scanner",
"scanstring",
]

# pylint: disable-next=E0611, E0401
from _jsonyx import ( # type: ignore # isort: skip
DuplicateKey, encode_basestring, # type: ignore # noqa: PLC2701
encode_basestring_ascii, # type: ignore # noqa: PLC2701
make_encoder, make_scanner, scanstring, # type: ignore # noqa: PLC2701
make_encoder, make_scanner, # type: ignore # noqa: PLC2701
)
4 changes: 4 additions & 0 deletions src/jsonyx/_encoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,10 @@ def replace(match: Match[str]) -> str:
return _ESCAPE_DCT[s]
except KeyError:
uni: int = ord(s)
if 0xd800 <= uni <= 0xdfff:
msg: str = f"Surrogate {s!r} can not be escaped"
raise ValueError(msg) from None

if uni < 0x10000:
return f"\\u{uni:04x}"

Expand Down
10 changes: 7 additions & 3 deletions src/jsonyx/test_jsonyx/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,19 @@
if TYPE_CHECKING:
from types import ModuleType

# Two copies of the ``jsonyx`` package used to parametrize the tests: one
# imported with ``jsonyx._accelerator`` fresh, one with it blocked.
# NOTE(review): a value of ``None`` presumably means the import failed --
# confirm against ``import_fresh_module``.
cjson: ModuleType | None = import_fresh_module(
    "jsonyx", fresh=["jsonyx._accelerator"],
)
pyjson: ModuleType | None = import_fresh_module(
    "jsonyx", blocked=["jsonyx._accelerator"],
)


@pytest.fixture(params=[cjson, pyjson], ids=["cjson", "pyjson"], name="json")
def get_json(request: pytest.FixtureRequest) -> ModuleType:
    """Get JSON module.

    The fixture is exposed to tests as ``json`` and is parametrized over
    ``cjson`` and ``pyjson`` (test ids ``"cjson"`` and ``"pyjson"``).
    The current test is skipped when the selected module is ``None``.
    """
    result: ModuleType | None = request.param
    if result is None:
        # Single skip message, naming the module the imports above target.
        pytest.skip("requires jsonyx._accelerator")

    return result
69 changes: 66 additions & 3 deletions src/jsonyx/test_jsonyx/test_loads.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,11 @@ def get_loads(json: ModuleType) -> FunctionType:
return json.loads


# The cases are a list rather than a set so that they are collected in a
# fixed, reproducible order.
@pytest.mark.parametrize(("string", "expected"), [
    ("true", True),
    ("false", False),
    ("null", None),
])
def test_keywords(loads: FunctionType, string: str, expected: Any) -> None:
    """Test JSON keywords.

    Identity (``is``) is asserted rather than equality, so a value that
    merely compares equal to the expected singleton does not pass.
    """
    assert loads(string) is expected
Expand Down Expand Up @@ -94,8 +94,71 @@ def test_keywords(loads: FunctionType, string: str, expected: Any) -> None:
("-1.1", -1.1),
("-1.1e1", -11.0),
})
def test_number(loads: FunctionType, string: str, expected: float) -> None:
def test_number(loads: FunctionType, string: str, expected: Any) -> None:
"""Test JSON number."""
obj: Any = loads(string)
assert isinstance(obj, type(expected))
assert obj == expected


@pytest.mark.parametrize(("string", "expected"), [
    # The empty string literal
    ('""', ""),
    # Unescaped characters (1- to 4-byte UTF-8 encodings)
    ('"$"', "$"),
    ('"\u00a3"', "\u00a3"),
    ('"\u0418"', "\u0418"),
    ('"\u0939"', "\u0939"),
    ('"\u20ac"', "\u20ac"),
    ('"\ud55c"', "\ud55c"),
    ('"\U00010348"', "\U00010348"),
    ('"\U001096B3"', "\U001096B3"),
    # Two-character backslash escapes
    (r'"\""', '"'),
    (r'"\\"', "\\"),
    (r'"\/"', "/"),
    (r'"\b"', "\b"),
    (r'"\f"', "\f"),
    (r'"\n"', "\n"),
    (r'"\r"', "\r"),
    (r'"\t"', "\t"),
    # \uXXXX escapes, including surrogate pairs for non-BMP code points
    (r'"\u0024"', "$"),
    (r'"\u00a3"', "\u00a3"),
    (r'"\u0418"', "\u0418"),
    (r'"\u0939"', "\u0939"),
    (r'"\u20ac"', "\u20ac"),
    (r'"\ud55c"', "\ud55c"),
    (r'"\ud800\udf48"', "\U00010348"),
    (r'"\udbe5\udeb3"', "\U001096B3"),
    # TODO(Nice Zombies): add more tests
])
def test_string(loads: FunctionType, string: str, expected: Any) -> None:
    """Check that a JSON string literal decodes to the expected text."""
    decoded: Any = loads(string)
    assert decoded == expected


@pytest.mark.parametrize(("string", "expected"), [
    # An array with no elements
    ("[]", []),
    # TODO(Nice Zombies): add more tests
])
def test_array(loads: FunctionType, string: str, expected: Any) -> None:
    """Check that a JSON array decodes to the expected list."""
    decoded: Any = loads(string)
    assert decoded == expected


@pytest.mark.parametrize(("string", "expected"), [
    # An object with no members
    ("{}", {}),
    # TODO(Nice Zombies): add more tests
])
def test_object(loads: FunctionType, string: str, expected: Any) -> None:
    """Check that a JSON object decodes to the expected dict."""
    decoded: Any = loads(string)
    assert decoded == expected

0 comments on commit 843e0a9

Please sign in to comment.