diff --git a/README.md b/README.md index 3c8a4e9..f9195d9 100644 --- a/README.md +++ b/README.md @@ -312,12 +312,14 @@ for await (const file of files) { - `.valueOf()` can be used on any JSON-serializable object, but may be very slow for big data. - `.blobValueOf()` can be used on any pipe-writeable object implementing the `length` property (e.g. `Buffer`). It can be massively faster by circumventing the JSON+UTF8 encode/decode layer, which is inept for large byte arrays. -* You can set the Node.js/Python binary paths by setting the `NODE_BIN` or `PYTHON_BIN` enviornment variables before importing the library. Otherwise, the `node` and `python3` or `python` binaries will be called relative to your PATH enviornment variable. +* You can use custom Node.js/Python binary paths by setting the `NODE_BIN` or `PYTHON_BIN` environment variables before importing the library. Otherwise, the `node` and `python3` or `python` binaries will be called relative to your PATH environment variable. + +* The inter-process communication can be inspected by setting the `DEBUG` env var to `jspybridge`. #### Limitations * The `ffid` keyword is reserved. You cannot use it in variable names, object keys or values as this is used to internlly track objects. -* On the bridge to call JavaScript from Python, due to the limiatations of Python and cross-platform IPC, we currently communicate over standard error which means that specific output in JS standard error can interfere with the bridge. As of this writing, the prefices `{"r"` and `blob!` are reserved. The same issue exists on Windows with python. You are however very unlikely to have issues with this. +* On the bridge to call JavaScript from Python, due to the limitations of Python and cross-platform IPC, we currently communicate over standard error which means that specific output in JS standard error can interfere with the bridge (as of this writing, the prefixes `{"r"` and `blob!` are reserved). 
A similar issue exists on Windows with Python. You are however very unlikely to have issues with this. * Function calls will timeout after 100000 ms and throw a `BridgeException` error. That default value can be overridden by defining the new value of `REQ_TIMEOUT` in an environment variable. diff --git a/examples/python/pdfjs.py b/examples/python/pdfjs.py index e7295f5..d150ea1 100644 --- a/examples/python/pdfjs.py +++ b/examples/python/pdfjs.py @@ -33,6 +33,7 @@ def render_pdf(inpath, outdir, scale): context = canvas.getContext("2d") page.render({"canvasContext": context, "viewport": viewport}).promise + # note that blobValueOf() is much faster than valueOf()["data"] for large byte buffers js_buffer = canvas.toBuffer("raw") py_buffer = js_buffer.blobValueOf() diff --git a/src/javascript/connection.py b/src/javascript/connection.py index b620f4a..36c0731 100644 --- a/src/javascript/connection.py +++ b/src/javascript/connection.py @@ -59,21 +59,32 @@ def supports_color(): proc = com_thread = stdout_thread = None -def readCommItem(comm): +def readComItem(stream): - line = comm.readline() + line = stream.readline() if not line: return - # blobs may contain any value, including b"\n", so we track len and fetch possible remaining data if line.startswith(b"blob!"): + _, d, blob = line.split(b"!", maxsplit=2) d = json.loads(d.decode("utf-8")) - req_len = d.pop("len") - fetch_len = req_len - len(blob) + 1 + + # blobs may contain any value, including b"\n", so we track length and fetch possible remaining data + # note that either initial_len or fetch_len will include space for a trailing \n + target_len = d.pop("len") + initial_len = len(blob) + fetch_len = (target_len - initial_len) + 1 + debug(f"[js -> py] blob r:{d['r']}: target_len {target_len}, initial_len {initial_len}, fetch_len {fetch_len}") if fetch_len > 0: - blob += comm.read(fetch_len) - d["blob"] = blob[:req_len] + blob += stream.read(fetch_len) + + # must end with \n (added by bridge) to separate the next IPC 
call, which will be received via .readline() + assert blob.endswith(b"\n") + d["blob"] = blob[:-1] + assert len(d["blob"]) == target_len + debug(f"[js -> py] blob r:{d['r']}: {d['blob'][:20]} ... {d['blob'][-20:]} (truncated)") + return d line = line.decode("utf-8") @@ -81,6 +92,7 @@ def readCommItem(comm): print("[JSE]", line) return try: + # FIXME valueOf() spams debug for big data d = json.loads(line) debug("[js -> py]", int(time.time() * 1000), line) return d @@ -157,7 +169,7 @@ def com_io(): stdout_thread.start() while proc.poll() is None: - item = readCommItem(proc.stderr) + item = readComItem(proc.stderr) if item: comm_items.append(item) if config.event_loop != None: diff --git a/test/javascript/test_general.py b/test/javascript/test_general.py index 607afa4..fb54df8 100644 --- a/test/javascript/test_general.py +++ b/test/javascript/test_general.py @@ -105,6 +105,7 @@ def test_blobValueOf_withNewLine(): assert blob_value == bytes(json_value["data"]) == native_value # don't actually assert to avoid time dependent test case + # note, the performance difference is much more pronounced for bigger values (see examples/pdfjs.py) print(f"blobValueOf() faster? {t_blob < t_json} (t_blob: {t_blob}, t_json {t_json})")