style improvements, add debug() calls
mara004 committed Nov 4, 2023
1 parent fbc777e commit caef781
Showing 4 changed files with 26 additions and 10 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -312,12 +312,14 @@ for await (const file of files) {
- `.valueOf()` can be used on any JSON-serializable object, but may be very slow for big data.
- `.blobValueOf()` can be used on any pipe-writeable object implementing the `length` property (e.g. `Buffer`). It can be massively faster by circumventing the JSON+UTF8 encode/decode layer, which is inept for large byte arrays.

* You can set the Node.js/Python binary paths by setting the `NODE_BIN` or `PYTHON_BIN` environment variables before importing the library. Otherwise, the `node` and `python3` or `python` binaries will be called relative to your PATH environment variable.
* You can use custom Node.js/Python binary paths by setting the `NODE_BIN` or `PYTHON_BIN` environment variables before importing the library. Otherwise, the `node` and `python3` or `python` binaries will be called relative to your PATH environment variable.

* The inter-process communication can be inspected by setting the `DEBUG` env var to `jspybridge`.

#### Limitations

* The `ffid` keyword is reserved. You cannot use it in variable names, object keys or values, as it is used internally to track objects.

* On the bridge to call JavaScript from Python, due to the limitations of Python and cross-platform IPC, we currently communicate over standard error, which means that specific output in JS standard error can interfere with the bridge. As of this writing, the prefixes `{"r"` and `blob!` are reserved. The same issue exists on Windows with Python. You are, however, very unlikely to have issues with this.
* On the bridge to call JavaScript from Python, due to the limitations of Python and cross-platform IPC, we currently communicate over standard error, which means that specific output in JS standard error can interfere with the bridge (as of this writing, the prefixes `{"r"` and `blob!` are reserved). A similar issue exists on Windows with Python. You are, however, very unlikely to have issues with this.

* Function calls will time out after 100000 ms and throw a `BridgeException` error. That default can be overridden by setting the `REQ_TIMEOUT` environment variable to a new value.
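
As a quick illustration of the environment variables mentioned above, a minimal sketch (not part of this commit; the binary path and timeout value are hypothetical):

```python
import os

# These must be set before the library is imported.
os.environ["NODE_BIN"] = "/opt/node/bin/node"  # hypothetical custom Node.js binary
os.environ["DEBUG"] = "jspybridge"             # log the inter-process communication
os.environ["REQ_TIMEOUT"] = "200000"           # raise the default 100000 ms call timeout

import javascript  # import after configuring the environment
```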
2 changes: 2 additions & 0 deletions examples/python/pdfjs.py
@@ -17,6 +17,7 @@
libcanvas = javascript.require("canvas")


# TODO consider using a JS thread pool to parallelize rendering
def render_pdf(inpath, outdir, scale):

pdf = pdfjs.getDocument(str(inpath)).promise
@@ -33,6 +34,7 @@ def render_pdf(inpath, outdir, scale):
context = canvas.getContext("2d")
page.render({"canvasContext": context, "viewport": viewport}).promise

# note that blobValueOf() is much faster than valueOf()["data"] for large byte buffers
js_buffer = canvas.toBuffer("raw")
py_buffer = js_buffer.blobValueOf()

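The raw buffer fetched via `blobValueOf()` above can be consumed directly on the Python side. A hedged sketch, assuming Pillow is available and that node-canvas's `"raw"` export is BGRA pixel data with a known width and height (neither shown in this diff):

```python
from PIL import Image

def buffer_to_image(py_buffer, width, height):
    # Interpret the raw BGRA bytes received via blobValueOf() as an RGBA image.
    return Image.frombuffer("RGBA", (width, height), py_buffer, "raw", "BGRA", 0, 1)
```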
27 changes: 19 additions & 8 deletions src/javascript/connection.py
@@ -59,21 +59,32 @@ def supports_color():
proc = com_thread = stdout_thread = None


def readCommItem(comm):
def readComItem(stream):

line = comm.readline()
line = stream.readline()
if not line:
return

# blobs may contain any value, including b"\n", so we track len and fetch possible remaining data
if line.startswith(b"blob!"):

_, d, blob = line.split(b"!", maxsplit=2)
d = json.loads(d.decode("utf-8"))
req_len = d.pop("len")
fetch_len = req_len - len(blob) + 1

# blobs may contain any value, including b"\n", so we track length and fetch possible remaining data
# note that either initial_len or fetch_len will include space for a trailing \n
target_len = d.pop("len")
initial_len = len(blob)
fetch_len = (target_len - initial_len) + 1
debug(f"[js -> py] blob r:{d['r']}: target_len {target_len}, initial_len {initial_len}, fetch_len {fetch_len}")
if fetch_len > 0:
blob += comm.read(fetch_len)
d["blob"] = blob[:req_len]
blob += stream.read(fetch_len)

# must end with \n (added by bridge) to separate the next IPC call, which will be received via .readline()
assert blob.endswith(b"\n")
d["blob"] = blob[:-1]
assert len(d["blob"]) == target_len
debug(f"[js -> py] blob r:{d['r']}: {d['blob'][:20]} ... {d['blob'][-20:]} (truncated)")

return d

line = line.decode("utf-8")
@@ -157,7 +168,7 @@ def com_io():
stdout_thread.start()

while proc.poll() is None:
item = readCommItem(proc.stderr)
item = readComItem(proc.stderr)
if item:
comm_items.append(item)
if config.event_loop != None:
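The length bookkeeping in `readComItem()` is easiest to follow with concrete numbers. A hypothetical walk-through (the framing matches the code above; the payload values are made up):

```python
# Suppose the JS side wrote: b'blob!' + b'{"r":1,"len":10}' + b'!' + blob + b'\n',
# with blob == b"abcd\nefghi" (10 bytes, containing a newline after 4 bytes).

line = b'blob!{"r":1,"len":10}!abcd\n'         # readline() stops at the first b"\n"
_, header, blob = line.split(b"!", maxsplit=2)
# blob == b"abcd\n", so initial_len == 5

target_len = 10                                # taken from the JSON header ("len")
fetch_len = (target_len - len(blob)) + 1       # 10 - 5 + 1 == 6
# stream.read(6) yields the remaining b"efghi" plus the trailing b"\n" added by the
# bridge; stripping that final newline recovers the full 10-byte blob.
```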
1 change: 1 addition & 0 deletions test/javascript/test_general.py
@@ -105,6 +105,7 @@ def test_blobValueOf_withNewLine():
assert blob_value == bytes(json_value["data"]) == native_value

# don't actually assert to avoid time dependent test case
# note, the performance difference is much more pronounced for bigger values (see examples/pdfjs.py)
print(f"blobValueOf() faster? {t_blob < t_json} (t_blob: {t_blob}, t_json {t_json})")


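To see the difference the note above refers to, the comparison can be repeated with a larger buffer. A rough sketch (not part of the test suite), assuming `javascript.require` can load Node's built-in `buffer` module:

```python
import time
from javascript import require

Buffer = require("buffer").Buffer
buf = Buffer.alloc(5 * 1024 * 1024, 1)   # ~5 MiB buffer allocated on the JS side

t0 = time.perf_counter()
blob_value = buf.blobValueOf()           # bypasses the JSON+UTF8 encode/decode layer
t_blob = time.perf_counter() - t0

t0 = time.perf_counter()
json_value = buf.valueOf()["data"]       # list of ints round-tripped through JSON
t_json = time.perf_counter() - t0

print(f"t_blob={t_blob:.3f}s, t_json={t_json:.3f}s, equal={bytes(json_value) == blob_value}")
```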
