From d108e626b925b4c9a7b758cfaea52ebf8c27a128 Mon Sep 17 00:00:00 2001 From: vyzo Date: Thu, 17 Aug 2023 13:28:54 +0300 Subject: [PATCH] Some BIO delimited utilities and bug fixes (#754) * fix buf in delimited read-u8 it was raising instead of returning eof * BufferedReader-read-line accepts #f as separator to match the behaviour of gambit's read-line * add varint delimited utilities * newline cosmetics * document varint delimited i/o * fix behaviour of signed fixed integer io bitwise-not does the wrong thing unless the integer has the right number of bits; sigh. * allow varint reading of arbitrary length * update documentation. * bio: allow arbitrary length varint writing --- doc/reference/stdio.md | 61 ++++++++++++++++++++++++++-- src/std/io/bio/api.ss | 44 +++++++++++++++++++- src/std/io/bio/bio-test.ss | 48 ++++++++++++++++++---- src/std/io/bio/delimited.ss | 2 +- src/std/io/bio/util.ss | 81 ++++++++++++++++++++++++------------- 5 files changed, 197 insertions(+), 39 deletions(-) diff --git a/doc/reference/stdio.md b/doc/reference/stdio.md index 74228dad7..955e20dbc 100644 --- a/doc/reference/stdio.md +++ b/doc/reference/stdio.md @@ -559,14 +559,17 @@ Raises an error if the end of input is reached. ```scheme (BufferedReader-read-varuint buf (max-bits 64)) -> integer buf := BufferedReader - max-bits := fixnum + max-bits := fixnum or #f ``` -Reads an unsigned integer, with variadic encoding and a maximum integer length of `max-bits` +Reads an unsigned integer, with variadic encoding and a maximum integer length of `max-bits` if +specified. Raises an error if the end of input is reached. #### BufferedReader-read-varint ```scheme -(BufferedReader-read-varint reader (max-bits 64)) -> integer +(BufferedReader-read-varint buf (max-bits 64)) -> integer + buf := BufferedReader + max-bits := fixnum or #f ``` Reads a signed integer, with variadic encoding and a maximum integer length of `max-bits` Raises an error if the end of input is reached. 
@@ -623,6 +626,31 @@ The separator is either a single character or a list of characters. If `include-sep?` is true, then the separator is include in the stream. If the separator is not encountered within `max-chars`, then an error is raised. +#### BufferedReader-read-delimited +```scheme +(BufferedReader-read-delimited buf read-value) + buf := BufferedReader + read-value := lambda (BufferedReader) -> any +``` + +Reads a varint prefix length delimited value, with `read-value` reading the body. + +#### BufferedReader-read-delimited-u8vector +```scheme +(BufferedReader-read-delimited-u8vector buf) -> u8vector + buf := BufferedReader +``` + +Reads a varint prefix length delimited u8vector. + +#### BufferedReader-read-delimited-string +```scheme +(BufferedReader-read-delimited-string buf) -> string + buf := BufferedReader +``` + +Reads a varint prefix length delimited string. + #### BufferedReader-put-back ```scheme (BufferedReader-put-back buf previous-input) @@ -833,6 +861,33 @@ Returns the number of bytes written. Writes a string, followed by a separator. Returns the number of bytes written +#### BufferedWriter-write-delimited +```scheme +(BufferedWriter-write-delimited buf write-value (buffer-or-size default-small-buffer-size)) -> fixnum + buf := BufferedWriter + write-value := lambda (BufferedWriter) -> fixnum +``` + +Writes a varint prefix length delimited value, with `write-value` writing the body. + +#### BufferedWriter-write-delimited-u8vector +```scheme +(BufferedWriter-write-delimited-u8vector buf bytes) -> fixnum + buf := BufferedWriter + bytes := u8vector +``` + +Writes a varint prefix length delimited u8vector. + +#### BufferedWriter-write-delimited-string +```scheme +(BufferedWriter-write-delimited-string buf str) -> fixnum + buf := BufferedWriter + str := string +``` + +Writes a varint prefix length delimited string. 
+ #### BufferedWriter-flush ``` (BufferedWriter-flush buf) diff --git a/src/std/io/bio/api.ss b/src/std/io/bio/api.ss index b9e0c6679..7a04d17a2 100644 --- a/src/std/io/bio/api.ss +++ b/src/std/io/bio/api.ss @@ -19,6 +19,7 @@ (import: ./util)) (def default-buffer-size (expt 2 15)) ; 32K +(def default-small-buffer-size 4096) (def (make-u8vector-buffer buffer-or-size) (cond @@ -45,7 +46,7 @@ (cond ((not maybe-writer) (let ((writer (open-chunk-writer)) - (buffer (make-u8vector buffer-or-size))) + (buffer (make-u8vector-buffer buffer-or-size))) (BufferedWriter (make-output-buffer writer buffer 0 #f)))) ((is-Writer? maybe-writer) (let ((writer (Writer maybe-writer)) @@ -75,6 +76,47 @@ ([chunk] chunk) (else (u8vector-concatenate chunks))))) +(defreader-ext (read-delimited reader read-value) + (let* ((len (&BufferedReader-read-varuint reader)) + (delimited (&BufferedReader-delimit reader len))) + (read-value delimited))) + +(defreader-ext (read-delimited-u8vector reader) + (let* ((len (&BufferedReader-read-varuint reader)) + (output (make-u8vector len))) + (&BufferedReader-read reader output 0 len len) + output)) + +(defreader-ext (read-delimited-string reader) + (let* ((len (&BufferedReader-read-varuint reader)) + (delimited (&BufferedReader-delimit reader len)) + (output (make-string len))) + (let lp ((i 0)) + (let (next (&BufferedReader-read-char delimited)) + (if (eof-object? next) + (begin + (string-shrink! output i) + output) + (begin + (string-set! 
output i next) + (lp (fx+ i 1)))))))) + +(defwriter-ext (write-delimited writer write-value (buffer-or-size default-small-buffer-size)) + (let* ((tmp-writer (open-buffered-writer #f buffer-or-size)) + (_ (write-value tmp-writer)) + (chunks (get-buffer-output-chunks tmp-writer)) + (len (foldl (lambda (c r) (fx+ (u8vector-length c) r)) 0 chunks)) + (varlen (&BufferedWriter-write-varuint writer len))) + (for-each (cut &BufferedWriter-write writer <>) chunks) + (fx+ varlen len))) + +(defwriter-ext (write-delimited-u8vector writer bytes) + (&BufferedWriter-write-delimited writer (cut &BufferedWriter-write <> bytes))) + +(defwriter-ext (write-delimited-string writer str) + (&BufferedWriter-write-delimited writer (cut &BufferedWriter-write-string <> str) + (fx* 4 (string-length str)))) + ;;; Interface ;; input-buffer BufferedReader implementation (defmethod {read input-buffer} diff --git a/src/std/io/bio/bio-test.ss b/src/std/io/bio/bio-test.ss index c94c30625..13049ab37 100644 --- a/src/std/io/bio/bio-test.ss +++ b/src/std/io/bio/bio-test.ss @@ -9,7 +9,8 @@ ../interface ./api) (export bio-input-test - bio-output-test) + bio-output-test + bio-varint-delimited-test) (def (make-test-u8vector size) (let (u8v (make-u8vector size)) @@ -117,11 +118,11 @@ (check (BufferedReader-read-u32 brd) => #x01020304) (check (BufferedReader-read-u64 brd) => #x0102030405060708) (check (BufferedReader-read-s16 brd) => #x0102) - (check (BufferedReader-read-s16 brd) => (bitwise-not #x8102)) + (check (BufferedReader-read-s16 brd) => -32510) (check (BufferedReader-read-s32 brd) => #x01020304) - (check (BufferedReader-read-s32 brd) => (bitwise-not #x81020304)) + (check (BufferedReader-read-s32 brd) => -2130574588) (check (BufferedReader-read-s64 brd) => #x0102030405060708) - (check (BufferedReader-read-s64 brd) => (bitwise-not #x8102030405060708)) + (check (BufferedReader-read-s64 brd) => -9150748177064392952) (check (BufferedReader-read-varuint brd) => 314159) (check (BufferedReader-read-varint 
brd) => 314159) (check (BufferedReader-read-varint brd) => -314159))) @@ -204,11 +205,11 @@ (check (BufferedWriter-write-u32 bwr #x01020304) => 4) (check (BufferedWriter-write-u64 bwr #x0102030405060708) => 8) (check (BufferedWriter-write-s16 bwr #x0102) => 2) - (check (BufferedWriter-write-s16 bwr -33027) => 2) + (check (BufferedWriter-write-s16 bwr -32510) => 2) (check (BufferedWriter-write-s32 bwr #x01020304) => 4) - (check (BufferedWriter-write-s32 bwr -2164392709) => 4) + (check (BufferedWriter-write-s32 bwr -2130574588) => 4) (check (BufferedWriter-write-s64 bwr #x0102030405060708) => 8) - (check (BufferedWriter-write-s64 bwr -9295995896645158665) => 8) + (check (BufferedWriter-write-s64 bwr -9150748177064392952) => 8) (check (BufferedWriter-write-varuint bwr 314159) => 3) (check (BufferedWriter-write-varint bwr 314159) => 3) (check (BufferedWriter-write-varint bwr -314159) => 3) @@ -247,3 +248,36 @@ (let (bwr (open-buffered-writer #f)) (check (BufferedWriter-write-line bwr input '(#\return #\newline)) => (fx+ (string-length input) 2)) (check (get-buffer-output-u8vector bwr) => output2)))))) + +(def bio-varint-delimited-test + (test-suite "varint delimited i/o" + (test-case "generic output and input" + (let* ((input "the quick brown fox jumped over the lazy dog") + (writer (open-buffered-writer #f)) + (_ (BufferedWriter-write-delimited writer (cut BufferedWriter-write-string <> input))) + (output (get-buffer-output-u8vector writer)) + (reader (open-buffered-reader output)) + (reinput (make-string (string-length input))) + (_ (BufferedReader-read-delimited reader (cut BufferedReader-read-string <> reinput)))) + (check reinput => input) + (check (BufferedReader-peek-char reader) ? 
eof-object?))) + (test-case "u8vector output and input" + (let* ((input "the quick brown fox jumped over the lazy dog") + (input-bytes (string->utf8 input)) + (writer (open-buffered-writer #f)) + (_ (BufferedWriter-write-delimited-u8vector writer input-bytes)) + (output (get-buffer-output-u8vector writer)) + (reader (open-buffered-reader output)) + (reinput-bytes (BufferedReader-read-delimited-u8vector reader)) + (reinput (utf8->string reinput-bytes))) + (check reinput => input) + (check (BufferedReader-peek-char reader) ? eof-object?))) + (test-case "string output and input" + (let* ((input "the quick brown fox jumped over the lazy dog") + (writer (open-buffered-writer #f)) + (_ (BufferedWriter-write-delimited-string writer input)) + (output (get-buffer-output-u8vector writer)) + (reader (open-buffered-reader output)) + (reinput (BufferedReader-read-delimited-string reader))) + (check reinput => input) + (check (BufferedReader-peek-char reader) ? eof-object?))))) diff --git a/src/std/io/bio/delimited.ss b/src/std/io/bio/delimited.ss index 5e97ff102..f749eb446 100644 --- a/src/std/io/bio/delimited.ss +++ b/src/std/io/bio/delimited.ss @@ -44,7 +44,7 @@ (set! 
(&delimited-input-buffer-remaining delim) (fx- remaining 1)) u8) - (raise-io-error 'BufferedReader-read-u8 "input limit exceeded")))) + '#!eof))) (def (bio-delimited-peek-u8 delim) (let (remaining (&delimited-input-buffer-remaining delim)) diff --git a/src/std/io/bio/util.ss b/src/std/io/bio/util.ss index 420911792..60fd9a33c 100644 --- a/src/std/io/bio/util.ss +++ b/src/std/io/bio/util.ss @@ -61,37 +61,46 @@ ;; reader (defreader-ext (read-u16 reader) - (read-uXX reader 2)) + (read-uint reader 2)) (defreader-ext (read-s16 reader) - (read-sXX reader 2)) + (read-sint reader 2)) (defreader-ext (read-u32 reader) - (read-uXX reader 4)) + (read-uint reader 4)) (defreader-ext (read-s32 reader) - (read-sXX reader 4)) + (read-sint reader 4)) (defreader-ext (read-u64 reader) - (read-uXX reader 8)) + (read-uint reader 8)) (defreader-ext (read-s64 reader) - (read-sXX reader 8)) + (read-sint reader 8)) -(def (read-uXX reader len) +(def (read-uint reader len) (let lp ((i 0) (x 0)) (if (fx< i len) (let (next (&BufferedReader-read-u8-inline reader)) (if (eof-object? next) - (raise-io-error 'Buffered-reader-read-uXX "premature end of input") + (raise-io-error 'Buffered-reader-read-uint "premature end of input") (lp (fx+ i 1) (bitwise-ior (arithmetic-shift x 8) next)))) x))) -(def (read-sXX reader len) - (let ((ux (read-uXX reader len)) - (bits (fxarithmetic-shift-left len 3))) - (if (bit-set? (fx- bits 1) ux) - (bitwise-not ux) - ux))) +(def (read-sint reader len) + (let (uint (read-uint reader len)) + (complement-input uint len))) + +(def (complement-input uint len) + (let (bits (fxarithmetic-shift-left len 3)) + (if (< uint (expt-cache-get (fx- bits 1))) + uint + (- uint (expt-cache-get bits))))) (defreader-ext (read-varuint reader (max-bits 64)) + (def read-more? + (if max-bits + (lambda (shift) + (fx< shift max-bits)) + (lambda (shift) #t))) + (let lp ((shift 0) (x 0)) - (if (fx< shift max-bits) + (if (read-more? 
shift) (let (next (&BufferedReader-read-u8-inline reader)) (if (eof-object? next) (raise-io-error 'Buffered-reader-read-varuint "premature end of input") @@ -467,7 +476,11 @@ read)))) (defreader-ext (read-line reader (sep #\newline) (include-sep? #f) (max-chars #f)) - (let* ((separators (if (pair? sep) sep [sep])) + (let* ((separators + (cond + ((pair? sep) sep) + ((not sep) []) + (else [sep]))) (read-more? (if max-chars (lambda (x) (fx< x max-chars)) @@ -494,19 +507,19 @@ ;; writer (defwriter-ext (write-u16 writer uint) - (write-uXX writer uint 2)) + (write-uint writer uint 2)) (defwriter-ext (write-s16 writer int) - (write-sXX writer int 2)) + (write-sint writer int 2)) (defwriter-ext (write-u32 writer uint) - (write-uXX writer uint 4)) + (write-uint writer uint 4)) (defwriter-ext (write-s32 writer int) - (write-sXX writer int 4)) + (write-sint writer int 4)) (defwriter-ext (write-u64 writer int) - (write-uXX writer int 8)) + (write-uint writer int 8)) (defwriter-ext (write-s64 writer int) - (write-sXX writer int 8)) + (write-sint writer int 8)) -(def (write-uXX writer uint len) +(def (write-uint writer uint len) (let lp ((i 0) (shift (fx- (fxarithmetic-shift-left len 3) 8))) (if (fx< i len) (let (u8 (bitwise-and (arithmetic-shift uint (fx- shift)) #xff)) @@ -514,13 +527,17 @@ (lp (fx+ i 1) (fx- shift 8))) len))) -(def (write-sXX writer int len) +(def (write-sint writer int len) + (write-uint writer (complement-output int len) len)) + +(def (complement-output int len) (if (< int 0) - (write-uXX writer (bitwise-not int) len) - (write-uXX writer int len))) + (let (bits (fxarithmetic-shift-left len 3)) + (+ (expt-cache-get bits) int)) + int)) (defwriter-ext (write-varuint writer uint (max-bits 64)) - (when (fx> (integer-length uint) max-bits) + (when (and max-bits (fx> (integer-length uint) max-bits)) (raise-io-error 'BufferedWriter-write-varuint "varuint max bits exceeded")) (let lp ((uint uint) (wrote 0)) (if (> uint #x7f) @@ -604,3 +621,13 @@ (else result))) (let 
(wrote (&BufferedWriter-write-char-inline writer separator)) (fx+ result wrote))))) + +;; expt caches +(def +expt-cache+ + (let (cache (make-vector 64 #f)) + (for-each (lambda (i) (vector-set! cache i (expt 2 (fx+ i 1)))) + (iota 64)) + cache)) + +(def (expt-cache-get len) + (vector-ref +expt-cache+ (fx- len 1)))