Skip to content

Commit

Permalink
Some BIO delimited utilities and bug fixes (#754)
Browse files Browse the repository at this point in the history
* fix bug in delimited read-u8

it was raising instead of returning eof

* BufferedReader-read-line accepts #f as separator

to match the behaviour of gambit's read-line

* add varint delimited utilities

* newline cosmetics

* document varint delimited i/o

* fix behaviour of signed fixed integer io

bitwise-not does the wrong thing unless the integer has the right
number of bits; sigh.

* allow varint reading of arbitrary length

* update documentation.

* bio: allow arbitrary length varint writing
  • Loading branch information
vyzo authored Aug 17, 2023
1 parent ba545b7 commit d108e62
Show file tree
Hide file tree
Showing 5 changed files with 197 additions and 39 deletions.
61 changes: 58 additions & 3 deletions doc/reference/stdio.md
Original file line number Diff line number Diff line change
Expand Up @@ -559,14 +559,17 @@ Raises an error if the end of input is reached.
```scheme
(BufferedReader-read-varuint buf (max-bits 64)) -> integer
buf := BufferedReader
max-bits := fixnum
max-bits := fixnum or #f
```
Reads an unsigned integer, with variadic encoding and a maximum integer length of `max-bits`
Reads an unsigned integer, with variadic encoding and a maximum integer length of `max-bits` if
specified.
Raises an error if the end of input is reached.

#### BufferedReader-read-varint
```scheme
(BufferedReader-read-varint reader (max-bits 64)) -> integer
(BufferedReader-read-varint buf (max-bits 64)) -> integer
buf := BufferedReader
max-bits := fixnum or #f
```
Reads a signed integer, with variadic encoding and a maximum integer length of `max-bits` if specified.
Raises an error if the end of input is reached.
Expand Down Expand Up @@ -623,6 +626,31 @@ The separator is either a single character or a list of characters.
If `include-sep?` is true, then the separator is included in the stream.
If the separator is not encountered within `max-chars`, then an error is raised.

#### BufferedReader-read-delimited
```scheme
(BufferedReader-read-delimited buf read-value)
buf := BufferedReader
read-value := lambda (BufferedReader) -> any
```

Reads a varint prefix length delimited value, with `read-value` reading the body.

#### BufferedReader-read-delimited-u8vector
```scheme
(BufferedReader-read-delimited-u8vector buf) -> u8vector
buf := BufferedReader
```

Reads a varint prefix length delimited u8vector.

#### BufferedReader-read-delimited-string
```scheme
(BufferedReader-read-delimited-string buf) -> string
buf := BufferedReader
```

Reads a varint prefix length delimited string.

#### BufferedReader-put-back
```scheme
(BufferedReader-put-back buf previous-input)
Expand Down Expand Up @@ -833,6 +861,33 @@ Returns the number of bytes written.
Writes a string, followed by a separator.
Returns the number of bytes written.

#### BufferedWriter-write-delimited
```scheme
(BufferedWriter-write-delimited buf write-value (buffer-or-size default-small-buffer-size))
buf := BufferedWriter
write-value := lambda (BufferedWriter) -> fixnum
```

Writes a varint prefix length delimited value, with `write-value` writing the body.

#### BufferedWriter-write-delimited-u8vector
```scheme
(BufferedWriter-write-delimited-u8vector buf bytes) -> fixnum
buf := BufferedWriter
bytes := u8vector
```

Writes a varint prefix length delimited u8vector.

#### BufferedWriter-write-delimited-string
```scheme
(BufferedWriter-write-delimited-string buf str) -> fixnum
buf := BufferedWriter
str := string
```

Writes a varint prefix length delimited string.

#### BufferedWriter-flush
```
(BufferedWriter-flush buf)
Expand Down
44 changes: 43 additions & 1 deletion src/std/io/bio/api.ss
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
(import: ./util))

;; Default buffer size, in bytes.
(def default-buffer-size (expt 2 15)) ; 32K
;; Smaller default buffer size, in bytes (4K); used as the default
;; `buffer-or-size` of the scratch writer in write-delimited.
(def default-small-buffer-size 4096)

(def (make-u8vector-buffer buffer-or-size)
(cond
Expand All @@ -45,7 +46,7 @@
(cond
((not maybe-writer)
(let ((writer (open-chunk-writer))
(buffer (make-u8vector buffer-or-size)))
(buffer (make-u8vector-buffer buffer-or-size)))
(BufferedWriter (make-output-buffer writer buffer 0 #f))))
((is-Writer? maybe-writer)
(let ((writer (Writer maybe-writer))
Expand Down Expand Up @@ -75,6 +76,47 @@
([chunk] chunk)
(else (u8vector-concatenate chunks)))))

;; Reads a varuint length prefix from `reader`, wraps `reader` with
;; &BufferedReader-delimit for that length, and applies `read-value` to the
;; delimited reader. Returns whatever `read-value` returns.
;; NOTE(review): bytes of the delimited body that `read-value` leaves unread
;; are not drained here -- confirm whether callers rely on that.
(defreader-ext (read-delimited reader read-value)
  (let (body-length (&BufferedReader-read-varuint reader))
    (read-value (&BufferedReader-delimit reader body-length))))

;; Reads a varuint length prefix from `reader`, then reads that many bytes
;; into a freshly allocated u8vector, which is returned.
(defreader-ext (read-delimited-u8vector reader)
  (let (size (&BufferedReader-read-varuint reader))
    (let (bytes (make-u8vector size))
      ;; presumably the trailing arguments are start, end and the minimum
      ;; number of bytes required -- TODO confirm against BufferedReader-read
      (&BufferedReader-read reader bytes 0 size size)
      bytes)))

;; Reads a varuint length prefix from `reader`, then decodes characters from
;; the delimited body until end of input and returns them as a string.
;; The result string is allocated with one slot per prefix unit and shrunk
;; to the number of characters actually read.
(defreader-ext (read-delimited-string reader)
  (let* ((size (&BufferedReader-read-varuint reader))
         (body (&BufferedReader-delimit reader size))
         (result (make-string size)))
    (let fill ((count 0))
      (let (ch (&BufferedReader-read-char body))
        (cond
         ((eof-object? ch)
          ;; fewer characters than allocated slots: trim the tail
          (string-shrink! result count)
          result)
         (else
          (string-set! result count ch)
          (fill (fx+ count 1))))))))

;; Writes a varuint length-prefixed value to `writer`.
;; `write-value` is applied to a scratch buffered writer (opened with no
;; underlying sink and `buffer-or-size` as its buffer); the chunks it produced
;; are measured, the total length is written to `writer` as a varuint, and
;; the chunks are then copied to `writer`.
;; Returns the sum of the varuint write result and the body length.
(defwriter-ext (write-delimited writer write-value (buffer-or-size default-small-buffer-size))
  (let (scratch (open-buffered-writer #f buffer-or-size))
    (write-value scratch)
    (let* ((chunks (get-buffer-output-chunks scratch))
           (body-size
            (let sum ((rest chunks) (total 0))
              (if (pair? rest)
                (sum (cdr rest) (fx+ (u8vector-length (car rest)) total))
                total)))
           ;; the prefix must go out before any of the body chunks
           (prefix-size (&BufferedWriter-write-varuint writer body-size)))
      (for-each (lambda (chunk) (&BufferedWriter-write writer chunk)) chunks)
      (fx+ prefix-size body-size))))

;; Writes `bytes` (a u8vector) to `writer` preceded by its length as a
;; varuint. Returns the sum of the varuint write result and the byte length.
;; The length is known up front, so the prefix and the payload are written
;; directly instead of staging the payload in a scratch writer and copying
;; it a second time.
(defwriter-ext (write-delimited-u8vector writer bytes)
  (let* ((len (u8vector-length bytes))
         (varlen (&BufferedWriter-write-varuint writer len)))
    (&BufferedWriter-write writer bytes)
    (fx+ varlen len)))

;; Writes `str` to `writer` as a varuint length-prefixed value, using
;; &BufferedWriter-write-string to encode the body into a scratch writer.
;; Returns the result of &BufferedWriter-write-delimited.
;; The scratch buffer is sized at 4 bytes per character, clamped to at least
;; 1 so that an empty string never requests a zero-length buffer.
;; NOTE(review): make-u8vector-buffer is assumed to reject or mishandle a
;; size of 0 -- confirm against its definition.
(defwriter-ext (write-delimited-string writer str)
  (&BufferedWriter-write-delimited writer (cut &BufferedWriter-write-string <> str)
                                   (max 1 (fx* 4 (string-length str)))))

;;; Interface
;; input-buffer BufferedReader implementation
(defmethod {read input-buffer}
Expand Down
48 changes: 41 additions & 7 deletions src/std/io/bio/bio-test.ss
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
../interface
./api)
(export bio-input-test
bio-output-test)
bio-output-test
bio-varint-delimited-test)

(def (make-test-u8vector size)
(let (u8v (make-u8vector size))
Expand Down Expand Up @@ -117,11 +118,11 @@
(check (BufferedReader-read-u32 brd) => #x01020304)
(check (BufferedReader-read-u64 brd) => #x0102030405060708)
(check (BufferedReader-read-s16 brd) => #x0102)
(check (BufferedReader-read-s16 brd) => (bitwise-not #x8102))
(check (BufferedReader-read-s16 brd) => -32510)
(check (BufferedReader-read-s32 brd) => #x01020304)
(check (BufferedReader-read-s32 brd) => (bitwise-not #x81020304))
(check (BufferedReader-read-s32 brd) => -2130574588)
(check (BufferedReader-read-s64 brd) => #x0102030405060708)
(check (BufferedReader-read-s64 brd) => (bitwise-not #x8102030405060708))
(check (BufferedReader-read-s64 brd) => -9150748177064392952)
(check (BufferedReader-read-varuint brd) => 314159)
(check (BufferedReader-read-varint brd) => 314159)
(check (BufferedReader-read-varint brd) => -314159)))
Expand Down Expand Up @@ -204,11 +205,11 @@
(check (BufferedWriter-write-u32 bwr #x01020304) => 4)
(check (BufferedWriter-write-u64 bwr #x0102030405060708) => 8)
(check (BufferedWriter-write-s16 bwr #x0102) => 2)
(check (BufferedWriter-write-s16 bwr -33027) => 2)
(check (BufferedWriter-write-s16 bwr -32510) => 2)
(check (BufferedWriter-write-s32 bwr #x01020304) => 4)
(check (BufferedWriter-write-s32 bwr -2164392709) => 4)
(check (BufferedWriter-write-s32 bwr -2130574588) => 4)
(check (BufferedWriter-write-s64 bwr #x0102030405060708) => 8)
(check (BufferedWriter-write-s64 bwr -9295995896645158665) => 8)
(check (BufferedWriter-write-s64 bwr -9150748177064392952) => 8)
(check (BufferedWriter-write-varuint bwr 314159) => 3)
(check (BufferedWriter-write-varint bwr 314159) => 3)
(check (BufferedWriter-write-varint bwr -314159) => 3)
Expand Down Expand Up @@ -247,3 +248,36 @@
(let (bwr (open-buffered-writer #f))
(check (BufferedWriter-write-line bwr input '(#\return #\newline)) => (fx+ (string-length input) 2))
(check (get-buffer-output-u8vector bwr) => output2))))))

;; Round-trip tests for the varint length-delimited helpers: each case writes
;; a delimited value into an in-memory writer, reads it back from the
;; resulting bytes, and checks that the reader is at end of input afterwards.
(def bio-varint-delimited-test
  (test-suite "varint delimited i/o"
    ;; generic variant: caller-supplied body writer and body reader
    (test-case "generic output and input"
      (let ((text "the quick brown fox jumped over the lazy dog")
            (bwr (open-buffered-writer #f)))
        (BufferedWriter-write-delimited bwr (lambda (w) (BufferedWriter-write-string w text)))
        (let* ((encoded (get-buffer-output-u8vector bwr))
               (brd (open-buffered-reader encoded))
               (decoded (make-string (string-length text))))
          (BufferedReader-read-delimited brd (lambda (r) (BufferedReader-read-string r decoded)))
          (check decoded => text)
          (check (BufferedReader-peek-char brd) ? eof-object?))))
    ;; u8vector variant: body is the UTF-8 encoding of the text
    (test-case "u8vector output and input"
      (let* ((text "the quick brown fox jumped over the lazy dog")
             (text-bytes (string->utf8 text))
             (bwr (open-buffered-writer #f)))
        (BufferedWriter-write-delimited-u8vector bwr text-bytes)
        (let* ((encoded (get-buffer-output-u8vector bwr))
               (brd (open-buffered-reader encoded))
               (decoded-bytes (BufferedReader-read-delimited-u8vector brd))
               (decoded (utf8->string decoded-bytes)))
          (check decoded => text)
          (check (BufferedReader-peek-char brd) ? eof-object?))))
    ;; string variant
    (test-case "string output and input"
      (let ((text "the quick brown fox jumped over the lazy dog")
            (bwr (open-buffered-writer #f)))
        (BufferedWriter-write-delimited-string bwr text)
        (let* ((encoded (get-buffer-output-u8vector bwr))
               (brd (open-buffered-reader encoded))
               (decoded (BufferedReader-read-delimited-string brd)))
          (check decoded => text)
          (check (BufferedReader-peek-char brd) ? eof-object?))))))
2 changes: 1 addition & 1 deletion src/std/io/bio/delimited.ss
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@
(set! (&delimited-input-buffer-remaining delim)
(fx- remaining 1))
u8)
(raise-io-error 'BufferedReader-read-u8 "input limit exceeded"))))
'#!eof)))

(def (bio-delimited-peek-u8 delim)
(let (remaining (&delimited-input-buffer-remaining delim))
Expand Down
81 changes: 54 additions & 27 deletions src/std/io/bio/util.ss
Original file line number Diff line number Diff line change
Expand Up @@ -61,37 +61,46 @@

;; reader
(defreader-ext (read-u16 reader)
(read-uXX reader 2))
(read-uint reader 2))
(defreader-ext (read-s16 reader)
(read-sXX reader 2))
(read-sint reader 2))
(defreader-ext (read-u32 reader)
(read-uXX reader 4))
(read-uint reader 4))
(defreader-ext (read-s32 reader)
(read-sXX reader 4))
(read-sint reader 4))
(defreader-ext (read-u64 reader)
(read-uXX reader 8))
(read-uint reader 8))
(defreader-ext (read-s64 reader)
(read-sXX reader 8))
(read-sint reader 8))

(def (read-uXX reader len)
(def (read-uint reader len)
(let lp ((i 0) (x 0))
(if (fx< i len)
(let (next (&BufferedReader-read-u8-inline reader))
(if (eof-object? next)
(raise-io-error 'Buffered-reader-read-uXX "premature end of input")
(raise-io-error 'Buffered-reader-read-uint "premature end of input")
(lp (fx+ i 1) (bitwise-ior (arithmetic-shift x 8) next))))
x)))

(def (read-sXX reader len)
(let ((ux (read-uXX reader len))
(bits (fxarithmetic-shift-left len 3)))
(if (bit-set? (fx- bits 1) ux)
(bitwise-not ux)
ux)))
;; Reads a `len`-byte unsigned integer via read-uint and converts it to its
;; signed value with complement-input.
(def (read-sint reader len)
  (complement-input (read-uint reader len) len))

;; Interprets `uint`, an unsigned integer occupying `len` bytes, as a
;; two's-complement signed integer of the same width.
;; Values below 2^(bits-1) are returned unchanged; all others have 2^bits
;; subtracted, where bits = 8 * len.
(def (complement-input uint len)
  (let* ((bits (fxarithmetic-shift-left len 3))
         (sign-threshold (expt-cache-get (fx- bits 1))))
    (cond
     ((>= uint sign-threshold)
      (- uint (expt-cache-get bits)))
     (else uint))))

(defreader-ext (read-varuint reader (max-bits 64))
(def read-more?
(if max-bits
(lambda (shift)
(fx< shift max-bits))
(lambda (shift) #t)))

(let lp ((shift 0) (x 0))
(if (fx< shift max-bits)
(if (read-more? shift)
(let (next (&BufferedReader-read-u8-inline reader))
(if (eof-object? next)
(raise-io-error 'Buffered-reader-read-varuint "premature end of input")
Expand Down Expand Up @@ -467,7 +476,11 @@
read))))

(defreader-ext (read-line reader (sep #\newline) (include-sep? #f) (max-chars #f))
(let* ((separators (if (pair? sep) sep [sep]))
(let* ((separators
(cond
((pair? sep) sep)
((not sep) [])
(else [sep])))
(read-more?
(if max-chars
(lambda (x) (fx< x max-chars))
Expand All @@ -494,33 +507,37 @@

;; writer
(defwriter-ext (write-u16 writer uint)
(write-uXX writer uint 2))
(write-uint writer uint 2))
(defwriter-ext (write-s16 writer int)
(write-sXX writer int 2))
(write-sint writer int 2))
(defwriter-ext (write-u32 writer uint)
(write-uXX writer uint 4))
(write-uint writer uint 4))
(defwriter-ext (write-s32 writer int)
(write-sXX writer int 4))
(write-sint writer int 4))
(defwriter-ext (write-u64 writer int)
(write-uXX writer int 8))
(write-uint writer int 8))
(defwriter-ext (write-s64 writer int)
(write-sXX writer int 8))
(write-sint writer int 8))

(def (write-uXX writer uint len)
(def (write-uint writer uint len)
(let lp ((i 0) (shift (fx- (fxarithmetic-shift-left len 3) 8)))
(if (fx< i len)
(let (u8 (bitwise-and (arithmetic-shift uint (fx- shift)) #xff))
(&BufferedWriter-write-u8-inline writer u8)
(lp (fx+ i 1) (fx- shift 8)))
len)))

(def (write-sXX writer int len)
(def (write-sint writer int len)
(write-uint writer (complement-output int len) len))

(def (complement-output int len)
(if (< int 0)
(write-uXX writer (bitwise-not int) len)
(write-uXX writer int len)))
(let (bits (fxarithmetic-shift-left len 3))
(+ (expt-cache-get bits) int))
int))

(defwriter-ext (write-varuint writer uint (max-bits 64))
(when (fx> (integer-length uint) max-bits)
(when (and max-bits (fx> (integer-length uint) max-bits))
(raise-io-error 'BufferedWriter-write-varuint "varuint max bits exceeded"))
(let lp ((uint uint) (wrote 0))
(if (> uint #x7f)
Expand Down Expand Up @@ -604,3 +621,13 @@
(else result)))
(let (wrote (&BufferedWriter-write-char-inline writer separator))
(fx+ result wrote)))))

;; expt caches
;; Precomputed powers of two: slot i holds 2^(i+1), for i in 0..63.
(def +expt-cache+
  (list->vector
   (map (lambda (slot) (expt 2 (fx+ slot 1)))
        (iota 64))))

;; Looks up 2^len in +expt-cache+; the value for exponent `len` is stored
;; at slot len - 1.
(def (expt-cache-get len)
  (let (slot (fx- len 1))
    (vector-ref +expt-cache+ slot)))

0 comments on commit d108e62

Please sign in to comment.