From b3c4038f408eb712003af691c4c7c038ddd24b44 Mon Sep 17 00:00:00 2001 From: Francois-Rene Rideau Date: Sun, 17 Sep 2023 22:41:16 +0000 Subject: [PATCH] Many cleanups post JSON RPC --- src/gerbil/prelude/core.ss | 4 +- src/std/build-spec.ss | 4 +- src/std/io/api.ss | 6 +- src/std/io/interface.ss | 14 +-- src/std/io/port.ss | 45 ++++++++ src/std/misc/bytes.ss | 7 ++ src/std/misc/with-id-test.ss | 39 ------- src/std/misc/with-id.ss | 38 ------- src/std/net/httpd/handler.ss | 2 +- src/std/stxutil.ss | 33 +++--- src/std/sugar-test.ss | 28 ++++- src/std/sugar.ss | 40 ++++++- src/std/text/basic-parsers.ss | 186 +++++++++++++++++++++++++++++++++ src/std/text/char-set.ss | 87 +++++++++++++++ src/std/text/json/json-test.ss | 8 +- src/std/text/json/util.ss | 30 ++++-- 16 files changed, 454 insertions(+), 117 deletions(-) create mode 100644 src/std/io/port.ss delete mode 100644 src/std/misc/with-id-test.ss delete mode 100644 src/std/misc/with-id.ss create mode 100644 src/std/text/basic-parsers.ss create mode 100644 src/std/text/char-set.ss diff --git a/src/gerbil/prelude/core.ss b/src/gerbil/prelude/core.ss index 073150a90a..7a3495ba2d 100644 --- a/src/gerbil/prelude/core.ss +++ b/src/gerbil/prelude/core.ss @@ -90,9 +90,9 @@ package: gerbil with-input-from-file with-output-to-file open-input-file open-output-file close-input-port close-output-port - read read-char peek-char + read read-char peek-char read-u8 peek-u8 eof-object? char-ready? - write display newline write-char + write display newline write-char write-u8 load ;; transcript-on transcript-off ; void )) diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index be6490136b..ea6d487293 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -42,6 +42,7 @@ "io/file" "io/util" "io/error" + "io/port" "io/bio/types" "io/bio/input" "io/bio/delimited" @@ -167,6 +168,8 @@ "parser/grammar" "parser" ;; :std/text + "text/char-set" + "text/basic-parsers" "text/utf8" "text/utf16" "text/utf32" @@ -309,7 +312,6 @@ "crypto" ;; :std/misc "misc/atom" - "misc/with-id" "misc/concurrent-plan" "misc/timeout" "misc/list-builder" diff --git a/src/std/io/api.ss b/src/std/io/api.ss index 2b93928d46..e0e73c64b6 100644 --- a/src/std/io/api.ss +++ b/src/std/io/api.ss @@ -7,11 +7,13 @@ ./delimited ./file ./util - ./error) + ./error + ./port) (export (import: ./bio/api) (import: ./strio/api) (import: ./socket/api) (import: ./delimited) (import: ./file) (import: ./util) - (import: ./error)) + (import: ./error) + (import: ./port)) diff --git a/src/std/io/interface.ss b/src/std/io/interface.ss index bb17db8f08..297d54a05f 100644 --- a/src/std/io/interface.ss +++ b/src/std/io/interface.ss @@ -27,12 +27,13 @@ (write u8v (start 0) (end (u8vector-length u8v)))) ;; buffered IO -(interface (BufferedReader Reader) +(interface (PeekableReader Reader) ;; reads a single byte (read-u8) ;; peeks the next byte - (peek-u8) + (peek-u8)) +(interface (BufferedReader PeekableReader) ;; puts back some bytes previously read; can also inject bytes. ;; - previous-input is a u8 or a list of u8s injected back into the buffer (put-back previous-input) @@ -51,7 +52,7 @@ ;; writes a single byte (write-u8 u8) - ;; flushes the buffer to the underlyin output instance + ;; flushes the buffer to the underlying output instance (flush) ;; resets the underlying output and buffer state, allowing reuse of buffers. @@ -62,17 +63,18 @@ ;; read into a string (read-string str (start 0) (end (string-length str)) (need 0))) -(interface (BufferedStringReader StringReader) +(interface (PeekableStringReader StringReader) ;; reads a single char (read-char) ;; peeks the next char - (peek-char) + (peek-char)) +(interface (BufferedStringReader PeekableStringReader) ;; puts back some chars previously read; can also inject characters. ;; - previous-input is a char or a list of chars injected into the buffer (put-back previous-input) - ;; skips the next count bytes of input + ;; skips the next count chars of input (skip count) ;; returns a new StringBufferedReader instance delimiting the input length that shares diff --git a/src/std/io/port.ss b/src/std/io/port.ss new file mode 100644 index 0000000000..86f11db685 --- /dev/null +++ b/src/std/io/port.ss @@ -0,0 +1,45 @@ +(import + :gerbil/gambit/ports + :std/sugar) + +(export raw-port raw-port-port) + +;; Raw wrapper for a port without any buffering +(defstruct raw-port (port) final: #t unchecked: #t) + +(defrule (def-raw-port-method (name port . args) body ...) + (defmethod {name raw-port} + (lambda (self . args) + (let (port (&raw-port-port self)) + body ...)))) + +(def-raw-port-method (close port) + (close-port port)) +(def-raw-port-method (read-char port) + (read-char port)) +(def-raw-port-method (peek-char port) + (peek-char port)) +(def-raw-port-method (read port u8v (start 0) (end (u8vector-length u8v)) (need 0)) + (read-subu8vector u8v port start end need)) +(def-raw-port-method (write port u8v (start 0) (end (u8vector-length u8v))) + (write-subu8vector u8v port start end)) +(def-raw-port-method (read-u8 port) + (read-u8 port)) +(def-raw-port-method (peek-u8 port) + (peek-u8 port)) +(def-raw-port-method (put-back port previous-input) + (error "cannot put-back into port" port previous-input)) +(def-raw-port-method (skip port count) + (error "cannot skip from port" port count)) +(def-raw-port-method (delimit port limit) + (error "cannot delimit port" port limit)) +(def-raw-port-method (reset! port reader) + (error "cannot reset! port" port reader)) +(def-raw-port-method (write-u8 port u8) + (write-u8 u8 port)) +(def-raw-port-method (flush port) + (force-output port)) +(def-raw-port-method (read-string port str (start 0) (end (string-length str)) (need 0)) + (read-substring str start end port need)) +(def-raw-port-method (write-string port str (start 0) (end (string-length str))) + (write-substring str start end port)) diff --git a/src/std/misc/bytes.ss b/src/std/misc/bytes.ss index 06ced146d4..305feee09d 100644 --- a/src/std/misc/bytes.ss +++ b/src/std/misc/bytes.ss @@ -110,6 +110,8 @@ &u8vector-double-set!/native &u8vector-swap! + + u8vector-every ) ;;; Endianness @@ -695,3 +697,8 @@ END-C (define-c-lambda &u8vector-double-set!/native (scheme-object int double) void "*(double*)(U8_DATA(___arg1) + ___arg2) = ___arg3; ___return;") ) + +(def (u8vector-every pred bytes) + (declare (fixnum)) + (let lp ((i (1- (u8vector-length bytes)))) + (or (< i 0) (and (pred (u8vector-ref bytes i)) (lp (1- i)))))) diff --git a/src/std/misc/with-id-test.ss b/src/std/misc/with-id-test.ss deleted file mode 100644 index bad9fb2fd2..0000000000 --- a/src/std/misc/with-id-test.ss +++ /dev/null @@ -1,39 +0,0 @@ -(export with-id-test) - -(import - :gerbil/gambit/exceptions - :std/srfi/13 - :std/misc/number - :std/misc/string - :std/sugar - :std/stxutil - :std/test - ./with-id) - -(def with-id-test - (test-suite "test suite for std/misc/with-id" - (test-case "with-id, defining variables" - (def mem (make-vector 5 0)) - (defrule (defvar name n) - (with-id name ((@ #'name "@") (get #'name) (set #'name "-set!")) - (begin (def @ n) (def (get) (vector-ref mem @)) (def (set x) (vector-set! mem @ x))))) - (defvar A 0) - (defvar B 1) - (defvar C 2) - (defvar D 3) - (A-set! 42) (B-set! (+ (A) 27)) (increment! (C) 5) (D-set! (post-increment! (C) 18)) - (check-equal? mem #(42 69 23 5 0))) - (test-case "with-id, variable resolution in macro" - (check-exception - (eval '(begin - (defsyntax (m stx) - (def myvar "bar") - #'(with-id ctx ((foo my-var)) (def foo 2))) - (m))) - true) - (defsyntax (m stx) - (with-syntax ((ctx (stx-car stx)) - (myvar "bar")) - #'(with-id ctx ((foo #'myvar)) (def foo 3)))) - (m) - (check-equal? bar 3)))) diff --git a/src/std/misc/with-id.ss b/src/std/misc/with-id.ss deleted file mode 100644 index b2e0d4f95b..0000000000 --- a/src/std/misc/with-id.ss +++ /dev/null @@ -1,38 +0,0 @@ -;;; -*- Gerbil -*- -;;; © fare -;;; Easier identifier introduction -(export #t) - -(import (for-syntax ./func - :std/stxutil) - :std/sugar) - -(defrules defsyntax/unhygienic () - ((_ (m-id stx) body ...) - (defsyntax m-id (compose syntax-local-introduce (lambda (stx) body ...) syntax-local-introduce))) - ((_ m-id f-expr) (identifier? #'m-id) - (defsyntax m-id (compose syntax-local-introduce f-expr syntax-local-introduce)))) - -;; Written with the precious help of Alex Knauth -(defsyntax (with-id stx) - (syntax-case stx () - ((wi (id-spec ...) body ...) - #'(wi wi (id-spec ...) body ...)) - ((wi ctx (id-spec ...) body body1 body+ ...) - (identifier? #'ctx) - #'(wi ctx (id-spec ...) (begin body body1 body+ ...))) - ((_ ctx (id-spec ...) template) - (identifier? #'ctx) - (with-syntax ((((id expr) ...) - (stx-map (lambda (spec) (syntax-case spec () - ((id) #'(id 'id)) - ((id ct-expr more ...) #'(id (list ct-expr more ...))) - (id (identifier? #'id) #'(id 'id)))) - #'(id-spec ...)))) - #'(begin - (defsyntax/unhygienic (m stx2) - (with-syntax ((id (identifierify (stx-car (stx-cdr stx2)) expr)) ...) - (... #'(... template)))) - (m ctx)))))) - -(defrule (with-id/expr stuff ...) (let () (with-id stuff ...))) diff --git a/src/std/net/httpd/handler.ss b/src/std/net/httpd/handler.ss index 04dc247128..5a30f43aeb 100644 --- a/src/std/net/httpd/handler.ss +++ b/src/std/net/httpd/handler.ss @@ -11,10 +11,10 @@ :std/foreign :std/text/utf8 :std/pregexp - :std/misc/with-id ./base (for-syntax :std/stxutil :std/misc/string)) + (export http-request-handler http-request? http-request-method http-request-url http-request-path http-request-params diff --git a/src/std/stxutil.ss b/src/std/stxutil.ss index 31e5b7b54a..5c731dec6c 100644 --- a/src/std/stxutil.ss +++ b/src/std/stxutil.ss @@ -2,9 +2,7 @@ ;;; © vyzo ;;; syntax utilities; import for-syntax (import - :gerbil/gambit/bytes - :std/format - :std/text/hex) + :std/format) (export #t (for-syntax #t)) ;; format an identifier; see also stx-identifier @@ -15,24 +13,11 @@ (datum->syntax ctx (string->symbol (apply format fmt (map stx-e args))) (stx-source ctx))) -;; Use maybe-intern-symbol instead of string->symbol to avoid DoS attacks -;; that cause you to intern too many symbols and run out of memory. -;; : (Or Symbol String) <- String -(def (maybe-intern-symbol string) - (or (##find-interned-symbol string) string)) - -;; Use maybe-intern-symbol instead of string->keyword to avoid DoS attacks -;; that cause you to intern too many keywords and run out of memory. -;; : (Or Keyword String) <- String -(def (maybe-intern-keyword string) - (or (##find-interned-keyword string) string)) - (def (displayify x port) (cond ((member x '(#f #t () #!void #!eof)) (void)) ((or (string? x) (symbol? x) (number? x)) (display x port)) ((keyword? x) (display (keyword->string x) port)) - ((bytes? x) (display (bytes->string x) port)) ((pair? x) (displayify (car x) port) (displayify (cdr x) port)) ((vector? x) (displayify (vector->list x) port)) ((AST? x) (displayify (stx-e x) port)) @@ -42,8 +27,22 @@ (x (string->symbol (stringify x))))) (def keywordify (case-lambda ((x) (if (keyword? x) x (string->keyword (stringify x)))) (x (string->keyword (stringify x))))) +(def (identifierify stx . x) (datum->syntax stx (apply symbolify x))) + + +;; Use maybe-intern-symbol instead of string->symbol to avoid DoS attacks +;; that cause you to intern too many symbols and run out of memory. +;; : (Or Symbol String) <- String +(def (maybe-intern-symbol string) + (or (##find-interned-symbol string) string)) + +;; Use maybe-intern-symbol instead of string->keyword to avoid DoS attacks +;; that cause you to intern too many keywords and run out of memory. +;; : (Or Keyword String) <- String +(def (maybe-intern-keyword string) + (or (##find-interned-keyword string) string)) + (def maybe-symbolify (case-lambda ((x) (if (symbol? x) x (maybe-intern-symbol (stringify x)))) (x (maybe-intern-symbol (stringify x))))) (def maybe-keywordify (case-lambda ((x) (if (keyword? x) x (maybe-intern-keyword (stringify x)))) (x (maybe-intern-keyword (stringify x))))) -(def (identifierify stx . x) (datum->syntax stx (apply symbolify x))) diff --git a/src/std/sugar-test.ss b/src/std/sugar-test.ss index edd76c2806..18537f4d8f 100644 --- a/src/std/sugar-test.ss +++ b/src/std/sugar-test.ss @@ -1,6 +1,7 @@ (export sugar-test) (import :std/test + :std/misc/number :std/pregexp :std/sugar) @@ -59,4 +60,29 @@ (check ((is 'a test: eq?) 'a) => #t) (check ((is 2.0) 2.0) => #t) (check ((is "a") "a") => #t)) - )) + + (test-case "with-id, defining variables" + (def mem (make-vector 5 0)) + (defrule (defvar name n) + (with-id name ((@ #'name "@") (get #'name) (set #'name "-set!")) + (begin (def @ n) (def (get) (vector-ref mem @)) (def (set x) (vector-set! mem @ x))))) + (defvar A 0) + (defvar B 1) + (defvar C 2) + (defvar D 3) + (A-set! 42) (B-set! (+ (A) 27)) (increment! (C) 5) (D-set! (post-increment! (C) 18)) + (check-equal? mem #(42 69 23 5 0))) + (test-case "with-id, variable resolution in macro" + (check-exception + (eval '(begin + (defsyntax (m stx) + (def myvar "bar") + #'(with-id ctx ((foo my-var)) (def foo 2))) + (m))) + true) + (defsyntax (m stx) + (with-syntax ((ctx (stx-car stx)) + (myvar "bar")) + #'(with-id ctx ((foo #'myvar)) (def foo 3)))) + (m) + (check-equal? bar 3)))) diff --git a/src/std/sugar.ss b/src/std/sugar.ss index eddfc93115..b4fe12ea66 100644 --- a/src/std/sugar.ss +++ b/src/std/sugar.ss @@ -22,7 +22,13 @@ let-hash awhen chain - is) + is + with-id + with-id/expr + defsyntax/unhygienic) + +(import (for-syntax :std/misc/func + :std/stxutil)) (defrules defrule () ((_ (name args ...) body ...) @@ -367,3 +373,35 @@ ((_ type-test value-test arg) (chain <> (v (and (type-test v) (value-test arg v)))))) + + +;;; Easier identifier introduction +(defrules defsyntax/unhygienic () + ((_ (m-id stx) body ...) + (defsyntax m-id (compose syntax-local-introduce (lambda (stx) body ...) syntax-local-introduce))) + ((_ m-id f-expr) (identifier? #'m-id) + (defsyntax m-id (compose syntax-local-introduce f-expr syntax-local-introduce)))) + +;; Written with the precious help of Alex Knauth +(defsyntax (with-id stx) + (syntax-case stx () + ((wi (id-spec ...) body ...) + #'(wi wi (id-spec ...) body ...)) + ((wi ctx (id-spec ...) body body1 body+ ...) + (identifier? #'ctx) + #'(wi ctx (id-spec ...) (begin body body1 body+ ...))) + ((_ ctx (id-spec ...) template) + (identifier? #'ctx) + (with-syntax ((((id expr) ...) + (stx-map (lambda (spec) (syntax-case spec () + ((id) #'(id 'id)) + ((id ct-expr more ...) #'(id (list ct-expr more ...))) + (id (identifier? #'id) #'(id 'id)))) + #'(id-spec ...)))) + #'(begin + (defsyntax/unhygienic (m stx2) + (with-syntax ((id (identifierify (stx-car (stx-cdr stx2)) expr)) ...) + (... #'(... template)))) + (m ctx)))))) + +(defrule (with-id/expr stuff ...) (let () (with-id stuff ...))) diff --git a/src/std/text/basic-parsers.ss b/src/std/text/basic-parsers.ss new file mode 100644 index 0000000000..1aafb8134d --- /dev/null +++ b/src/std/text/basic-parsers.ss @@ -0,0 +1,186 @@ +;; -*- Gerbil -*- +;;;; Basic LL(1) parsers + +;; These basic LL(1) parsers work with an object satisfying the PeekableStringReader interface. +;; Be sure to wrap your port in a (raw-port port) and cast your wrapped port or BufferedStringReader +;; to a PeekableStringReader to avoid performance penalty in calling these methods. + +;; TODO: parsing combinators that produce generating functions for all the values of a parse +;; from a generator (or stream?) of values? +;; OR, combinators that use interface-passing to handle the specific + +(export #t) + +(import + :std/error + :std/io + (only-in :std/parser/base parse-error? raise-parse-error) + :std/iter + :std/misc/bytes + :std/misc/list-builder + :std/srfi/1 + :std/srfi/13 + :std/sugar + :std/text/char-set) + +(def (string-reader-eof? reader) + (eof-object? (PeekableStringReader-peek-char reader))) + +;;; Parse a natural number in decimal on the current reader, return it. +(def (parse-natural reader (base 10)) + (alet (digit (char-ascii-digit (PeekableStringReader-peek-char reader) base)) + (let loop ((n digit)) + (PeekableStringReader-read-char reader) + (alet (next-digit (char-ascii-digit (PeekableStringReader-peek-char reader) base)) + (loop (+ next-digit (* base n))) + n)) + (raise-parse-error 'parse-natural "Not a digit in requested base" + (PeekableStringReader-peek-char reader) base reader))) + +(def (parse-signed-integer reader (base 10)) + (let ((char (PeekableStringReader-peek-char reader))) + (cond + ((eqv? char #\+) + (PeekableStringReader-read-char reader) + (parse-natural reader base)) + ((eqv? char #\-) + (PeekableStringReader-read-char reader) + (- (parse-natural reader base))) + ((char-ascii-digit char) + (parse-natural reader base)) + (else + (raise-parse-error 'parse-signed-integer "Neither a sign nor a digit in requested base" + char base reader))))) + +(def (parse-maybe-one-of char-pred?) + (lambda (reader) + (and (char-pred? (PeekableStringReader-peek-char reader)) + (PeekableStringReader-read-char reader)))) + +(def (parse-one-of char-pred?) + (lambda (reader) + (if (char-pred? (PeekableStringReader-peek-char reader)) + (PeekableStringReader-read-char reader) + (raise-parse-error 'parse-one-of "Unexpected character" + (PeekableStringReader-peek-char reader) char-pred? reader)))) + +(def (parse-any-number-of char-pred?) + (lambda (in) + (and (char-pred? (PeekableStringReader-peek-char in)) + (call-with-output-string + (lambda (out) (while (begin (write-char (PeekableStringReader-read-char in) out) + (char-pred? (PeekableStringReader-peek-char in))))))))) + +(def (parse-one-or-more-of char-pred?) + (lambda (in) + (or ((parse-any-number-of char-pred?) in) + (raise-parse-error 'parse-one-or-more-of "Unexpected character" (PeekableStringReader-peek-char in) in)))) + +(def (parse-maybe-char char) + (parse-maybe-one-of (cut eqv? char <>))) + +(def (parse-one-char char) + (parse-one-of (cut eqv? char <>))) + +(def (parse-and-skip-any-whitespace reader (whitespace? char-strict-whitespace?)) + (while (whitespace? (PeekableStringReader-peek-char reader)) + (PeekableStringReader-read-char reader))) + +(def parse-eof (parse-one-of eof-object?)) + +(def (parse-eol reader) + (def char ((parse-one-of char-eol?) reader)) + (when (eqv? char #\return) + ((parse-maybe-char #\newline) reader))) + +(def (parse-literal-string string) + (lambda (reader) + (string-for-each (lambda (c) ((parse-one-char c) reader)) string))) + +(def (parse-n-digits n (base 10)) + (lambda (reader) + (let loop ((n n) (r 0)) + (if (zero? n) r + (let* ((char (PeekableStringReader-peek-char reader)) + (digit (char-ascii-digit char base))) + (if digit + (begin (PeekableStringReader-read-char reader) (loop (- n 1) (+ digit (* base r)))) + (raise-parse-error 'parse-n-digits "not a digit" char reader n base))))))) + +;; Like parse-line, but handles (and still strips) any of the CRLF, CR and LF line endings +(def (parse-line reader) + (call-with-output-string + [] (lambda (out) + (let loop () + (let ((char (PeekableStringReader-peek-char reader))) + (cond + ((char-eol? char) (parse-eol reader)) + ((eof-object? char) (void)) + (else (display char out) (PeekableStringReader-read-char reader) (loop)))))))) + +(def (parse-lines reader (parse-line parse-line)) + (with-list-builder (c) + (until (string-reader-eof? reader) + (c (parse-line reader)) + (parse-eol reader)))) + +(def (parse-to-eof parse) + (lambda (reader) (begin0 (parse reader) (parse-eof reader)))) + + +;; Parse an entire PeekableReader +(def (parse-reader parser reader (description reader) (where 'parse-reader)) + (with-catch (lambda (e) (raise-parse-error where "failure parsing" description (error-message e))) + (lambda () ((parse-to-eof parser) reader)))) + +;; Parse an entire port +(def (parse-port parser port (description port) (where 'parse-port)) + (parse-reader parser (PeekableStringReader (raw-port port)) description where)) + +;; Parse an entire file +(def (parse-file parser file (description file) (where 'parse-file)) + (call-with-input-file file (lambda (port) (parse-port parser port description where)))) + +;; Parse an entire string +(def (parse-string parser string (description string) (where 'parse-string)) + (parse-reader parser (PeekableStringReader (open-buffered-string-reader string)) description where)) + +;; Parse an entire file line-by-line +(def (parse-file-lines parse-line file (description file) (where 'parse-file-lines)) + (parse-file file (cut parse-lines <> parse-line) description where)) + + +;; Monadic parsing combinators +(def ((parse-alternatives alternatives (where 'parse-alternatives)) reader) + (let loop ((as alternatives)) + (if (null? as) + (raise-parse-error where "none applied" alternatives reader) + (with-catch (lambda (e) (if (parse-error? e) (loop (cdr as)) (raise e))) + (lambda () ((car as) reader)))))) +(defrule (parse-result result) (lambda (_port) result)) +(def (parse-pure value) (parse-result value)) +(def (parse-bind processed processor) + (lambda (reader) ((processor (processed reader)) reader))) +(def (parse-or . alternatives) + (parse-alternatives alternatives 'parse-or)) +(defrule (parse-begin parse-ignored ... parse-value) + (lambda (reader) (parse-ignored reader) ... (parse-value reader))) +(defrule (parse-begin0 parse-value parse-ignored ...) + (lambda (reader) (begin0 (parse-value reader) (parse-ignored reader) ...))) +(def (parse-repeated parse-element parse-terminator (rhead '())) + (let loop ((r rhead)) + (parse-or + (parse-begin parse-terminator (parse-result (reverse r))) + (parse-bind parse-element (lambda (e) (loop [e . r])))))) +(def (parse-separated parse-element parse-separator parse-terminator) + (parse-or + (parse-begin parse-terminator (parse-pure '())) + (parse-bind parse-element + (lambda (e) (parse-repeated (parse-begin parse-separator parse-element) + parse-terminator [e]))))) +(def ((parse-n-repeats n parse-element) reader) + (for/collect ((_ (in-range n))) (parse-element reader))) +(def ((parse* f . parse-elements) reader) + (apply f (map-in-order (lambda (ee) (ee reader)) parse-elements))) +(def (parse-list . parse-elements) + (apply parse* list parse-elements)) diff --git a/src/std/text/char-set.ss b/src/std/text/char-set.ss new file mode 100644 index 0000000000..169f94df3d --- /dev/null +++ b/src/std/text/char-set.ss @@ -0,0 +1,87 @@ +;; -*- Gerbil -*- +;;;; Basic char sets, as both byte + +(export #t) + +(import + :std/sugar) + +;; Codepoints assume Unicode encoding + +(defrule (def-codepoint (name x y ...) body ...) + (with-id name ((codepoint-fun "codepoint-" #'name) + (char-fun "char-" #'name)) + (begin + (def (codepoint-fun x y ...) (declare (fixnum)) body ...) + (def (char-fun x y ...) (and (char? x) (codepoint-fun (char->integer x) y ...)))))) + +(def-codepoint (ascii? c) + (<= 0 c 127)) +(def-codepoint (ascii-uppercase? c) ;; [A-Z] + (<= 65 c 90)) +(def-codepoint (ascii-lowercase? c) ;; [a-z] + (<= 97 c 122)) +(def-codepoint (ascii-alphabetic? c) ;; [A-Za-z] + (or (codepoint-ascii-uppercase? c) (codepoint-ascii-lowercase? c))) +(def-codepoint (ascii-numeric? c) ;; [0-9] + (<= 48 c 57)) +(def-codepoint (ascii-alphanumeric? c) ;; [A-Za-z0-9] + (or (codepoint-ascii-alphabetic? c) (codepoint-ascii-numeric? c))) +(def-codepoint (ascii-alphanumeric-or-underscore? c) ;; [A-Za-z0-9_] + (or (codepoint-ascii-alphanumeric? c) (= c 95))) +(def-codepoint (ascii-graphic? c) ;; any ascii "graphic" character + (<= 32 c 127)) + + +;;; There is no consensus on what a Unicode "whitespace" is. +;; See https://en.wikipedia.org/wiki/Whitespace_character + +;; Whitespace as minimally defined by HTML, JSON. +;; R7RS says this is the set of whitespace accepted by all Scheme implementations, +;; though implementations may allow additional whitespace "such as page-break" +(def-codepoint (strict-whitespace? c) + (or (= c #x20) ;; #\space + (= c #x09) ;; #\tab + (= c #x0A) ;; #\newline + (= c #x0D))) ;; #\return + +;; Whitespace as defined by C, C++ and Python. +(def-codepoint (ascii-whitespace? c) + (or (codepoint-ascii-whitespace? c) + (= c #x0B) ;; #\vtab (vertical tab) C'\v' + (= c #x0C))) ;; #\page (page break, form feed) C'\f' + +;; Whitespace as defined by the underlying Scheme implementation +;; For Gambit and thus Gerbil (so far), it is the union of ASCII whitespace +;; plus Unicode Space Separators (#x20 #xA0 #x1680 #x2000-#x200a #x202f #x205f #x3000) +;; plus Unicode Line Separators (#x0A #x0D #x85 #x2028 #x2029) +;; To check, see: +;; (import :std/format :std/iter :std/misc/list :std/sugar) (with-list-builder (c) (for (i (in-range (1+ ##max-char))) (try (when (char-whitespace? (integer->char i)) (c (format "~04x" i))) (catch (_) (void))))) +(def-codepoint (scheme-whitespace? c) + (char-whitespace? (integer->char c))) + +;; Note that JavaScript accepts the ASCII whitespace, the Unicode Space Separators, +;; #xFEFF (ZWNBSP), but doesn't consider the line separators whitespace; rather +;; it considers #x0A #x0D #x2028 #x2029 as line terminators but not #x85 (Next Line). + +;; Rust recognizes the ASCII whitespace plus #x85 #x200E #x200F #x2028 #x2029. + +;; Whichever language or grammar you parse, be sure to look at its latest specification +;; to identify its specific definition of "whitespace". + +(def-codepoint (ascii-printable? c) ;; Should we really include 127 though? + (or (codepoint-ascii-graphic? c) (codepoint-ascii-whitespace? c))) + +;; Assume ASCII, base 2 to 36 +(def (codepoint-ascii-digit c (base 10)) + (let (found (lambda (d) (and (< d base) d))) + (cond + ((<= 48 c 57) (found (- c 48))) ;; ASCII 0-9 + ((<= 65 c 90) (found (- c 55))) ;; ASCII A-Z + ((<= 97 c 122) (found (- c 87))) ;; ASCII a-z + (else #f)))) +(def (char-ascii-digit c (base 10)) + (and (char? c) (codepoint-ascii-digit (char->integer c) base))) + +(def (char-eol? x) + (or (eqv? x #\newline) (eqv? x #\return) (eof-object? x))) diff --git a/src/std/text/json/json-test.ss b/src/std/text/json/json-test.ss index 72b07424a0..5b020cade4 100644 --- a/src/std/text/json/json-test.ss +++ b/src/std/text/json/json-test.ss @@ -2,10 +2,11 @@ ;;; (C) vyzo at hackzen.org ;;; :std/text/json unit test -(import :std/test +(import :std/io :std/misc/walist + :std/parser/base :std/sugar - :std/io + :std/test :std/text/utf8 ./api) (export json-test) @@ -55,7 +56,8 @@ "{\"a\":1,\"b\":2,\"c\":{\"d\":3,\"e\":4,\"f\":5}}")) (check-encode-decode [1 2 #f #t 3] "[1,2,false,true,3]") (check-encode (walist '((d . 41) (c . 23))) "{\"d\":41,\"c\":23}") - (check (call-with-output-string (cut write-json (foo 23 41) <>)) => "{\"a\":23,\"b\":41}")) + (check (call-with-output-string (cut write-json (foo 23 41) <>)) => "{\"a\":23,\"b\":41}") + (check-exception (string->json-object "true junk") parse-error?)) (test-case "io zoo" (def obj diff --git a/src/std/text/json/util.ss b/src/std/text/json/util.ss index 5ccc5b03e3..881d5216b9 100644 --- a/src/std/text/json/util.ss +++ b/src/std/text/json/util.ss @@ -3,10 +3,23 @@ ;;; json utilities (import :gerbil/gambit/ports - :std/io :std/misc/ports :std/misc/process - :std/iter :std/misc/alist :std/misc/hash :std/misc/list :std/misc/list-builder - :std/misc/ports :std/misc/plist :std/misc/rtd :std/misc/walist - :std/sort :std/srfi/43 :std/sugar :std/values + :std/io + :std/misc/ports + :std/misc/process + :std/iter + :std/misc/alist + :std/misc/hash + :std/misc/list + :std/misc/list-builder + :std/misc/ports + :std/misc/plist + :std/misc/rtd + :std/misc/walist + :std/sort + :std/srfi/43 + :std/sugar + :std/text/basic-parsers + :std/values ./env ./input ./output) (export #t) @@ -23,10 +36,15 @@ (error "Bad input source; expected input port, BufferedStringReader or BufferedReader instance" input)))) (def (string->json-object str) - (read-json-object/reader (open-buffered-string-reader str) (make-env))) + (let (reader (open-buffered-string-reader str)) + (begin0 (read-json-object/reader reader (make-env)) + ((parse-to-eof parse-and-skip-any-whitespace) (PeekableStringReader reader))))) (def (bytes->json-object bytes) - (read-json (open-buffered-reader bytes))) + (let (buffer (open-buffered-reader bytes)) + (begin0 (read-json-object/buffer buffer (make-env)) + ((parse-to-eof parse-and-skip-any-whitespace) + (PeekableStringReader (open-buffered-string-reader buffer)))))) (def (write-json obj (output (current-output-port))) (cond