From a4452089e7e97b8e40004f46296995a2d45b4010 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sun, 2 Jun 2024 12:44:19 -0700 Subject: [PATCH 01/23] Add :std/swank, some swank instructions and functions in the readme, and gxswank/`gerbil swank` (#1186) Co-authored-by: Drew Crampsie --- doc/reference/std/getopt.md | 2 +- src/gerbil/builtin.ssxi.ss | 2 +- src/gerbil/main.ss | 3 + src/std/build-spec.ss | 12 ++++ src/std/ide/swank.ss | 23 +++++++ src/std/ide/swank/README.org | 104 +++++++++++++++++++++++++++++ src/std/ide/swank/api.ss | 23 +++++++ src/std/ide/swank/autodoc.ss | 93 ++++++++++++++++++++++++++ src/std/ide/swank/completions.ss | 56 ++++++++++++++++ src/std/ide/swank/context.ss | 68 +++++++++++++++++++ src/std/ide/swank/eval.ss | 18 +++++ src/std/ide/swank/handlers.ss | 40 +++++++++++ src/std/ide/swank/message.ss | 87 ++++++++++++++++++++++++ src/std/ide/swank/presentation.ss | 86 ++++++++++++++++++++++++ src/std/ide/swank/repl.ss | 106 ++++++++++++++++++++++++++++++ src/std/ide/swank/server.ss | 43 ++++++++++++ src/std/ide/swank/top.ss | 36 ++++++++++ src/std/markup/sxml/README.org | 46 ++++++++++++- src/std/markup/sxml/oleg/SSAX.scm | 37 ++++++----- src/std/markup/sxml/ssax.ss | 6 +- src/std/markup/sxml/sxpath.ss | 15 ++++- src/std/markup/sxml/xml.org | 52 +++++++++++++-- src/std/srfi/srfi-19.scm | 8 ++- src/tools/build.ss | 3 +- src/tools/gxswank.ss | 60 +++++++++++++++++ 25 files changed, 996 insertions(+), 33 deletions(-) create mode 100644 src/std/ide/swank.ss create mode 100644 src/std/ide/swank/README.org create mode 100644 src/std/ide/swank/api.ss create mode 100644 src/std/ide/swank/autodoc.ss create mode 100644 src/std/ide/swank/completions.ss create mode 100644 src/std/ide/swank/context.ss create mode 100644 src/std/ide/swank/eval.ss create mode 100644 src/std/ide/swank/handlers.ss create mode 100644 src/std/ide/swank/message.ss create mode 100644 src/std/ide/swank/presentation.ss create mode 100644 src/std/ide/swank/repl.ss create mode 100644 src/std/ide/swank/server.ss create mode 100644 src/std/ide/swank/top.ss create mode 100644 src/tools/gxswank.ss diff --git a/doc/reference/std/getopt.md b/doc/reference/std/getopt.md index 01ba4c454..2e55cdfed 100644 --- a/doc/reference/std/getopt.md +++ b/doc/reference/std/getopt.md @@ -4,7 +4,7 @@ This is the old name of the [`:std/cli/getopt`](cli/getopt.md) module, that provides facilities for command line argument parsing. Up to Gerbil v0.18, `:std/getopt` was the only name for this module. -As of Gerbil v0.19, both names are supported. +Up to and including Gerbil v0.19, both names are supported. However, we recommend you use the new name `:std/cli/getopt` from now on, as we may remove the old name at some point in the future. diff --git a/src/gerbil/builtin.ssxi.ss b/src/gerbil/builtin.ssxi.ss index 013f58bca..9af77aebb 100644 --- a/src/gerbil/builtin.ssxi.ss +++ b/src/gerbil/builtin.ssxi.ss @@ -963,7 +963,7 @@ package: gerbil (input-port-characters-buffered (port::t) integer::t effect: (io)) (input-port-column (port::t) integer::t effect: (io)) (input-port-line (port::t) integer::t effect: (io)) - (input-port-readtable (port::t) integer::t effect: (io)) + (input-port-readtable (port::t) readtable::t effect: (io)) (input-port-readtable-set! (port::t readtable::t) void::t effect: (mut io)) (input-port-timeout-set! [((port::t t::t) void::t effect: (mut io)) ((port::t t::t procedure::t) void::t effect: (mut io))]) diff --git a/src/gerbil/main.ss b/src/gerbil/main.ss index cd579c5f9..6ec43485f 100644 --- a/src/gerbil/main.ss +++ b/src/gerbil/main.ss @@ -66,6 +66,7 @@ package: gerbil ("clean" "gxpkg" "clean") ("env" "gxpkg" "env") ("pkg" "gxpkg") + ("swank" "gxswank") ("test" "gxtest") ("tags" "gxtags") ("prof" "gxprof") @@ -81,6 +82,7 @@ package: gerbil ("clean" "gxpkg" "help" "clean") ("env" "gxpkg" "help" "env") ("pkg" "gxpkg" "help") + ("swank" "gxswank" "-h") ("test" "gxtest" "-h") ("tags" "gxtags" "-h") ("prof" "gxprof" "-h") @@ -107,6 +109,7 @@ package: gerbil (displayln " clean clean build artifactacts for a package (gxpkg clean)") (displayln " env eval a shell command in the local package environment (gxpkg env)") (displayln " pkg invoke the gerbil package manager (gxpkg)") + (displayln " swank run a swank server (gxswank)") (displayln " test run tests (gxtest)") (displayln " tags create emacs tags (gxtags)") (displayln " prof profile a dynamic executable module (gxprof)") diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index c2cd1ada8..95ff275b3 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -167,6 +167,18 @@ "srfi/160/c64" "srfi/160/c128" "srfi/212" + "ide/swank/api" + "ide/swank/message" + "ide/swank/context" + "ide/swank/autodoc" + "ide/swank/repl" + "ide/swank/presentation" + "ide/swank/top" + "ide/swank/eval" + "ide/swank/completions" + "ide/swank/handlers" + "ide/swank/server" + "ide/swank" ;; :std/mime "mime/struct" (gxc: "mime/types" (extra-inputs: ("mime/mime.types"))) diff --git a/src/std/ide/swank.ss b/src/std/ide/swank.ss new file mode 100644 index 000000000..b8c903ea4 --- /dev/null +++ b/src/std/ide/swank.ss @@ -0,0 +1,23 @@ +(import + ./swank/api + ./swank/message + ./swank/context + ./swank/presentation + ./swank/eval + ./swank/top + ./swank/completions + ./swank/autodoc + ./swank/handlers + ./swank/server) +(export + (import: + ./swank/api + ./swank/message + ./swank/context + ./swank/presentation + ./swank/eval + ./swank/top + ./swank/completions + ./swank/autodoc + ./swank/handlers + ./swank/server)) diff --git a/src/std/ide/swank/README.org b/src/std/ide/swank/README.org new file mode 100644 index 000000000..16259dc05 --- /dev/null +++ b/src/std/ide/swank/README.org @@ -0,0 +1,104 @@ +#+TITLE: Swank, a SLIME backend for an IDE + +* SLIME and Swank: Graphical REPL + +SLIME, the Superior Lisp Interaction Mode for Emacs, is an Emacs mode +for developing Lisp applications. +-- https://en.wikipedia.org/wiki/SLIME + +SLIME uses a backend called Swank that is loaded into Lisp to turn +Emacs into a featured REPL along with an IDE for lisp use. + +* Minimal Emacs setup + +There are some developers that (**gasp**) don't use Emacs as their +primary go to application. Having said that even without using the +editor portion the REPL itself is useful and continues to grow into a +wonderful interface to Gerbil. + +If you already use emacs it may be as simple as adding the following +to your init file. + +#+begin_src emacs-lisp + (use-package slime + :hook ((gerbil-mode . slime-mode))) + + (defun gerbil-slime-init-function (file encoding) + (format "(begin (import (prefix-in :std/swank swank#)) + (let ((port (+ 10000 (random-integer 55535)))) + (swank#create-server port: port) + (with-output-to-file %S (cut write port))))\n\n" + file)) + + (cl-defun gerbil-slime-start + (&key (program "/opt/gerbil/current/bin/gerbil") program-args + directory + (coding-system slime-net-coding-system) + (init 'gerbil-slime-init-function) + (name 'gerbil) + (buffer "*inferior-gerbil*") + init-function + env) + "Start a Gerbil process and connect to it. + This function is intended for programmatic use if `gerbil-slime' is not + flexible enough. + + PROGRAM and PROGRAM-ARGS are the filename and argument strings + for the subprocess. + INIT is a function that should return a string to load and start + Swank. The function will be called with the PORT-FILENAME and ENCODING as + arguments. INIT defaults to `gerbil-slime-init-command'. + CODING-SYSTEM a symbol for the coding system. The default is + slime-net-coding-system + ENV environment variables for the subprocess (see `process-environment'). + INIT-FUNCTION function to call right after the connection is established. + BUFFER the name of the buffer to use for the subprocess. + NAME a symbol to describe the Lisp implementation + DIRECTORY change to this directory before starting the process. + " + (let ((args (list :program program :program-args program-args :buffer buffer + :coding-system coding-system :init init :name name + :init-function init-function :env env))) + (slime-check-coding-system coding-system) + (when (slime-bytecode-stale-p) + (slime-urge-bytecode-recompile)) + (let ((proc (slime-maybe-start-lisp program program-args env + directory buffer))) + (slime-inferior-connect proc args) + (pop-to-buffer (process-buffer proc))))) + + (defun gerbil-slime (&optional command coding-system) + "Start an inferior^_superior Lisp and connect to its Swank server." + (interactive) + (slime-setup) + (gerbil-slime-start)) + + + +#+end_src + +Or you could [[https://slime.common-lisp.dev/doc/html/Getting-started.html#Getting-started][download it]] and put it in your =load-path=. + +* Running the Swank Server in Gerbil + +#+begin_src sh +gerbil swank --help +gxswank: The Gerbil HTTP Daemon + +Usage: gxwank [option ...] + +Options: + -g --global-env use the user global env even in local package context + -a --address The IP or hostname to listen on [default: localhost] + -p --port The port number on which the Swank server listens for connections [default: 4005] + -d --dont-close Close the acceptor on client disconnect if anything but true [default: true] + -h --help display help + +#+end_src + + + + + + + diff --git a/src/std/ide/swank/api.ss b/src/std/ide/swank/api.ss new file mode 100644 index 000000000..764b69d48 --- /dev/null +++ b/src/std/ide/swank/api.ss @@ -0,0 +1,23 @@ +(import :std/misc/string) +(export #t) +(def current-slime-client (make-parameter #f)) +(def current-slime-writer (make-parameter #f)) +(def current-slime-package (make-parameter #f)) +(def current-slime-thread (make-parameter #f)) +(def current-slime-id (make-parameter #f)) +(def current-swank-exit-emacs-rex (make-parameter #f)) +(def (swank-machine:version) + (with ([cpu os kern] ##os-system-type-saved) + (if (eq? os 'apple) + (string-trim-eol + (cdr (##shell-command "sysctl -n machdep.cpu.brand_string" #t))) + "Dunno"))) +(def (swank-machine:type) + (##string-upcase (string-trim-eol + (cdr (##shell-command "uname -m" #t))))) + +(def (swank-lisp-implementation:program) + (path-expand (string-append "bin/" (##command-name)) (gerbil-home))) + +(def (swank-modules) + '("SWANK-ARGLISTS" "SWANK-REPL" "SWANK-PRESENTATIONS")) diff --git a/src/std/ide/swank/autodoc.ss b/src/std/ide/swank/autodoc.ss new file mode 100644 index 000000000..f59d30905 --- /dev/null +++ b/src/std/ide/swank/autodoc.ss @@ -0,0 +1,93 @@ +(import :gerbil/expander ./message :std/sugar + ./context ./api :std/format + :std/srfi/13) +(export #t) + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; Completions, Autodoc and Signature +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; search for a procedure in 0gambit-procedures +;; returns the procedure symbol if it finds it + +(gxc#optimizer-info-init!) +(def src (path-expand "src/" (gerbil-home))) + +(try + (gx#import-module + (path-expand "gerbil/builtin.ssxi.ss" src) #t #t) + (gx#import-module + (path-expand "gerbil/builtin-inline-rules.ssxi.ss" src) #t #t) + (let* ((lpath (path-expand "lib/gerbil/runtime/" (gerbil-home))) + (fs (filter (cut string-suffix? ".ssxi.ss" <>) + (directory-files lpath))) + (mods (map (cut path-expand <> lpath) fs))) + (for-each (cut gx#import-module <> #t #t) mods)) + (catch (_) #f)) + +(def (swank-pp-proc-args args) + (def (strim sym) + (string->symbol + (string-trim-both + (symbol->string sym) + (? (or (cut char=? <> #\_) char-numeric?))))) + (let lp ((lst args)) + (if (null? lst) lst + (with ([kar . kdr] lst) + (cons (strim kar) + (if (pair? kdr) (lp kdr) (strim kdr))))))) + +(def (swank-autodoc-procedure sym proc) + (def name (##procedure-name proc)) + (def opti (and name (gxc#optimizer-lookup-type name))) + (def sig (and opti (gxc#!procedure? opti) (gxc#!procedure-signature opti))) + (def ret (and sig (gxc#!signature? sig) (gxc#!signature-return sig))) + (def args (and sig (gxc#!signature? sig) (gxc#!signature-arguments sig))) + (def call (cons sym args)) + + + (if opti (format "~a => ~a" call ret) + (let (form (##decompile proc)) + (cons sym (if form (swank-pp-proc-args (cadr form)) '/args/))))) + +(def (autod sym) + (try + (let* ((sym (and (string? sym) (string->symbol sym))) + (id (and sym (swank-eval-in-context `(gx#resolve-identifier ',sym))))) + (if (not id) + `(:not-available t) + (if (syntax-binding? id) + (format "(~a /syntax/)" sym) + (let (val (swank-eval-in-context sym)) + (if (procedure? val) + (try (swank-autodoc-procedure sym val) + (catch (e) (format "(~a . t::t) => t::t"))) + (format "~a" sym)))))) + (catch (e) + '(:not-available t)))) + +(def cursor 'swank::%cursor-marker%) + +(def (cursor-symbol form (exit #f)) + (def (fnd) + (when (pair? form) + (foldl + (lambda (this prev) + (cond + ((pair? this) (cursor-symbol this exit)) + ((eq? this cursor) (exit prev)) + ((equal? this "") prev) + + (else this))) + #f + form)) + #f) + (if exit (fnd) + (call/cc (lambda (k) (set! exit k) (fnd))))) + +(def-swank (swank:operator-arglist name module) + (autod name)) + +(def-swank (swank:autodoc lst . args) + (def d (autod (cursor-symbol lst))) + (if (list? d) d [d])) diff --git a/src/std/ide/swank/completions.ss b/src/std/ide/swank/completions.ss new file mode 100644 index 000000000..2c59b313e --- /dev/null +++ b/src/std/ide/swank/completions.ss @@ -0,0 +1,56 @@ +(import + :gerbil/expander ./message :std/sugar + ./context ./api :std/srfi/13 :std/iter :std/srfi/1) +(export #t) + + +(def (symbol-string-prefix? sym str) + (def symstr (symbol->string sym)) + (= (string-prefix-length str symstr) + (string-length str))) + +(def (symbol-ns-prefix? sym str) + (def symstr (symbol->string sym)) + (def string-prefix? + (= (string-prefix-length str symstr) + (string-length str))) + (and string-prefix? + (let (idx (string-index symstr #\#)) + (and idx (< 1 idx) + (if (> (string-length str) idx) + symstr + (substring/shared symstr 0 (1+ idx))))))) + +(def (find-completions str (cxt (swank-context))) + (append + (for/collect ((s (in-hash-keys + (gx#expander-context-table cxt)) + when (symbol-string-prefix? s str))) (symbol->string s)) + (if (not (string-prefix? "##" str)) [] + (map symbol->string + (##global-var-table-foldl + (lambda (prev this) + (if (symbol-string-prefix? this str) + (cons this prev) + prev)) + []))) + (map symbol->string + (##global-var-table-foldl + (lambda (prev this) (if (symbol-ns-prefix? this str) + (cons this prev) + prev)) + [])))) + +(def (common-prefix strings) + (let (n +inf.0) + (fold (lambda (x y) ;(displayln x " " y) + (set! n (min (string-prefix-length x (or y x)) n)) x) + #f strings) + (substring (car strings) 0 (inexact->exact n)))) + +(def-swank (swank:completions str pkg) + (when (and (pair? pkg) (eq? 'quote (car pkg))) + (set! pkg (eval pkg))) + (let (comp (find-completions str (swank-context pkg))) + (if (null? comp) 'nil + [comp (common-prefix comp)]))) diff --git a/src/std/ide/swank/context.ss b/src/std/ide/swank/context.ss new file mode 100644 index 000000000..0ad7a994f --- /dev/null +++ b/src/std/ide/swank/context.ss @@ -0,0 +1,68 @@ +(import :gerbil/expander ./api ./message) +(export #t) + +(def (swank-context (:mod (current-slime-package))) + (def cxt + (identity ;; make-top-context + (cond + ((string? :mod) + (if (string=? "TOP" :mod) + (gx#current-expander-context) + (swank-context + (string->symbol (string-append ":" :mod))))) + ((not :mod) (gx#current-expander-context)) + ((symbol? :mod) (gx#import-module :mod #f #t)) + (else (error "Unknown Module" :mod))))) + (parameterize ((gx#current-expander-context cxt)) + ;; TODO: This should be a shadow context or something. + (gx#eval-syntax '(extern namespace: #f + swank:lookup-presented-object + swank:lookup-presented-object-or-lose + swank:get-presented-object + repl-result-history-ref))) + cxt) + +(def (swank-eval-in-context form (cxt-name (current-slime-package))) + (parameterize ((gx#current-expander-context (swank-context cxt-name))) + (eval form))) + +(def (list-all-context-names) + (##list-sort + stringstring (gx#expander-context-id cxt))) + (filter gx#module-context? + (map cdr + (hash->list + gx#__module-registry)))) + ...])) + +(def (swank-read-from-string-in-context str (cxt-name (current-slime-package))) + (let (form + (swank-eval-in-context + `(call-with-input-string + ,str + (lambda (p) + (input-port-readtable-set! + p + (readtable-eval-allowed?-set + (input-port-readtable p) #t)) + (let lp ((form (read p)) (lst [])) + (if (eof-object? form) + (reverse lst) + (lp (read p) (cons form lst)))))) + cxt-name)) + (case (length form) + ((0) (eof-object)) + ((1) (car form)) + (else (cons 'begin form))))) + +(def-swank (swank:list-all-package-names . _) + (list-all-context-names)) + +(def-swank (swank:set-package name) + [name (if (equal? name "TOP") "TOP" + (string-append ":" name))]) + diff --git a/src/std/ide/swank/eval.ss b/src/std/ide/swank/eval.ss new file mode 100644 index 000000000..c407ca3f1 --- /dev/null +++ b/src/std/ide/swank/eval.ss @@ -0,0 +1,18 @@ +(import :gerbil/expander :gerbil/gambit + ./api + ./message + ./presentation + ./repl + ./context) +(export #t) + +(def-swank (swank:interactive-eval str) + (let* ((form (swank-read-from-string-in-context str)) + (res + (swank-eval-in-context form))) + (##object->string res))) + +(def-swank (swank-repl:listener-eval str) + ;; (displayln "Evaling " str) + (swank-repl-eval str) + ((current-swank-exit-emacs-rex) #f)) diff --git a/src/std/ide/swank/handlers.ss b/src/std/ide/swank/handlers.ss new file mode 100644 index 000000000..5e1f985c8 --- /dev/null +++ b/src/std/ide/swank/handlers.ss @@ -0,0 +1,40 @@ +(import :std/misc/string :std/sugar :gerbil/gambit + ./api ./message ./presentation ./top) +(export #t) + +(def-swank (:emacs-rex form package thread id) + (call-with-current-continuation + (lambda (exit) + (parameterize ((current-swank-exit-emacs-rex exit) + (current-slime-package package) + (current-slime-thread thread) + (current-slime-id id)) + (try + (let (handler (hash-get swank-message-handlers (car form))) + (if handler + `(:return (:ok ,(apply handler (cdr form))) ,id) + (error "No Handler Found" form))) + (catch (e) + `(:return (:abort ,(call-with-output-string (cut display-exception e <>))) ,id))))))) + +(def-swank (swank:connection-info) + `(:pid + ,(##os-getpid) + :style :spawn + :encoding (:coding-systems ("utf-8-unix")) + :lisp-implementation (:type + "Scheme" + :name "gerbil" + :version ,(gerbil-version-string) + :program ,(swank-lisp-implementation:program)) + :machine (:instance + ,(##host-name) + :type ,(swank-machine:type) + :version ,(swank-machine:version)) + :features (:swank) + :modules ,(swank-modules) + :package (:name "TOP" :prompt "TOP") + :version "2.29.1")) + +(def-swank (swank:swank-require modules) (swank-modules)) +(def-swank (swank-repl:create-repl nil . plist) '("TOP" "TOP")) diff --git a/src/std/ide/swank/message.ss b/src/std/ide/swank/message.ss new file mode 100644 index 000000000..8794815c7 --- /dev/null +++ b/src/std/ide/swank/message.ss @@ -0,0 +1,87 @@ +(import + ./api + :std/text/utf8 :std/io :std/contract :std/interface :gerbil/gambit) +(export #t) + +(def default-swank-buffer-size (expt 2 15)) ; 32K +(def (read-message reader (buffer-or-size default-swank-buffer-size)) + (def sizehex (make-u8vector 6)) + (using (reader :- Reader) + (let* ((size (string->number + (utf8->string + (begin + (reader.read sizehex) sizehex)) + 16)) + (buffer (if (number? buffer-or-size) + (make-u8vector default-swank-buffer-size) + buffer-or-size)) + (mbytes (and size (reader.read buffer 0 size size))) + (port (open-input-u8vector buffer))) + (input-port-readtable-set! + port + (readtable-eval-allowed?-set + (input-port-readtable port) #t)) + + (and size (read port))))) + +(def swank-message-handlers (make-hash-table-eq)) + +(defrules def-swank () + ((_ (name args ...) body ...) + (begin + (def (name args ...) body ...) + (hash-put! swank-message-handlers 'name name) + name)) + ((_ (name args ... . rest) body ...) + (begin + (def (name args ... . rest) body ...) + (hash-put! swank-message-handlers 'name name) + name)) + ((_ (name . rest) body ...) + (begin + (def (name . rest) body ...) + (hash-put! swank-message-handlers 'name name) + name))) + +(defrules set-swank! () + ((_ (name args ...) body ...) + (begin + (set! name (lambda (args ...) body ...)) + (hash-put! swank-message-handlers 'name name) + name)) + ((_ (name args ... . rest) body ...) + (begin + (set! name (lambda (args ... . rest) body ...)) + (hash-put! swank-message-handlers 'name name) + name)) + ((_ (name . rest) body ...) + (begin + (set! name (lambda (args . rest) body ...)) + (hash-put! swank-message-handlers 'name name) + name))) + +(def (swank-handle-message msg writer) + (let (handler (hash-get swank-message-handlers (car msg))) + (if handler + (let (ret (apply handler (cdr msg))) + (and ret (write-message writer ret)) + #t) + #f))) + +(def (write-message writer msg) + (let* ((str (cond ((string? msg) msg) + ((pair? msg) + (with-output-to-string "" (cut write msg))) + (else #f))) + (bytes (if (u8vector? msg) msg + (string->utf8 str))) + (len (u8vector-length bytes)) + (hex (string->utf8 (number->string len 16))) + (filler (make-u8vector (- 6 (u8vector-length hex))))) + (u8vector-fill! filler 48) + (using (writer :- Writer) + (writer.write (u8vector-append filler hex bytes))))) + +(def (send-to-emacs msg) + (write-message (current-slime-writer) msg)) + diff --git a/src/std/ide/swank/presentation.ss b/src/std/ide/swank/presentation.ss new file mode 100644 index 000000000..a6a033d8a --- /dev/null +++ b/src/std/ide/swank/presentation.ss @@ -0,0 +1,86 @@ +(import ./api ./message ./repl ./context :gerbil/gambit) +(export #t) + ;;;; Recording and accessing results of computations + +(def +record-repl-results+ #t) +(def object-to-presentation-id-table + (##make-table weak-keys: #t)) +(def presentation-id-to-object-table + (##make-table weak-values: #t)) + +(def (clrhash tbl) + (table-for-each (lambda (k _) (table-set! tbl k)) tbl)) + +(def (clear-presentation-tables) + (clrhash object-to-presentation-id-table) + (clrhash presentation-id-to-object-table)) + +(def nil-surrogate (gensym 'nil-surrogate)) + +(def presentation-counter 0) + +;; XXX thread safety? [2006-09-13] mb: not in the slightest (fwiw the +;; rest of slime isn't thread safe either), do we really care? +(def (save-presented-object obj) + "Save OBJECT and return the assigned id. + If OBJECT was saved previously return the old id." + (let ((ref (table-ref object-to-presentation-id-table obj #f))) + (or ref + (let ((id (begin0 presentation-counter + (set! presentation-counter (1+ presentation-counter))))) + (table-set! presentation-id-to-object-table id obj) + (table-set! object-to-presentation-id-table obj id) + id)))) + +(def (present-repl-results object (writer #f)) + (unless writer (set! writer (current-slime-writer))) + ;(def id (current-slime-id)) + (def (writeme val (iden #f)) + (write-message + writer + `(:write-string ,(if iden val (##object->string val)) :repl-result))) + (def (present val) + (let ((id (if (not +record-repl-results+) 'nil + (save-presented-object val)))) + (write-message + writer `(:presentation-start ,id :repl-result)) + (writeme val) + (write-message + writer `(:presentation-end ,id :repl-result)) + (writeme "\n" #t))) + + (if (##values? object) + (for-each present (values->list object)) + (present object))) + +(def-swank (swank:init-presentations) + (begin0 'present-repl-results + (current-swank-repl-results-function present-repl-results))) + +(extern namespace: #f + swank:lookup-presented-object + swank:lookup-presented-object-or-lose + swank:get-presented-object) + +(set-swank! + (swank:get-presented-object id) + (table-ref presentation-id-to-object-table (exact id) nil-surrogate)) + +(set-swank! + (swank:lookup-presented-object id) + (let* ((val (swank:get-presented-object id)) + (found? (not (eq? val nil-surrogate)))) + (values val (if found? 't 'nil)))) + +(set-swank! + (swank:lookup-presented-object-or-lose id) + (with ((values object found?) (swank:lookup-presented-object id)) + (if (eq? found? 't) + `(swank:get-presented-object ,id) + (error "Attempt to access unrecorded object" id)))) + +(def-swank (cl:nth-value n vs) + (set! vs (eval vs)) + (if (not (##values? vs)) + (if (= n 0) vs (error "Invalid nth-value request" n vs)) + (##values-ref vs n))) diff --git a/src/std/ide/swank/repl.ss b/src/std/ide/swank/repl.ss new file mode 100644 index 000000000..afa26d324 --- /dev/null +++ b/src/std/ide/swank/repl.ss @@ -0,0 +1,106 @@ +(import + ./message ./api ./context + :gerbil/gambit :std/sugar :std/format :std/srfi/13) +(export #t) + +;;; client->thread mapping +(def repl-thread-client-table + (##make-table weak-keys: #t)) + +(def (client->repl-thread client) + (table-ref repl-thread-client-table client #f)) + +(def (print-object-to-string obj (maxlen (* 80 5))) + (def str (string-trim-right (format (if (list? obj) "~Y" "~a") obj) + char-whitespace?)) + (if (< (string-length str) (- maxlen 3)) str + (format "~a..." (substring str 0 (max 1 (- maxlen 3)))))) + +(def (swank-default-repl-results-function object (writer #f)) + (unless writer (set! writer (current-slime-writer))) + (def (writeme val) + (write-message + writer + `(:write-string ,(print-object-to-string val) :repl-result))) + (if (##values? object) + (for-each writeme (values->list object)) + (writeme object))) + +(def current-swank-repl-results-function + (make-parameter swank-default-repl-results-function)) + +(def (swank-repl-eval str) + (declare (not inline) (safe)) + (def client (current-slime-client)) + (def writer (current-slime-writer)) + (def cxt (current-slime-package)) + (def id (current-slime-id)) + (def (return val (status ':ok)) + (write-message writer `(:return (,status ,val) ,id))) + (def (write-string-to-repl str) + (write-message + writer `(:write-string ,str nil ,id))) + (def current-repl-results-function (current-swank-repl-results-function)) + (def (repl-results-function val) + (current-repl-results-function val writer)) + + (def (:repl-thread) + (call/cc + (lambda (k) + (def stdout (open-string)) + + (def (:repl-output-thread) + (let lp () + (let* ((char (peek-char stdout)) + (str (get-output-string stdout))) + (if (or (eof-object? str) (eof-object? char) + (equal? str "")) + str + (begin + (write-string-to-repl str) + (lp)))))) + (parameterize ((current-swank-exit-emacs-rex k) + (current-slime-package cxt) + (current-slime-thread ':repl-thread) + (current-slime-id id) + (current-output-port stdout) + (current-error-port stdout)) + (let (outt (spawn :repl-output-thread)) + (try + (let (res + (swank-eval-in-context + `(eval + (call-with-input-string + ,str + (lambda (p) + (input-port-readtable-set! + p + (readtable-eval-allowed?-set + (input-port-readtable p) #t)) + (let lp ((form (read p)) (lst [])) + (if (eof-object? form) + (cons 'begin (reverse lst)) + (lp (read p) (cons form lst))))))) + cxt)) + (force-output stdout) + (close-port stdout) + (thread-join! outt) + (repl-results-function res) + (return 'nil) + (thread-yield!)) + (catch (e) + (return (call-with-output-string + "" + (cut display-exception e <>)) + ':abort)) + (finally + (close-port stdout) + (thread-terminate! outt) + (table-set! repl-thread-client-table client)))))))) + + (when (let (thr (client->repl-thread client)) + (and thr (thread-state-running? thr))) + (error "REPL thread already running")) + + (let (thread (spawn :repl-thread)) + (table-set! repl-thread-client-table client thread))) diff --git a/src/std/ide/swank/server.ss b/src/std/ide/swank/server.ss new file mode 100644 index 000000000..d21923a98 --- /dev/null +++ b/src/std/ide/swank/server.ss @@ -0,0 +1,43 @@ +(import :std/misc/string + :std/logger + :std/contract + :std/io + :std/net/address + ./api ./message ./presentation + ./handlers ./autodoc ./eval + :std/sugar :gerbil/gambit) +(export #t) + +(deflogger swank-server) +(def (swank-handle-slime client) + (using ((client :- StreamSocket) + (reader (client.reader) :- Reader) + (writer (client.writer) :- Writer)) + (parameterize ((current-slime-client client) + (current-slime-writer writer)) + (let lp () + (let ((msg (read-message reader))) + (when msg + (swank-handle-message msg writer) (lp))))))) + +(def (run-swank-server (port 2005) (host "localhost")) + (let (laddr (resolve-address + (string-append host":"(number->string port)))) + (displayln "Running Swank Server on " laddr) + (using (sock (tcp-listen laddr) : ServerSocket) + (while #t + (try + (using (cli (sock.accept) : StreamSocket) + (debugf "Accepted connection from ~a" (cli.peer-address)) + (spawn swank-handle-slime cli)) + (catch (e) + (errorf "Error accepting connection: ~a" e))))))) + +(def (create-server + port: (port 4005) + ;; Security, I almost always do 0.0.0.0. Shhh. + ;; Can be a hostname even, but where to listen. + host: (host "localhost") + ;; don't close if client does. + dont-close: (ignore-me #t)) + (spawn run-swank-server port host)) diff --git a/src/std/ide/swank/top.ss b/src/std/ide/swank/top.ss new file mode 100644 index 000000000..7159cba93 --- /dev/null +++ b/src/std/ide/swank/top.ss @@ -0,0 +1,36 @@ +(import + :std/interactive ./api + ./presentation) +(export #t apropos (import: :std/interactive)) + +(extern namespace: #f + apropos + swank:lookup-presented-object + repl-result-history-ref) + +(def (swank:apropos thing (out #f)) + (def po (or out (##current-output-port))) + (if (current-slime-id) + (##apropos thing po) + (if out + (##apropos thing out) + (##apropos thing)))) + +(set! apropos swank:apropos) + +(def (swank:repl-result-history-ref n) + (if (not (current-slime-id)) + (##repl-result-history-ref n) + (let ((-n (- presentation-counter 1 n))) + (with ((values object found?) + (swank:lookup-presented-object -n)) + (if (eq? found? 't) + object + (error "No repl-history for number" n)))))) + +(set! repl-result-history-ref swank:repl-result-history-ref) + + + + + diff --git a/src/std/markup/sxml/README.org b/src/std/markup/sxml/README.org index 1cf687be9..eea2661aa 100644 --- a/src/std/markup/sxml/README.org +++ b/src/std/markup/sxml/README.org @@ -200,6 +200,14 @@ When set to =xml?= things are different. :CUSTOM_ID: sxml-queries :END: +SXPath is an XPath-conforming XML query language that internally +relies on SXML as a representation of the XML Infoset. + +TODO: https://gitlab.com/wak/wak-sxml-tools/ <--- there's an +XPath->SXPath parser. + +It's by Lizorkin who also has a good tutorial [[https://web.archive.org/web/20070414181503/http://modis.ispras.ru/Lizorkin/sxml-tutorial.html][here]], + *** sxpath :PROPERTIES: :CUSTOM_ID: sxpath @@ -210,15 +218,45 @@ When set to =xml?= things are different. path := list #+end_src -Evaluate an abbreviated SXPath +Evaluate an abbreviated SXPath. The syntax will follow but assume it's +a sexp version of XPath similar to how SXML is a sexp version of XML. + +The easy way is to think of SXPath as a list of path components. It's +also important to realize that attributes are themselves a node of +type =@=. + +So ='(html head title)= is like the ="​/​html/​head/​title"= XPath and the +=//row[@r​='8']= instead can be said like =(// (row (@ r (equal? +"8"))))= + +There are a few special path components: + + - * :: matches an element node. + - // :: matches any one or many consecutive path components. + - @ :: selects the attribute list node. + + If a path component is a list it's one of these forms: + + - (equal? x) :: matches if the node under examination matches x using + node-equal? + - (eq? x) :: matches if the node under examination matches x using + node-eq? + - ( n) :: - :: matches the n-th node matching same path + component. n starts from 1. Negative numbers start from the end of + the node list backward. This is =path[n]= syntax in XPath. + - ( (...)) :: matches a path component path and + =(sxpath (...))= on those nodes are not empty. This is + =path[predicate...]= syntax in XPath. +Here's the syntax: + #+begin_example sxpath:: AbbrPath -> Converter, or sxpath:: AbbrPath -> Node|Nodeset -> Nodeset #+end_example -AbbrPath is a list. It is translated to the full SXPath according to the -following rewriting rules: +AbbrPath is a list. It is translated to the full SXPath according to +the following rewriting rules: #+begin_src scheme (sxpath '()) -> (node-join) @@ -238,6 +276,8 @@ following rewriting rules: (sxpathr path-filter) -> (filter (sxpath path-filter)) #+end_src +The =sxpath1= and =sxpathr= operators are not exported and just there +for show. *** sxml-select :PROPERTIES: :CUSTOM_ID: sxml-select diff --git a/src/std/markup/sxml/oleg/SSAX.scm b/src/std/markup/sxml/oleg/SSAX.scm index 304baad1c..bcda31ce4 100644 --- a/src/std/markup/sxml/oleg/SSAX.scm +++ b/src/std/markup/sxml/oleg/SSAX.scm @@ -1310,22 +1310,24 @@ ; ; This procedure tests for the namespace constraints: ; http://www.w3.org/TR/REC-xml-names/#nsc-NSDeclared - +(define ssax:current-resolve-namespaces (make-parameter #f)) (define (ssax:resolve-name port unres-name namespaces apply-default-ns?) (cond - ((pair? unres-name) ; it's a QNAME - (cons - (cond - ((assq (car unres-name) namespaces) => cadr) - ((eq? (car unres-name) ssax:Prefix-XML) ssax:Prefix-XML) - (else - (parser-error port "[nsc-NSDeclared] broken; prefix " (car unres-name)))) - (cdr unres-name))) - (apply-default-ns? ; Do apply the default namespace, if any + ((pair? unres-name) ;; it's a QNAME + (if (not (ssax:current-resolve-namespaces)) + unres-name + (cons + (cond + ((assq (car unres-name) namespaces) => cadr) + ((eq? (car unres-name) ssax:Prefix-XML) ssax:Prefix-XML) + (else + (parser-error port "[nsc-NSDeclared] broken; prefix " (car unres-name)))) + (cdr unres-name)))) + (apply-default-ns? ; Do apply the default namespace, if any (let ((default-ns (assq '*DEFAULT* namespaces))) - (if (and default-ns (cadr default-ns)) - (cons (cadr default-ns) unres-name) - unres-name))) ; no default namespace declared + (if (and (ssax:current-resolve-namespaces) default-ns (cadr default-ns)) + (cons (cadr default-ns) unres-name) + unres-name))) ; no default namespace declared (else unres-name))) ; no prefix, don't apply the default-ns @@ -1401,7 +1403,6 @@ ; Procedure: ssax:complete-start-tag tag-head port elems entities namespaces (define ssax:complete-start-tag - (let ((xmlns (string->symbol "xmlns")) (largest-dummy-decl-attr (list ssax:largest-unres-name #f #f #f))) @@ -1527,8 +1528,12 @@ ((merged-attrs) (if decl-attrs (validate-attrs port attlist decl-attrs) (attlist->alist attlist))) ((proper-attrs namespaces) - (adjust-namespace-decl port merged-attrs namespaces)) - ) + (let*-values + (((pas nss) (adjust-namespace-decl port merged-attrs namespaces))) + (values (if (ssax:current-resolve-namespaces) + pas + merged-attrs) + namespaces)))) ;(cerr "proper attrs: " proper-attrs nl) ; build the return value (values diff --git a/src/std/markup/sxml/ssax.ss b/src/std/markup/sxml/ssax.ss index ec08a940c..7c6b6863f 100644 --- a/src/std/markup/sxml/ssax.ss +++ b/src/std/markup/sxml/ssax.ss @@ -9,11 +9,15 @@ ;; ns is an assoc or a hash table of mapping uri (string) -> namespace (string) ;; same interface as parse-xml so that implementations can be swapped + (def (read-xml source namespaces: (ns [])) (let* ((ns (if (hash-table? ns) (hash->list ns) ns)) - (ns (map (match <> ([uri . id] (cons (string->symbol id) uri))) + (ns (map (match <> ([uri . id] + (cons (if (string? id) + (string->symbol id) + id) uri))) ns))) (cond ((input-port? source) diff --git a/src/std/markup/sxml/sxpath.ss b/src/std/markup/sxml/sxpath.ss index 8e23d3657..429beba0d 100644 --- a/src/std/markup/sxml/sxpath.ss +++ b/src/std/markup/sxml/sxpath.ss @@ -3,8 +3,17 @@ ;;; SXPath interface (import :std/error) -(export sxpath) +(export (import: oleg-SXpath)) (extern namespace: #f pretty-print) -(include "oleg/myenv-gerbil.scm") -(include "oleg/SXPath.scm") + +(module oleg-myenv + (export #t) + (include "oleg/myenv-gerbil.scm")) + +(module oleg-SXpath + (import oleg-myenv) + (export #t) + (include "oleg/SXPath.scm")) + +(import oleg-SXpath) \ No newline at end of file diff --git a/src/std/markup/sxml/xml.org b/src/std/markup/sxml/xml.org index 31de38bef..474b86cff 100644 --- a/src/std/markup/sxml/xml.org +++ b/src/std/markup/sxml/xml.org @@ -35,6 +35,8 @@ exports the [[file:README.org][SXML]] procedures. :PROPERTIES: :CUSTOM_ID: parsing :END: + + *** read-xml :PROPERTIES: :CUSTOM_ID: read-xml @@ -54,15 +56,57 @@ swapped. Signals an error on invalid /source/ value. ::: tip Examples #+begin_src scheme -> (import :std/markup/xml) -> (read-xml "foobarbarbaz") -(*TOP* (foo (element (@ (id "1")) "foobar") (element (@ (id "2")) "barbaz"))) + > (import :std/markup/xml) + > (read-xml "foobarbarbaz") + (*TOP* (foo (element (@ (id "1")) "foobar") (element (@ (id "2")) "barbaz"))) + > (call-with-input-string " " read-xml) + (*TOP* (FOO:b (FOO:baz) (bAr:g))) #+end_src ::: + +*** Namespaces: =xmlns= and =SXML= + +XML has namespaces. While they can be very useful the translation to +s-expressions and back again needs some explaining. + +For example when working with Excel Spreadsheets the =workbook.xml= +has namespaces. + +::: tip Pretend workbook + +#+begin_src scheme :noweb yes + (def pretend-workbook-text (symbol->string '| + + + + + + + + | + )) + + (def pretend-workbook-sxml (read-xml pretend-workbook-text)) +#+end_src + +::: + + + + + + + + + ** Printing :PROPERTIES: -:CUSTOM_ID: printing +:CUSTOM_ID: printing :END: *** write-xml :PROPERTIES: diff --git a/src/std/srfi/srfi-19.scm b/src/std/srfi/srfi-19.scm index 82a1ae860..07e995955 100644 --- a/src/std/srfi/srfi-19.scm +++ b/src/std/srfi/srfi-19.scm @@ -859,9 +859,11 @@ (offset (date-zone-offset date))) (+ (tm:encode-julian-day-number day month year) (- 1/2) - (+ (/ (/ (+ (* hour 60 60) - (* minute 60) second (/ nanosecond tm:nano)) tm:sid) - (- offset)))))) + (+ (if (every zero? [nanosecond second minute hour]) 0 + (/ (/ (+ (* hour 60 60) + (* minute 60) second (if (zero? nanosecond) 0 (/ nanosecond tm:nano))) + tm:sid) + (if (zero? offset) 1 (- offset)))))))) (define (date->modified-julian-day date) (- (date->julian-day date) diff --git a/src/tools/build.ss b/src/tools/build.ss index 374c731ca..56053cdb1 100755 --- a/src/tools/build.ss +++ b/src/tools/build.ss @@ -9,7 +9,8 @@ "gxpkg" "gxtest" "gxensemble" - "gxhttpd") + "gxhttpd" + "gxswank") libdir: (path-expand "lib" (getenv "GERBIL_BUILD_PREFIX" (gerbil-home))) bindir: (path-expand "bin" (getenv "GERBIL_BUILD_PREFIX" (gerbil-home))) debug: #f) diff --git a/src/tools/gxswank.ss b/src/tools/gxswank.ss new file mode 100644 index 000000000..e5a336ee1 --- /dev/null +++ b/src/tools/gxswank.ss @@ -0,0 +1,60 @@ +;;; -*- Gerbil -*- +;;; © vyzo +;;; The Gerbil Swank Daemon +;;; +(import :std/sugar + :std/contract + :std/cli/getopt + :std/net/address + :std/net/httpd + :std/swank + :std/mime/types + :std/actor + :std/iter + :std/misc/ports + :std/interactive + (only-in :std/logger start-logger! deflogger current-logger-options) + (only-in :std/os/socket SO_REUSEADDR SO_REUSEPORT) + (only-in :std/srfi/13 string-contains) + :gerbil/expander + :gerbil/gambit + ./env) +(export main) + +(def (main . args) + (call-with-getopt gxswank-main args + program: "gxswank" + help: "The Gerbil Swank Daemon" + global-env-flag + host-option + port-option + persist-option)) + +(def port-option + (option + 'port "-p" "--port" default: "4005" + help: "The port number on which the Swank server listens for connections")) + +(def host-option + (option + 'host "-a" "--address" default: "localhost" + help: "The IP or hostname to listen on")) + +(def persist-option + (option + 'dont-close "-d" "--dont-close" default: "true" + help: "Close the acceptor on client disconnect if anything but true")) + + +(def (gxswank-main opt) + (setup-local-env! opt) + (let-hash opt + (let ((server (create-server + port: (string->number .port) + host: .host + dont-close: (equal? "true" .dont-close)))) + + (thread-yield!) + (##repl) + (thread-join! server)))) + From a7483b3fa1d8e71d24455aaa96550534ee2f4676 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sun, 2 Jun 2024 15:00:17 -0700 Subject: [PATCH 02/23] Add tools/gxswank with proper lib path --- src/tools/gxswank.ss | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/gxswank.ss b/src/tools/gxswank.ss index e5a336ee1..28d4cb6f8 100644 --- a/src/tools/gxswank.ss +++ b/src/tools/gxswank.ss @@ -7,7 +7,7 @@ :std/cli/getopt :std/net/address :std/net/httpd - :std/swank + :std/ide/swank :std/mime/types :std/actor :std/iter From 21f1c4678740d8de009544bb046c9c23f67793e3 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Tue, 2 Jul 2024 13:47:59 -0700 Subject: [PATCH 03/23] WIP: Add `instance` syntax and doc, Parsec Monad to come --- doc/.vuepress/config.js | 1 + doc/build.sh | 1 + doc/reference/std/instance.md | 269 +++++++++++++ doc/reference/std/instance.org | 388 +++++++++++++++++++ doc/reference/std/markup/sxml/README.md | 31 +- doc/reference/std/markup/sxml/html/README.md | 16 +- doc/reference/std/markup/sxml/tal/README.md | 2 +- doc/reference/std/markup/sxml/xml.md | 32 ++ src/std/instance-test.ss | 105 +++++ src/std/instance.ss | 40 ++ 10 files changed, 874 insertions(+), 11 deletions(-) create mode 100644 doc/reference/std/instance.md create mode 100644 doc/reference/std/instance.org create mode 100644 src/std/instance-test.ss create mode 100644 src/std/instance.ss diff --git a/doc/.vuepress/config.js b/doc/.vuepress/config.js index 1d137bed8..5b16a4d5c 100644 --- a/doc/.vuepress/config.js +++ b/doc/.vuepress/config.js @@ -74,6 +74,7 @@ module.exports = { 'web', 'foreign', 'interface', + 'instance', 'contract', 'stdio', 'actor', diff --git a/doc/build.sh b/doc/build.sh index 5a1de347f..2eeaedcb5 100755 --- a/doc/build.sh +++ b/doc/build.sh @@ -24,6 +24,7 @@ weave ../src/std/markup/sxml/README.org weave ../src/std/markup/sxml/xml.org weave ../src/std/markup/sxml/html/README.org weave ../src/std/markup/sxml/tal/README.org +weave ./reference/std/instance.org npm install diff --git a/doc/reference/std/instance.md b/doc/reference/std/instance.md new file mode 100644 index 000000000..99234f150 --- /dev/null +++ b/doc/reference/std/instance.md @@ -0,0 +1,269 @@ +# Instance :: defmethod's for interface + +When defining many methods on a class, possibly with an interface wrapper, a short form is nice to reduce bloat. + +::: tip To use the bindings from this module: + +```scheme +(import :std/instance) +``` + + +## Introduction + +Gerbil has a strong type system which is close a "Meta-language"(`ML`)! That is to say: a functional programming language with a polymorphic Hindley–Milner type system. + +Let's take a `Monad` typeclass as an example. Here's one in a `Gofer`1 like syntax. + +```haskell +class Monad m where + return :: a -> m a + bind :: m a -> (a -> m b) -> m b +``` + +Using Gerbil's `interface`'s we can do something quite similar. + +```scheme +(import :std/interface) +;; The comments are just for show. +(interface Monad #; (m) + (return a) #;| -> m a | + (bind m a) #;| -> (a -> m b) -> m b |) +``` + +To declare an `instance` of a typeclass also has some syntax. Here's an `ML` instance declaration. + +```haskell +instance Monad Parser where + -- return :: a -> Parser a + return v = \inp -> [(v,inp)] + -- bind :: Parser a -> (a -> Parser b) -> Parser b + p ‘bind‘ f = \inp -> concat [f v out | (v,out) <- p inp] +``` + +A translation to gerbil uses **Interface Passing Style**. We'll take a struct as the interface object. + +```scheme +(defstruct parser ()) +``` + +If using the builtin `defmethod` it's not that far off the functional version. + +```scheme +(import :std/srfi/1) +;; return :: a -> Parser a +(defmethod {return parser} (lambda (_ v) (lambda (inp) [[v inp ...]]))) +;; bind :: Parser a -> (a -> Parser b) -> Parser b +(defmethod {bind parser} + (lambda (_ p f) + (lambda (inp) + (append-map (cut match <> ([v . out] ((f v) out))) + (p inp))))) +``` + +But we can do better. Introducting the `instance` syntax. + +```scheme +(instance Monad parser + ;; return :: a -> Parser a + ((return v) (lambda (inp) [[v inp ...]])) + ;; bind :: Parser a -> (a -> Parser b) -> Parser b + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp))))) + +``` + + +## The `instance` Syntax + +```scheme +(instance Interface Klass + method-def ... + [rebind: ]) + +: + ;; The name of the interface + (instance Interface) ;; both 's, the name and instance of interface. + ;; The interface is unused/blank + +: + ;; The name of the class we are defining methods on + (self Klass) ;; both 's, the name of the class and self thereof. + +: + ((name . args) body ...) ;; The definition of a method. Note the + ;; object is inferred so not in the lambda list. + +: + ;; Default: #f ; Error if not true and any method already + ;; been bound.. Otherwise rebind the klass. +``` + +The idea here is simply to reduce typing while putting things in a concise container. + + +## With an `interface` but no object + +For example let's start with a bigger `Monad`. + +We've seen a Monad Interface. + +By default Haskell has one more function that simply "combinds" two monadic functions. + +```haskell +class Monad m where + bind :: m a -> (a -> m b) -> m b + seq :: m a -> m b -> m b + return :: a -> m a + + -- Minimal complete definition: + -- (>>=), return + m >> k = m >>= \_ -> k +``` + +We can add that to our interface: + +```scheme +(interface Monad + (return a) #;| -> m a | + (bind m a) #;| -> (a -> m b) -> m b | + (seq ma mb) #;| m a -> m b -> m b |) +``` + +And make the `:t` class into an identity monad with a minimal complete definition. + +```scheme +(instance (m Monad) :t + ((return a) a) + ((bind ma f) (f ma)) + ((seq ma mb) (m.bind ma (lambda _ mb)))) +``` + +The `((return a) a)` form expands to: + +```scheme +(defmethod {return :t} (lambda (self a) (using (m self : Monad) a)) rebind: #f) +``` + +A binding of `self` is just made up and hygenic as it's not used. + +And the `seq` expansion becomes obvious as well. + +```scheme +(defmethod {seq :t} + (lambda (self ma mb) (using (m self : Monad) (m.bind ma (lambda _ mb)))) + rebind: #f) +``` + +That now means that everyting is an instance of a monad, as the form said. We'll just use `#t` as the object which is, after all, also of the `:t` class, as is everything. + +```scheme +> (using (m #t : Monad) + (let* ((ma (m.return 41)) + (mb (m.return 42))) + (m.seq ma mb))) +42 +``` + + + + +## Using a `interface` and an `object` + +We can use interface passing style AND object-orientation together! + +For example here's the start of a a parser similar to **Parsec**2. + +```scheme +(import :std/srfi/1) +(interface (Parser Monad) (item)) + +(defstruct parser (string)) + +(instance (m Parser) (self parser) + ((item) (lambda (inp) + (def (%item i) + (cond ((number? i) ((m.return (string-ref self.string i)) (1+ i))) + ((string? i) (if (zero? (string-length i)) [] + (begin (set! self.string i) (%item 0)))))) + (%item inp))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp))))) +``` + +Because it's a `Monad` that means `seq` is available. + +```scheme +> (def foop (parser "")) +> ((using (m foop : Parser) + (let* ((ma (m.return 41)) + (mb (m.item))) + (m.seq ma mb))) "input") +((#\i . 1)) +> (parser-string foop) +"input" +``` + + +## No interface, but an object and class. + +In the `item` method for the [Parser/parser](#using-a-interface-and-an-object) defined beforhand there's an `%item` function that could be abstacted a few ways. + +Keeping things dynamic there is no interface. + +```scheme +(import :std/ref) +(defstruct parsable (inp state)) + +(instance + #t (pa parsable) + ((update-state fn) (set! pa.state (fn pa.state)) pa.state) + ((ref (n 0)) (ref pa.inp n))) +``` + +So we can, dynamically, use and update and reference using the state. In this case we'll make the `state` just the offset to peek and/or read. + +```scheme +> (def pstr (parsable "string" 0)) +> {ref pstr} +#\s +> (using (pstr :- parsable) {pstr.ref pstr.state}) +#\s +> (using (pstr :- parsable) {pstr.update-state 1+}) +> (using (pstr :- parsable) {pstr.ref pstr.state}) +#\t +``` + + +## Time to `rebind:` + +Now that we've changed the way our parser does the state we should change the [Parser/parser](#using-a-interface-and-an-object) itself. + +```scheme +(defstruct parser ()) +(instance (m Parser) (self parser) + ((item) (lambda (inp) + (using (inp :- parsable) + (let (i {inp.ref inp.state}) + {inp.update-state 1+} + ((m.return i) inp))))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp)))) + rebind: #t) +``` + +```scheme +> (def inpp (parsable "foobar" 0)) +> (caar ((using (p (parser) : Parser) (p.item)) inpp)) +#\f +> (parsable-state inpp) +1 +``` + +## Footnotes + +1 + +2 diff --git a/doc/reference/std/instance.org b/doc/reference/std/instance.org new file mode 100644 index 000000000..d9cb5f210 --- /dev/null +++ b/doc/reference/std/instance.org @@ -0,0 +1,388 @@ +#+TITLE: The `instance` syntax +#+EXPORT_FILE_NAME: ./instance.md +#+OPTIONS: toc:nil + + +* Contents :noexport: +:PROPERTIES: +:TOC: :include siblings :depth 4 :ignore (this) +:END: +:CONTENTS: +- [[#instance--defmethods-for-interface][Instance :: defmethod's for interface]] + - [[#introduction][Introduction]] + - [[#the-instance-syntax][The instance Syntax]] + - [[#with-an-interface-but-no-object][With an interface but no object]] + - [[#using-a-interface-and-an-object][Using a interface and an object]] + - [[#no-interface-but-an-object-and-class][No interface, but an object and class.]] + - [[#time-to-rebind][Time to rebind:]] +- [[#footnotes][Footnotes]] +:END: + + +* Instance :: defmethod's for interface +:PROPERTIES: +:CUSTOM_ID: instance--defmethods-for-interface +:END: + +When defining many methods on a class, possibly with an interface +wrapper, a short form is nice to reduce bloat. + +::: tip To use the bindings from this module: +#+begin_src scheme + (import :std/instance) +#+end_src + +** Introduction +:PROPERTIES: +:CUSTOM_ID: introduction +:END: + +Gerbil has a strong type system which is close a +"Meta-language"(=ML=)! That is to say: a functional programming +language with a polymorphic Hindley–Milner type system. + +Let's take a =Monad= typeclass as an example. Here's one in a +=Gofer=[fn:1] like syntax. + +#+begin_src haskell + class Monad m where + return :: a -> m a + bind :: m a -> (a -> m b) -> m b +#+end_src + +Using Gerbil's =interface='s we can do something quite similar. + +#+begin_src scheme + (import :std/interface) + ;; The comments are just for show. + (interface Monad #;(ref:m) + (return a) #;| -> m a | + (bind m a) #;| -> (a -> m b) -> m b |) +#+end_src + + +To declare an =instance= of a typeclass also has some syntax. Here's +an =ML= instance declaration. + +#+begin_src haskell + instance Monad Parser where + -- return :: a -> Parser a + return v = \inp -> [(v,inp)] + -- bind :: Parser a -> (a -> Parser b) -> Parser b + p ‘bind‘ f = \inp -> concat [f v out | (v,out) <- p inp] +#+end_src + + +A translation to gerbil uses *Interface Passing Style*. We'll take a +struct as the interface object. + +#+begin_src scheme + (defstruct parser ()) +#+end_src + +If using the builtin =defmethod= it's not that far off the functional +version. + +#+begin_src scheme + (import :std/srfi/1) + ;; return :: a -> Parser a + (defmethod {return parser} (lambda (_ v) (lambda (inp) [[v inp ...]]))) + ;; bind :: Parser a -> (a -> Parser b) -> Parser b + (defmethod {bind parser} + (lambda (_ p f) + (lambda (inp) + (append-map (cut match <> ([v . out] ((f v) out))) + (p inp))))) +#+end_src + +But we can do better. Introducting the =instance= syntax. + +#+begin_src scheme + (instance Monad parser + ;; return :: a -> Parser a + ((return v) (lambda (inp) [[v inp ...]])) + ;; bind :: Parser a -> (a -> Parser b) -> Parser b + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp))))) + +#+end_src + +** The =instance= Syntax +:PROPERTIES: +:CUSTOM_ID: the-instance-syntax +:END: + +#+begin_src scheme + (instance Interface Klass + method-def ... + [rebind: ]) + + : + ;; The name of the interface + (instance Interface) ;; both 's, the name and instance of interface. + ;; The interface is unused/blank + + : + ;; The name of the class we are defining methods on + (self Klass) ;; both 's, the name of the class and self thereof. + + : + ((name . args) body ...) ;; The definition of a method. Note the + ;; object is inferred so not in the lambda list. + + : + ;; Default: #f ; Error if not true and any method already + ;; been bound.. Otherwise rebind the klass. +#+end_src + +The idea here is simply to reduce typing while putting things in a +concise container. + +** With an =interface= but no object +:PROPERTIES: +:CUSTOM_ID: with-an-interface-but-no-object +:END: + +For example let's start with a bigger =Monad=. + +We've seen a [[(m)][Monad Interface]]. + +By default Haskell has one more function that simply "combinds" two +monadic functions. + +#+begin_src haskell + class Monad m where + bind :: m a -> (a -> m b) -> m b + seq :: m a -> m b -> m b + return :: a -> m a + + -- Minimal complete definition: + -- (>>=), return + m >> k = m >>= \_ -> k +#+end_src + +We can add that to our interface: + +#+begin_src scheme :noweb-ref monad-interface + (interface Monad + (return a) #;| -> m a | + (bind m a) #;| -> (a -> m b) -> m b | + (seq ma mb) #;| m a -> m b -> m b |) +#+end_src + +And make the =:t= class into an identity monad with a minimal complete +definition. + +#+begin_src scheme :noweb-ref monad-:t + (instance (m Monad) :t + ((return a) a) + ((bind ma f) (f ma)) + ((seq ma mb) (m.bind ma (lambda _ mb)))) +#+end_src + +The =((return a) a)= form expands to: + +#+begin_src scheme +(defmethod {return :t} (lambda (self a) (using (m self : Monad) a)) rebind: #f) +#+end_src + +A binding of =self= is just made up and hygenic as it's not used. + +And the =seq= expansion becomes obvious as well. + +#+begin_src scheme + (defmethod {seq :t} + (lambda (self ma mb) (using (m self : Monad) (m.bind ma (lambda _ mb)))) + rebind: #f) +#+end_src + +That now means that everyting is an instance of a monad, as the form +said. We'll just use =#t= as the object which is, after all, also of +the =:t= class, as is everything. + +#+begin_src scheme :noweb-ref test-monad-:t + > (using (m #t : Monad) + (let* ((ma (m.return 41)) + (mb (m.return 42))) + (m.seq ma mb))) + 42 +#+end_src + + +** Using a =interface= and an =object= +:PROPERTIES: +:CUSTOM_ID: using-a-interface-and-an-object +:END: + +We can use interface passing style AND object-orientation together! + +For example here's the start of a a parser similar to *Parsec*[fn:2]. + +#+begin_src scheme :noweb-ref def-Parser + (import :std/srfi/1) + (interface (Parser Monad) (item)) + + (defstruct parser (string)) + + (instance (m Parser) (self parser) + ((item) (lambda (inp) + (def (%item i) + (cond ((number? i) ((m.return (string-ref self.string i)) (1+ i))) + ((string? i) (if (zero? (string-length i)) [] + (begin (set! self.string i) (%item 0)))))) + (%item inp))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp))))) +#+end_src + +Because it's a =Monad= that means =seq= is available. + +#+begin_src scheme :noweb-ref test-parser-seq + > (def foop (parser "")) + > ((using (m foop : Parser) + (let* ((ma (m.return 41)) + (mb (m.item))) + (m.seq ma mb))) "input") + ((#\i . 1)) + > (parser-string foop) + "input" +#+end_src + + +** No interface, but an object and class. +:PROPERTIES: +:CUSTOM_ID: no-interface-but-an-object-and-class +:END: + +In the =item= method for the [[#using-a-interface-and-an-object][Parser/parser]] defined beforhand there's +an =%item= function that could be abstacted a few ways. + +Keeping things dynamic there is no interface. + +#+begin_src scheme :noweb-ref define-first-parsable + (import :std/ref) + (defstruct parsable (inp state)) + + (instance + #t (pa parsable) + ((update-state fn) (set! pa.state (fn pa.state)) pa.state) + ((ref (n 0)) (ref pa.inp n))) +#+end_src + +So we can, dynamically, use and update and reference using the state. In +this case we'll make the =state= just the offset to peek and/or read. + +#+begin_src scheme :noweb-ref test-first-parsable + > (def pstr (parsable "string" 0)) + > {ref pstr} + #\s + > (using (pstr :- parsable) {pstr.ref pstr.state}) + #\s + > (using (pstr :- parsable) {pstr.update-state 1+}) + > (using (pstr :- parsable) {pstr.ref pstr.state}) + #\t +#+end_src + + +** Time to =rebind:= +:PROPERTIES: +:CUSTOM_ID: time-to-rebind +:END: + +Now that we've changed the way our parser does the state we should +change the [[#using-a-interface-and-an-object][Parser/parser]] itself. + +#+begin_src scheme :noweb-ref redef-parser + (defstruct parser ()) + (instance (m Parser) (self parser) + ((item) (lambda (inp) + (using (inp :- parsable) + (let (i {inp.ref inp.state}) + {inp.update-state 1+} + ((m.return i) inp))))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp)))) + rebind: #t) +#+end_src + + +#+begin_src scheme :noweb-ref redef-testing + > (def inpp (parsable "foobar" 0)) + > (caar ((using (p (parser) : Parser) (p.item)) inpp)) + #\f + > (parsable-state inpp) + 1 +#+end_src + +* The Test File :noexport: +:PROPERTIES: +:CUSTOM_ID: the-test-file +:END: + +#+begin_src scheme :noweb yes :tangle ../../../src/std/instance-test.ss + ;;; -*- Gerbil -*- + ;;; (C) me at drewc.ca + ;;; :std/instance unit-tests + + (import :std/test + :std/error + ; :std/instance + "instance" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) + (export instance-test) + + (defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + + + <> + <> + + <> + <> + + + (def instance-test + (test-suite "Test :std/instance" + (test-inline + test-case: "Test Monad for :t" + <>) + (test-inline + test-case: "Test interface and object seq parsec" + <>) + (test-inline + test-case: "Test First Parsable" + <>) + (test-inline + test-case: "Test rebind and redef" + > (begin + <>) + <> + > #t #t))) + + + + + + + + +#+end_src + +* Footnotes +:PROPERTIES: +:CUSTOM_ID: footnotes +:END: +[fn:2] https://en.wikipedia.org/wiki/Parsec_(parser) + +[fn:1] https://en.wikipedia.org/wiki/Gofer_(programming_language) diff --git a/doc/reference/std/markup/sxml/README.md b/doc/reference/std/markup/sxml/README.md index 82a9f5b6b..90f64c5d6 100644 --- a/doc/reference/std/markup/sxml/README.md +++ b/doc/reference/std/markup/sxml/README.md @@ -139,6 +139,12 @@ When set to `xml?` things are different. ## SXML Queries +SXPath is an XPath-conforming XML query language that internally relies on SXML as a representation of the XML Infoset. + +TODO: <— there's an XPath->SXPath parser. + +It's by Lizorkin who also has a good tutorial [here](https://web.archive.org/web/20070414181503/http://modis.ispras.ru/Lizorkin/sxml-tutorial.html), + ### sxpath @@ -148,7 +154,26 @@ When set to `xml?` things are different. path := list ``` -Evaluate an abbreviated SXPath +Evaluate an abbreviated SXPath. The syntax will follow but assume it's a sexp version of XPath similar to how SXML is a sexp version of XML. + +The easy way is to think of SXPath as a list of path components. It's also important to realize that attributes are themselves a node of type `@`. + +So `'(html head title)` is like the `"​/​html/​head/​title"` XPath and the `//row[@r​='8']` instead can be said like `(// (row (@ r (equal? "8"))))` + +There are a few special path components: + +- **\* :** matches an element node. +- **//:** matches any one or many consecutive path components. +- **@ :** selects the attribute list node. + +If a path component is a list it's one of these forms: + +- **(equal? x):** matches if the node under examination matches x using node-equal? +- **(eq? x):** matches if the node under examination matches x using node-eq? +- **( n) :: - :** matches the n-th node matching same path component. n starts from 1. Negative numbers start from the end of the node list backward. This is `path[n]` syntax in XPath. +- **( (…)):** matches a path component path and `(sxpath (...))` on those nodes are not empty. This is `path[predicate...]` syntax in XPath. + +Here's the syntax: ``` sxpath:: AbbrPath -> Converter, or @@ -175,6 +200,8 @@ AbbrPath is a list. It is translated to the full SXPath according to the followi (sxpathr path-filter) -> (filter (sxpath path-filter)) ``` +The `sxpath1` and `sxpathr` operators are not exported and just there for show. + ### sxml-select @@ -328,4 +355,4 @@ returns nodes children as a list mapf := transform fn to apply to matches ``` -find with context \ No newline at end of file +find with context diff --git a/doc/reference/std/markup/sxml/html/README.md b/doc/reference/std/markup/sxml/html/README.md index e000ad876..caf466a06 100644 --- a/doc/reference/std/markup/sxml/html/README.md +++ b/doc/reference/std/markup/sxml/html/README.md @@ -122,13 +122,13 @@ While HTML and XML are friends there are some elements in HTML that cannot be ex ```scheme (make-html-parser start: #f end: #f text: #f - comment: #f decl: #f process: #f - entity: #f entities: *default-entities* - tag-levels: *tag-levels* - unnestables: *unnestables* - bodyless: (current-html-void-tags) - literals: (current-html-raw-tags) - terminators: *terminators*) + comment: #f decl: #f process: #f + entity: #f entities: *default-entities* + tag-levels: *tag-levels* + unnestables: *unnestables* + bodyless: (current-html-void-tags) + literals: (current-html-raw-tags) + terminators: *terminators*) ``` Returns a procedure of two arguments, an initial seed and an optional input port, which parses the HTML document from the port with the callbacks specified by a keyword. @@ -171,7 +171,7 @@ While HTML and XML are friends there are some elements in HTML that cannot be ex ``` decl: name attrs seed fhere on declaration data - + process: list seed fhere on process-instruction data ``` diff --git a/doc/reference/std/markup/sxml/tal/README.md b/doc/reference/std/markup/sxml/tal/README.md index 40bff12f1..7e4c7003e 100644 --- a/doc/reference/std/markup/sxml/tal/README.md +++ b/doc/reference/std/markup/sxml/tal/README.md @@ -873,4 +873,4 @@ That's also nice to see in long form. -``` \ No newline at end of file +``` diff --git a/doc/reference/std/markup/sxml/xml.md b/doc/reference/std/markup/sxml/xml.md index 77b8c744d..657bb6b6f 100644 --- a/doc/reference/std/markup/sxml/xml.md +++ b/doc/reference/std/markup/sxml/xml.md @@ -31,6 +31,38 @@ Reads and parses XML from *source* and returns SXML result. *namespaces* is opti > (import :std/markup/xml) > (read-xml "foobarbarbaz") (*TOP* (foo (element (@ (id "1")) "foobar") (element (@ (id "2")) "barbaz"))) +> (call-with-input-string " " read-xml) +(*TOP* (FOO:b (FOO:baz) (bAr:g))) +``` + +::: + + +### Namespaces: `xmlns` and `SXML` + +XML has namespaces. While they can be very useful the translation to s-expressions and back again needs some explaining. + +For example when working with Excel Spreadsheets the `workbook.xml` has namespaces. + +::: tip Pretend workbook + +```scheme +(def pretend-workbook-text (symbol->string '| + + + + + + + + | +)) + +(def pretend-workbook-sxml (read-xml pretend-workbook-text)) ``` ::: diff --git a/src/std/instance-test.ss b/src/std/instance-test.ss new file mode 100644 index 000000000..52de2d4a5 --- /dev/null +++ b/src/std/instance-test.ss @@ -0,0 +1,105 @@ +;;; -*- Gerbil -*- +;;; (C) me at drewc.ca +;;; :std/instance unit-tests + +(import :std/test + :std/error + ; :std/instance + "instance" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) +(export instance-test) + +(defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + + +(interface Monad + (return a) #;| -> m a | + (bind m a) #;| -> (a -> m b) -> m b | + (seq ma mb) #;| m a -> m b -> m b |) +(instance (m Monad) :t + ((return a) a) + ((bind ma f) (f ma)) + ((seq ma mb) (m.bind ma (lambda _ mb)))) + +(import :std/srfi/1) +(interface (Parser Monad) (item)) + +(defstruct parser (string)) + +(instance (m Parser) (self parser) + ((item) (lambda (inp) + (def (%item i) + (cond ((number? i) ((m.return (string-ref self.string i)) (1+ i))) + ((string? i) (if (zero? (string-length i)) [] + (begin (set! self.string i) (%item 0)))))) + (%item inp))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp))))) +(import :std/ref) +(defstruct parsable (inp state)) + +(instance + #t (pa parsable) + ((update-state fn) (set! pa.state (fn pa.state)) pa.state) + ((ref (n 0)) (ref pa.inp n))) + + +(def instance-test + (test-suite "Test :std/instance" + (test-inline + test-case: "Test Monad for :t" + > (using (m #t : Monad) + (let* ((ma (m.return 41)) + (mb (m.return 42))) + (m.seq ma mb))) + 42) + (test-inline + test-case: "Test interface and object seq parsec" + > (def foop (parser "")) + > ((using (m foop : Parser) + (let* ((ma (m.return 41)) + (mb (m.item))) + (m.seq ma mb))) "input") + ((#\i . 1)) + > (parser-string foop) + "input") + (test-inline + test-case: "Test First Parsable" + > (def pstr (parsable "string" 0)) + > {ref pstr} + #\s + > (using (pstr :- parsable) {pstr.ref pstr.state}) + #\s + > (using (pstr :- parsable) {pstr.update-state 1+}) + > (using (pstr :- parsable) {pstr.ref pstr.state}) + #\t) + (test-inline + test-case: "Test rebind and redef" + > (begin + (defstruct parser ()) + (instance (m Parser) (self parser) + ((item) (lambda (inp) + (using (inp :- parsable) + (let (i {inp.ref inp.state}) + {inp.update-state 1+} + ((m.return i) inp))))) + ((return v) (lambda (inp) [[v inp ...]])) + ((bind p f) + (lambda (inp) (append-map (cut match <> ([v . out] ((f v) out))) (p inp)))) + rebind: #t)) + > (def inpp (parsable "foobar" 0)) + > (caar ((using (p (parser) : Parser) (p.item)) inpp)) + #\f + > (parsable-state inpp) + 1 + > #t #t))) diff --git a/src/std/instance.ss b/src/std/instance.ss new file mode 100644 index 000000000..3e19e4f41 --- /dev/null +++ b/src/std/instance.ss @@ -0,0 +1,40 @@ +(import :std/contract :std/interface :gerbil/core + (for-syntax :std/interface :std/srfi/1 :gerbil/core)) +(export #t) + +(extern namespace: #f ___specialize-class + __bind-class-specializer!) + +(defsyntax (def-instance-method stx) + (syntax-case stx () + ((_ (interface-object Interface) (self klass) ((method . args) body ...) rebind: rebind) + #'(defmethod {method klass} + (lambda (self . args) + (using (interface-object self : Interface) body ...)) + rebind: rebind)) + ((_ no-interface (self klass) ((method . args) body ...) rebind: rebind) + (stx-boolean? #'no-interface) + #'(defmethod {method klass} (lambda (self . args) body ...) + rebind: rebind)) + ((_ interface klass method ... rebind: rebind) + (and (identifier? #'class) + (or (stx-pair? #'interface) (stx-boolean? #'interface))) + #'(def-instance-method interface (self klass) method ... rebind: rebind)) + ((_ Interface klass method ... rebind: rebind) + (and (identifier? #'Interface)) + #'(def-instance-method #f klass method ... rebind: rebind)) + ((_ rest ...) + #'(def-instance-method rest... rebind: #f)))) + +(defsyntax (instance stx) + (syntax-case stx () + ((_ interface klass method ... rebind: rebind) + (with-syntax* + ((Type (if (identifier? #'klass) #'klass (stx-car (stx-cdr #'klass)))) + (Type::t (!class-type-descriptor (syntax-local-value #'Type false)))) + #'(begin + (def-instance-method interface klass method rebind: rebind) ... + (when rebind + (__bind-class-specializer! Type::t (___specialize-class Type::t)))))) + ((macro rest ...) + #'(macro rest ... rebind: #f)))) From 949a3f63732b5a738d4dadb5d87956df22b410d8 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Tue, 2 Jul 2024 13:49:09 -0700 Subject: [PATCH 04/23] Make gxhttp pass the correct typecheck when timeout --- src/tools/gxhttpd.ss | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/gxhttpd.ss b/src/tools/gxhttpd.ss index fd7a76f6a..106d92262 100644 --- a/src/tools/gxhttpd.ss +++ b/src/tools/gxhttpd.ss @@ -337,7 +337,7 @@ (servlets? (: (config-get cfg enable-servlets:) :boolean))) (set! self.root root) (set! self.cache (make-hash-table-string)) - (set! self.cache-ttl (: (config-get cfg cache-ttl: 120) :real)) + (set! self.cache-ttl (: (inexact (config-get cfg cache-ttl: 120)) :real)) (set! self.cache-max-size (: (config-get cfg cache-max-size: 16384) :fixnum)) (set! self.handlers (make-hash-table-string)) (when servlets? From 2bb53ff442bd24dc19956a3c0aa15d97e7cd80c9 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Tue, 2 Jul 2024 14:50:21 -0700 Subject: [PATCH 05/23] Change title to play with vuepress better --- doc/reference/std/instance.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/reference/std/instance.org b/doc/reference/std/instance.org index d9cb5f210..66b62e702 100644 --- a/doc/reference/std/instance.org +++ b/doc/reference/std/instance.org @@ -19,7 +19,7 @@ :END: -* Instance :: defmethod's for interface +* Instance :PROPERTIES: :CUSTOM_ID: instance--defmethods-for-interface :END: From e6d82acc317376a07629d9c0674d1c5a57d164dd Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Tue, 2 Jul 2024 15:48:54 -0700 Subject: [PATCH 06/23] Add instance to build spec --- src/std/build-spec.ss | 1 + 1 file changed, 1 insertion(+) diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index 95ff275b3..268d231ca 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -38,6 +38,7 @@ "amb" "contract" (gxc: "interface" ,@(include-gambit-sharp)) + "instance" ;; cli "cli/getopt" "cli/shell" From fa5f150c969608ef84778da1fd58776289e7f0ec Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sun, 21 Jul 2024 19:23:15 -0700 Subject: [PATCH 07/23] max-token-length: for gxhttpd --- src/tools/gxhttpd.ss | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/tools/gxhttpd.ss b/src/tools/gxhttpd.ss index 106d92262..110464213 100644 --- a/src/tools/gxhttpd.ss +++ b/src/tools/gxhttpd.ss @@ -85,6 +85,8 @@ ;;; ensemble-supervisor-id: symbol|#f ;;; ;;; ensemble-registry: [optional] list of registry addresses ;;; ensemble-registry: (actor-address ...) +;;; ;;; max-token-length: The request handler parser buffer size +;;; max-token-length: integer ;;;---------------------------------------------------------------- ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -281,8 +283,10 @@ (mux (make-mux cfg)) (request-logger (get-request-logger cfg)) (addresses (config-get! cfg listen:)) + (max-token-length (config-get cfg max-token-length: 1024)) (run-httpd (lambda () + (set-httpd-max-token-length! max-token-length) (parameterize ((current-http-server-config cfg)) (let (srv (apply start-http-server! mux: mux From c86a49bd8e207eb006d1b262be013abf26deaf54 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Tue, 30 Jul 2024 13:41:43 -0700 Subject: [PATCH 08/23] Change swank package path in README --- src/std/ide/swank/README.org | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/std/ide/swank/README.org b/src/std/ide/swank/README.org index 16259dc05..1340bc16f 100644 --- a/src/std/ide/swank/README.org +++ b/src/std/ide/swank/README.org @@ -24,7 +24,7 @@ to your init file. :hook ((gerbil-mode . slime-mode))) (defun gerbil-slime-init-function (file encoding) - (format "(begin (import (prefix-in :std/swank swank#)) + (format "(begin (import (prefix-in :std/ide/swank swank#)) (let ((port (+ 10000 (random-integer 55535)))) (swank#create-server port: port) (with-output-to-file %S (cut write port))))\n\n" From d1f5a41873767af1da17d070f492ac8c66a8861b Mon Sep 17 00:00:00 2001 From: vyzo Date: Tue, 6 Aug 2024 23:23:07 +0300 Subject: [PATCH 09/23] IDE: IDEServer interface (#1261) interface and data model for now. --- src/std/build-spec.ss | 2 + src/std/ide/interface.ss | 49 ++++++++++++++++ src/std/ide/model.ss | 124 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 src/std/ide/interface.ss create mode 100644 src/std/ide/model.ss diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index 268d231ca..ef1a3dfca 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -168,6 +168,8 @@ "srfi/160/c64" "srfi/160/c128" "srfi/212" + "ide/model" + "ide/interface" "ide/swank/api" "ide/swank/message" "ide/swank/context" diff --git a/src/std/ide/interface.ss b/src/std/ide/interface.ss new file mode 100644 index 000000000..364667353 --- /dev/null +++ b/src/std/ide/interface.ss @@ -0,0 +1,49 @@ +;;; -*- Gerbil -*- +;;; © vyzo +;;; IDE interface +;;(import ./model) +(export #t) +(import ./model) + +(interface IDEServer + ;; updates the workspace tracked files + ;; added, modified, and removed are lists of file paths. + (update-workspace (added : :list) (modified : :list) (removed : :list)) + + ;; resolve a symbol in source to an entity + (resolve (src : Source)) => Entity + + ;; finds references of a specific entity within a scope + ;; - e is the entity for which references should be searched + ;; - scope can be: + ;; - #f, in which case the entire workspace is searched + ;; - a file path for searching a specific file/module + ;; - a list of file paths for searching specific file/modules + ;; returns a list of References + (find-refs (e : Entity) (scope :~ reference-scope? := #f)) => :list + + ;; finds documentation for an entity + ;; returns a list of Location with relevant documentation + (find-docs (e : Entity)) => :list + + ;; complete the last symbol in a partial s-expr in some source span + ;; returns a list of Completions + (complete (src : Source)) => :list + + ;; evals an sexpr in some source span + (eval (src : Source)) => Result + + ;; expands an sexpr in some source span + (expand (src : Source) (fully? : :boolean := #f)) => Result + + ;; visits a file/module + ;; Returns a parsed representation (SExpr) of the file/module's code + (visit-file (path : :string)) => Result + + ;; updates the in-memory reprsentation of a file/module + (edit-file (replace-at : SourceFileSpan) (data : :string) (span : Span) )) + +(def (reference-scope? o) + (or (not o) + (string? o) + (and (list? o) (andmap string? o)))) diff --git a/src/std/ide/model.ss b/src/std/ide/model.ss new file mode 100644 index 000000000..8ad3412a9 --- /dev/null +++ b/src/std/ide/model.ss @@ -0,0 +1,124 @@ +;;; -*- Gerbil -*- +;;; © vyzo +;;; IDE model types +(import :std/actor-v18/message) +(export #t) + +;; base class for serializable objects; actor io +(defclass (Serializable message) ()) + +;; source locations and anchors +(defclass (Source Serializable) ()) +(defclass (SourceString Source) ((value : :string))) +(defclass (File Serializable) ((path : :string))) +(defclass (Pos Serializable) ((line : :fixnum) (col : :fixnum))) +(defclass (Span Serializable) ((start : Pos) (end : Pos))) + +(defclass (SourceFilePos Source File Pos) ()) +(defclass (SourceFileSpan Source File Span) ()) +(defclass (SourceFileString SourceString File) ()) +(defclass (ModuleRef Serializable) ((module : :symbol))) +(defclass (SourceModuleString SourceString ModuleRef) ()) +(defclass (SourceFileModuleString SourceModuleString File) ()) + +;; uniquely identifiable entities +(defclass (Entity Serializable) + ((id : :symbol) ; the entity identifier + (name : :symbol) ; the human "name" of the entity + (ctx : :t))) ; the context of the identifier + +;; base class for entity types +(defclass (Type Entity) ()) + +;; identifiers +(defclass (Identifier Entity) + ((type :? Type))) + +;; identifier types +(defclass (MetaObject Type) ()) +(defclass (Class MetaObject) ()) +(defclass (Interface MetaObject) ()) + +(defclass (Object Type) ()) +(defclass (StandardObject Object) ()) +(defclass (ClassInstance StandardObject) ()) +(defclass (InterfaceInstance StandardObject) ()) +(defclass (SystemObject Object) ()) +(defclass (Procedure SystemObject) ()) + +(defclass (MetaSyntacticObject Type) ()) +(defclass (Macro MetaSyntacticObject) ()) +(defclass (Module MetaSyntacticObject) ()) + +;; completions +(defclass (Completion Serializable) + ((symbol : :symbol) ; the completion symbol + (entity : Entity))) ; the completion entity + +;; objects with a location +(defclass (Location Serializable) ((loc : SourceFileSpan))) + +;; entity source references +(defclass (Reference Location) + ((e : Entity))) ; the entity referred + +;; evaluation and expansion results +(defclass (Result Serializable) ()) + +;; S-Expressions +(defclass (SExpr Result) + ((hd : :symbol) + (e : :list))) + +;; literal values +(defclass (Literal Result) + ((value : :string))) + +;; errors +(defclass (Error Result) + ((msg : :string))) + +;; register message classes for actor io +(defsyntax (declare-serializable stx) + (syntax-case stx () + ((_ id ...) + (with-syntax (((id::t ...) + (stx-map (lambda (id) (stx-identifier id id "::t")) #'(id ...)))) + #'(begin + (register-message-type! id::t) + ...))))) + +(declare-serializable + Serializable + Source + SourceString + File + Pos + Span + SourceFilePos + SourceFileSpan + SourceFileString + ModuleRef + SourceModuleString + SourceFileModuleString + Entity + Type + Identifier + MetaObject + Class + Interface + StandardObject + ClassInstance + InterfaceInstance + SystemObject + Procedure + MetaSyntacticObject + Macro + Module + Completion + Location + Reference + Result + Literal + SExpr + Error) From 97c11badf606a423e3b9868e5291a55d24844a48 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Fri, 16 Aug 2024 17:38:23 -0700 Subject: [PATCH 10/23] WIP: All the monads needed for Parsec --- src/std/build-spec.ss | 8 + src/std/monad-test.ss | 339 ++++++++++ src/std/monad.ss | 16 + src/std/monad/README.org | 1197 ++++++++++++++++++++++++++++++++++ src/std/monad/build.ss | 16 + src/std/monad/error.ss | 43 ++ src/std/monad/identity.ss | 11 + src/std/monad/instance-:list | 2 + src/std/monad/instance.ss | 126 ++++ src/std/monad/interface.ss | 37 ++ src/std/monad/list.ss | 13 + src/std/monad/state.ss | 69 ++ src/std/monad/syntax.ss | 33 + 13 files changed, 1910 insertions(+) create mode 100644 src/std/monad-test.ss create mode 100644 src/std/monad.ss create mode 100644 src/std/monad/README.org create mode 100755 src/std/monad/build.ss create mode 100644 src/std/monad/error.ss create mode 100644 src/std/monad/identity.ss create mode 100644 src/std/monad/instance-:list create mode 100644 src/std/monad/instance.ss create mode 100644 src/std/monad/interface.ss create mode 100644 src/std/monad/list.ss create mode 100644 src/std/monad/state.ss create mode 100644 src/std/monad/syntax.ss diff --git a/src/std/build-spec.ss b/src/std/build-spec.ss index ef1a3dfca..3f2df2855 100644 --- a/src/std/build-spec.ss +++ b/src/std/build-spec.ss @@ -182,6 +182,14 @@ "ide/swank/handlers" "ide/swank/server" "ide/swank" + ;; :std/monad + "monad/interface" + "monad/syntax" + "monad/identity" + "monad/list" + "monad/state" + "monad/error" + "monad" ;; :std/mime "mime/struct" (gxc: "mime/types" (extra-inputs: ("mime/mime.types"))) diff --git a/src/std/monad-test.ss b/src/std/monad-test.ss new file mode 100644 index 000000000..1ede35493 --- /dev/null +++ b/src/std/monad-test.ss @@ -0,0 +1,339 @@ +;;; -*- Gerbil -*- +;;; (C) me at drewc.ca +;;; :std/monad unit-tests + +(import :std/test + :std/error + :std/interactive + :srfi/13 + ; :std/instance + "instance" + "monad/interface" + "monad/identity" + "monad/list" + "monad/state" + "monad/syntax" + "monad/error" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) +(export monad-test) + +(defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + +(set-test-verbose! #t) + +(def monad-test + (test-suite "Test :std/monad" + + (test-inline + test-case: ":t as Identity" + > (using (m 'identity : Monad) + (m.>>= (m.return 41) + (lambda (a) (m.return (1+ a))))) + 42 + > (using (m 'identity : Monad) (m.>> 'anything 42)) + 42 + > #t #t) + (test-inline + test-case: ":list as List" + > (using (m [] : Monad) + (m.>>= (m.return 41) + (lambda (a) (m.return (1+ a))))) + (42) + > (using (m [] : Monad) (m.>> ['anything] [42])) + (42)) + + (test-inline + test-case: ":list and :t Left and Right identity" + > (using (m [] : Monad) (m.>>= (m.return 42) (cut m.return <>))) + (42) + > (using (m 'identity : Monad) (m.>>= (m.return 42) (cut m.return <>))) + 42 + > (def (monad-test-lr-identity monad v) + (using (m monad : Monad) + (m.>>= (m.return v) + (lambda (ov) (if (eq? v ov) (m.return ov) + (error "Wrap/Unwrap failed")))))) + + > (monad-test-lr-identity 'foo 42) + 42 + > (monad-test-lr-identity [] 42) + (42)) + + (test-inline + test-case: "First State Tests" + > ((using (s (make-state 'state) : Monad) (s.return 42)) 'hey!) + (42 . hey!) + > (def (monad-test-lr-identity monad v) + (using (m monad : Monad) + (m.>>= (m.return v) + (lambda (ov) (if (eq? v ov) (m.return ov) + (error "Wrap/Unwrap failed")))))) + > (let ((fn (monad-test-lr-identity (make-state 'here) 42))) + (fn 'foo)) + (42 . foo) + > ((using (s (make-state 'state) : Monad) + (s.>>= (s.return 42) (lambda (v) (s.return (+ 1 v))))) 'hey!) + (43 . hey!) + > (let (state (make-state 'hey!)) + (using ((m state : Monad) + (r state : Run)) + (r.run (m.return 42)))) + (42 . hey!) + > (let (state (make-state 'hey!)) + (using (s state : MonadState) + (s.run (s.return 42)))) + (42 . hey!) + + > (let (state (make-state 'no!)) + (using (s state : MonadState) + (s.run (s.>>= (s.put! 'hey!) (lambda _ (s.return 42)))))) + (42 . hey!) + + > (let (state (make-state 42)) + (using (s state : MonadState) + (s.run (s.>>= (s.put! 'hey!) (lambda (old) (s.return old)))))) + (42 . hey!) + + > (let (state (make-state 'no!)) + (using (s state : MonadState) + (s.run (s.>> (s.put! 42) + (s.>>= (s.put! 'hey!) (lambda (old) (s.return old))))))) + (42 . hey!) + + > (using (s (make-state 41) : MonadState) + (s.run (s.state (lambda (s_) ['!yeh (+ s_ 1) ...])))) + (!yeh . 42)) + + (test-inline + test-case: "First du Tests" + > (du (m 'identity : Monad) + n <- (m.return 41) + v <- (m.return (+ n 1)) + (= v 42)) + #t + > (using (m 'identity : Monad) + (m.>>= + (m.return 41) + (lambda (n) + (m.>>= + (m.return (+ n 1)) + (lambda (v) (= v 42)))))) + #t + > (let* ((s (make-state 0)) + (ms (du (s : MonadState) + (s.put! 41) + (s.state (lambda (s_) ['!yeh (+ s_ 1) ...]))))) + (Run-run s ms)) + (!yeh . 42) + > (using (s (make-state 0) : MonadState) + (s.run (s.>> + (s.put! 41) + (s.state (lambda (s_) ['!yeh (+ s_ 1) ...]))))) + (!yeh . 42) + > (def (testme n) + (using (s (make-state n) : MonadState) + (s.run (du s + n <- (s.get) + (let (v (+ n 1)) + (if (eqv? v 42) (s.put! v) (s.put! error:))) + (s.return '!yeh))))) + > (testme 41) + (!yeh . 42) + > (testme 46) + (!yeh . error:)) + + (test-inline + test-case: "Fail Tests" + > (defstruct maybe (nothing)) + > (instance MonadFail (m maybe) + ((return a) a) + ((>>= ma f) + (if (eqv? m.nothing ma) ma (f ma))) + ((fail) m.nothing)) + + > (def (testme o (no #f)) + (du (mf (maybe no) : MonadFail) + n <- 1 + m <- (if (even? o) (mf.fail) o) + (+ m n))) + + > (testme 4) + #f + > (testme 5) + 6 + > (testme 2 'huh) + huh + > (testme 3) + 4 + > (def (testl lst) + (du (mf [] : MonadFail) + n <- lst + m <- (if (even? n) (mf.fail) (mf.return (+ 41 n))) + (mf.return (eqv? 42 m)))) + + > (testl [1 2 3 4 5 6]) + (#t #f #f)) + + (test-inline + test-case: "ZPO tests" + > (du (m [] : MonadPlus) + (m.plus (m.return 42) [42])) + (42 42) + > (du (m [] : MonadZero) + (m.zero)) + () + > (du (m [] : MonadZeroOrPlus) + (m.or (m.plus (m.return 42) (m.zero)) (m.zero))) + (42) + + ) + + + (test-inline + test-case: "StateT tests" + > (def (test-stateT-monad state) + (du (m state : Monad) + n <- (m.return 42) + (m.return [n (= n 42)]))) + > ((test-stateT-monad (make-stateT (Monad (Identity)))) 'state) + ((42 #t) . state) + > ((test-stateT-monad (make-stateT (Monad []))) 'state) + (((42 #t) . state)) + > (def (test-stateT-State statet first-state) + (def run (du (m statet : MonadState) + first <- (m.put! 42) + second <- (m.get) + (m.put! 'final) + (m.return [first second]))) + (run first-state)) + > (test-stateT-State (make-stateT (Monad (Identity))) 'first) + ((first 42) . final) + > (test-stateT-State (make-stateT (Monad [])) 'second) + (((second 42) . final)) + > (def (test-stateT-state statet) + (du (m statet : MonadState) + (m.state (lambda (s!) [s! . 42])))) + > (using (s (make-stateT (Monad (Identity))) : Run) + (s.run (test-stateT-state s) 'first)) + (first . 42) + > (using (s (make-stateT (Monad [])) : Run) + (s.run (test-stateT-state s) 'second)) + ((second . 42)) + > ((du (m (make-stateT []) : MonadZeroOrPlus) + ab <- (m.plus (m.return 'a) (m.return 'b)) + + (m.return ab)) + 42) + ((a . 42) (b . 42)) + > ((du (m (make-stateT []) : MonadZeroOrPlus) + a <- (m.or (m.return 'a) (m.return 'b)) + + (m.return a)) + 42) + ((a . 42)) + > ((du (m (make-stateT []) : MonadZeroOrPlus) + b <- (m.or (m.zero) (m.return 'b)) + + (m.return b)) + 42) + ((b . 42)) + + > (def (listM-fn) + (du (m (MonadPlus []) :- MonadPlus) + (m.plus (m.return 41) (m.return 43)))) + > (listM-fn) + (41 43) + > ((du (m (make-stateT []) : MonadTrans) + foo <- (m.lift (listM-fn)) + (m.return (+ foo 1))) "state") + ((42 . "state") (44 . "state"))) + + (test-inline + test-case: "First Parser Tests" + > (interface (Parser MonadState Fail Or Plus) (item)) + > (defstruct (parser stateT) ()) + > (def current-parsee (make-parameter "42")) + > (def current-parser (make-parameter (make-parser (Monad [])))) + + > (instance (P Parser) (p parser) + ((item) (du P + idx <- (P.get) + len <- (P.return (string-length (current-parsee))) + (P.put! (1+ idx)) + (if (>= idx len) (P.fail) + (P.return (string-ref (current-parsee) idx)))))) + + > ((using (p (current-parser) : Parser) (p.item)) 0) + ((#\4 . 1)) + > ((using (p (current-parser) : Parser) (p.plus (p.item) (p.item))) 0) + ((#\4 . 1)(#\4 . 1)) + > (interface (Parsec Parser) (char=? char)) + > (defstruct (parsec parser) ()) + > (instance (P Parsec) (p parsec) + ((char=? char) + (du P + c <- (P.item) + (if (char=? char c) (P.return c) (P.fail))))) + > (current-parser (make-parsec (Monad []))) + > ((using (p (current-parser) : Parsec) + (p.char=? #\4)) 0) + ((#\4 . 1)) + > ((using (p (current-parser) : Parsec) + (p.char=? #\4)) 1) + () + + > (def (test-ltuae str) + (parameterize ((current-parsee str)) + (let (ret ((du (p (current-parser) : Parsec) + (p.>> (p.char=? #\4) (p.char=? #\2))) 0)) + (not (null? ret))))) + > (test-ltuae "42") + #t + > (test-ltuae "41") + #f) + + (test-inline + test-case: "ErrorT tests" + > (du (m (make-errorT) : MonadError) (m.return 42)) + 42 + > (du (m (make-errorT []) : MonadError) (m.return 42)) + (42) + > (def (test mo n (m (make-errorT mo))) + (du (m : MonadError) + foo <- (m.return n) + (if (eqv? 42 foo) (m.throw "LTUAE") (m.return foo)) + (m.return (- foo 1)))) + + > (Error-message (test (Identity) 42)) + "LTUAE" + > (map Error-message (test [] 42)) + ("LTUAE") + > (test (Identity) 43) + 42 + > (test [] 43) + (42) + > (using (m (make-errorT []) : MonadError) + (m.catch (test #f 43 m) (lambda (e) 'fourtwo))) + (42) + > (using (m (make-errorT []) : MonadError) + (m.catch (test #f 42 m) (lambda (e) 'fourtwo))) + fourtwo + > (def parsr (make-stateT (make-errorT []))) + > (def parse (make-errorT (make-stateT []))) + > ((Monad-return parsr 42) 42) + ((42 . 42)) + > ((Monad-return parse 42) 42) + ((42 . 42)) + > (with ([[e . s]] ((MonadError-throw parse "Here") 42)) + [(cons (Error-message e) s)]) + (("Here" . 42)) + ))) diff --git a/src/std/monad.ss b/src/std/monad.ss new file mode 100644 index 000000000..ff1d5683b --- /dev/null +++ b/src/std/monad.ss @@ -0,0 +1,16 @@ +(import + ./monad/interface + ./monad/identity + ./monad/list + ./monad/state + ./monad/syntax + ./monad/error) + (export + (import: + ./monad/interface) + ;; ./monad/identity + ;; ./monad/list + ;; ./monad/state + ;; ./monad/syntax + ;; ./monad/error +) diff --git a/src/std/monad/README.org b/src/std/monad/README.org new file mode 100644 index 000000000..e8c26986e --- /dev/null +++ b/src/std/monad/README.org @@ -0,0 +1,1197 @@ +#+TITLE: Monads in Gerbil + +Here's the thing. I like my Monadic Parser. It uses monads. Gerbil now +has a very fast Interface that can make it quick and easy. This is my +attempt to Monadize Gerbil. + + https://wiki.haskell.org/All_About_Monad + https://wiki.haskell.org/Monad + +* Table Of Contents +:PROPERTIES: +:TOC: :include siblings :depth 5 :ignore (this) +:END: +:CONTENTS: +- [[#what-is-a-monad-exactly][What is a Monad exactly?]] +- [[#the-monad-interface-aka-typeclass][The Monad interface AKA "typeclass"]] + - [[#almost-the-identity-monad][Almost the Identity monad.]] + - [[#getting-to-the-list-monad][Getting to the List monad]] + - [[#return-as-constructor--as-destructor][return as constructor, >>= as destructor.]] +- [[#runing-things][Run'ing things]] +- [[#keeping-state-with-state][Keeping State with state.]] +- [[#the-du-syntax-sugar-for--and-][The du syntax: sugar for >>= and >>]] +- [[#fail-ure-is-an-option][Fail-ure IS an option]] + - [[#the-maybe-test-type][The maybe test type]] + - [[#fail-and-list-are-natural-friends][Fail and :list are natural friends]] +- [[#monadpluszeroor][Monad[Plus|Zero|Or]​]] +- [[#transformers-statet-redo-state-with-others-in-mind][Transformers! stateT Redo state with others in mind.]] + - [[#the-identity-identity-monad][The Identity identity monad]] + - [[#the-monad-for-statet][The Monad for stateT]] + - [[#run-it][Run it]] + - [[#the-state-for-statet][The State for stateT]] +- [[#first-parser-manual-tranform][First Parser, manual tranform]] +- [[#hindleymilner-types-and-instances][Hindley–Milner type's and instance's]] +- [[#the-monad-interface][The Monad interface]] +- [[#the-identity-singleton][The Identity singleton.]] +- [[#the-list-singleton][The List singleton]] +- [[#monadfail-another-monad-interface][MonadFail, another monad interface.]] + - [[#empty-list-is-a-failure][Empty List is a failure.]] +- [[#the-interface-aka-typeclass-monad-definition][The interface AKA typeclass monad definition]] +- [[#the-type-form-for-a-singleton][The type form for a singleton]] +- [[#state-and-statet-transformer][State and StateT Transformer]] +- [[#failure-is-always-an-option-the-maybe-monad][Failure is always an option! The Maybe monad]] +- [[#the-monaderror-exception-handling][The MonadError: exception handling]] +- [[#wrapping-with-errort][Wrapping with ErrorT]] +- [[#next-method-vs-inner][Next Method vs inner]] +- [[#general-monad-utility-functions][General Monad Utility Functions]] +- [[#the-monad-module][The Monad module]] +- [[#gerbil][Gerbil]] +- [[#emacs][Emacs]] +- [[#append-me][Append me]] + - [[#type-class-aka-interface]["Type Class", AKA Interface]] + - [[#an-instance-of-an-type-class-is-a-struct-that-has-the-interface-described][An Instance of an "Type Class" is a Struct that has the Interface described.]] + - [[#the-identity-monad][The Identity Monad.]] +- [[#instance-syntax][Instance Syntax]] +- [[#syntax-test-repl][Syntax test-repl]] + - [[#the-standard-testing-library][The Standard Testing Library]] + - [[#test-suite][test-suite]] + - [[#test-case][test-case]] + - [[#check][check]] + - [[#checkf][checkf]] + - [[#check-eq][check-eq?]] + - [[#check-not-eq][check-not-eq?]] + - [[#check-eqv][check-eqv?]] + - [[#check-not-eqv][check-not-eqv?]] + - [[#check-equal][check-equal?]] + - [[#check-not-equal][check-not-equal?]] + - [[#check-output][check-output]] + - [[#check-predicate][check-predicate]] + - [[#check-exception][check-exception]] + - [[#file-interfacess][File interface.ss]] + - [[#file-identityss][File identity.ss]] + - [[#file-listss][File list.ss]] + - [[#file-statess][File state.ss]] + - [[#file-syntaxss][File syntax.ss]] + - [[#file-monadss][File ../monad.ss]] +:END: + + +* What is a Monad exactly? +:PROPERTIES: +:CUSTOM_ID: what-is-a-monad-exactly +:END: + +#+begin_quote +*monad*, (from Greek /monas/ “unit”), an elementary individual +substance that reflects the order of the world and from which material +properties are derived. [...] + +The objects of the material world are simply appearances of +collections of monads. +. +-- https://www.britannica.com/topic/monad +#+end_quote + +In the abstract programming sense, a =Monad= is a generic way of +looking at a problem with a specific *soul*-ution almost hidden within +the wrapper. + +It's a strategy for going about a wide range of problems. It starts +with two abstractions, =return= and =>>==, that help to encompass +a lot about a program. + +* The =Monad= interface AKA "typeclass" +:PROPERTIES: +:CUSTOM_ID: the-monad-interface-aka-typeclass +:END: + +Gerbil has an incredible =interface= definition and usage. They +similar to =typeclass='s but for a dynamic language. + +Have a look [[https://cons.io/reference/std/interface.html][at the docs]] for information on how they work. + +In Haskell the typeclass for a monad looks like this: + +#+begin_src haskell + class Monad m where + (>>=) :: m a -> (a -> m b) -> m b + (>>) :: m a -> m b -> m b + return :: a -> m a + -- Minimal complete definition: + -- (>>=), return + m >> k = m >>= \_ -> k +#+end_src + +Since we don't have a way to create a method within a typeclass we'll +make a monad that does nothing in order to have that global method +definition. + +#+begin_src scheme :noweb-ref monad-interface + (begin + ;; class Monad m + (interface Monad (return a) (>>= ma f) (>> ma mb)) + ;; -- Minimal complete definition: + ;; -- return, >>= and >> + (instance (m Monad) :t + ((return a) a) + ((>>= ma f) (f ma)) + ((>> ma mb) (m.>>= ma (lambda (_) mb))))) +#+end_src + +Essentially that's that! a monad is just an abstraction around those +two procedures with those signatures. + +** Almost the =Identity= monad. +:PROPERTIES: +:CUSTOM_ID: almost-the-identity-monad +:END: + +To see what is does, or even what it doesn't, let us first import the +interface and contract libraries. + +#+begin_src scheme + > (import :std/interface :std/contract :std/instance) +#+end_src + +Because we have that =:t= monad we should be able to use it. We'll +just pass a symbol for now. + +#+begin_src scheme :noweb-ref test-m + > (using (m 'identity : Monad) + (m.>>= (m.return 41) + (lambda (a) (m.return (1+ a))))) + 42 + > (using (m 'identity : Monad) (m.>> 'anything 42)) + 42 +#+end_src + +That minimal "do nothing" monad is known as the =Identity Monad= and +is actually quite important. + +** Getting to the =List= monad +:PROPERTIES: +:CUSTOM_ID: getting-to-the-list-monad +:END: + +Because this is lisp let's look at another simple monad, the =List +Monad=. + +#+begin_src scheme :noweb-ref instance-mlist + (instance Monad :list + ((return a) [a]) ((>>= ma f) (append-map f ma))) +#+end_src + +Now what happens with our =1+= test? + +#+begin_src scheme :noweb-ref test-mlist + > (using (m [] : Monad) + (m.>>= (m.return 41) + (lambda (a) (m.return (1+ a))))) + (42) + > (using (m [] : Monad) (m.>> ['anything] [42])) + (42) +#+end_src + +In the first test the same forms are used but now it is a list as a +result. + +In the second test we have to pass lists as that's what a =Monadic +Value= is for a =List Monad=. + +** =return= as constructor, =>>== as destructor. +:PROPERTIES: +:CUSTOM_ID: return-as-constructor--as-destructor +:END: + +That's all a monad really is. + + - The =return= procedure, typed ~a -> m a~, take "a" value and makes + it a =Monadic Value=. It can be thought of as a wrapper. + + - The =>>== prodedure is also known as =bind=. It "unwraps" the + value and then calls a function with that value. + +#+begin_src scheme :noweb-ref test-lr-identity +> (using (m [] : Monad) (m.>>= (m.return 42) (cut m.return <>))) +(42) +> (using (m 'identity : Monad) (m.>>= (m.return 42) (cut m.return <>))) +42 +#+end_src + +We can of course rid ourselves of the duplication. That's the whole point. + +#+begin_src scheme :noweb-ref test-lr-identity + > (def (monad-test-lr-identity monad v) + (using (m monad : Monad) + (m.>>= (m.return v) + (lambda (ov) (if (eq? v ov) (m.return ov) + (error "Wrap/Unwrap failed")))))) + + > (monad-test-lr-identity 'foo 42) + 42 + > (monad-test-lr-identity [] 42) + (42) +#+end_src + +* =Run='ing things +:PROPERTIES: +:CUSTOM_ID: runing-things +:END: + +#+begin_src scheme :noweb-ref run-interface + (interface Run (run . args)) +#+end_src + +* Keeping =State= with =state=. +:PROPERTIES: +:CUSTOM_ID: keeping-state-with-state +:END: + +There are more things to wrap than just a list. We could be passing a +state. That's quite common. + +#+begin_src scheme :noweb-ref state-interface + (interface (State Run) (get) (put! s) (state f)) + (interface (MonadState Monad State) (state f)) +#+end_src + +We'll make a =state= struct. First, it's just for specialization. +#+begin_src scheme :noweb-ref state-struct + (defstruct state (e)) + + (instance (m Monad) state + ((return a) (lambda (s) [a . s])) + ((>>= ma f) (lambda (s) (with ((cons v new-state) (ma s)) ((f v) new-state))))) +#+end_src + +Using our past things we see that we need to call the function. + +#+begin_src scheme :noweb-ref first-state-test + > ((using (s (make-state 'state) : Monad) (s.return 42)) 'hey!) + (42 . hey!) + > (def (monad-test-lr-identity monad v) + (using (m monad : Monad) + (m.>>= (m.return v) + (lambda (ov) (if (eq? v ov) (m.return ov) + (error "Wrap/Unwrap failed")))))) + > (let ((fn (monad-test-lr-identity (make-state 'here) 42))) + (fn 'foo)) + (42 . foo) + > ((using (s (make-state 'state) : Monad) + (s.>>= (s.return 42) (lambda (v) (s.return (+ 1 v))))) 'hey!) + (43 . hey!) +#+end_src + +That call is also known as a run, as in we are running the state. + +#+begin_src scheme :noweb-ref state-run + (instance (r Run) (s state) + ((run m . _) (m s.e))) +#+end_src + +There's a reason for the MonadState interface as verbosity happens. + +#+begin_src scheme :noweb-ref first-state-test + > (let (state (make-state 'hey!)) + (using ((m state : Monad) + (r state : Run)) + (r.run (m.return 42)))) + (42 . hey!) +#+end_src + +Before we use it we need to define an instance of it for the =state= structure. + +#+begin_src scheme :noweb-ref state-State + (instance State state + ((get) (lambda (s) [s . s])) + ((put! new) (lambda (s) [s . new]))) + +#+end_src + +Easy, right? We've defined everything now to have a working =MonadState= + +#+begin_src scheme :noweb-ref MonadState + (instance (m MonadState) state + ((state f) + (m.>>= (m.get) + (lambda (s) + (with ((cons a _s) (f s)) + (m.>> (m.put! _s) + (m.return a))))))) + +#+end_src + +#+begin_src scheme :noweb-ref first-state-test + > (let (state (make-state 'hey!)) + (using (s state : MonadState) + (s.run (s.return 42)))) + (42 . hey!) + + > (let (state (make-state 'no!)) + (using (s state : MonadState) + (s.run (s.>>= (s.put! 'hey!) (lambda _ (s.return 42)))))) + (42 . hey!) + + > (let (state (make-state 42)) + (using (s state : MonadState) + (s.run (s.>>= (s.put! 'hey!) (lambda (old) (s.return old)))))) + (42 . hey!) + + > (let (state (make-state 'no!)) + (using (s state : MonadState) + (s.run (s.>> (s.put! 42) + (s.>>= (s.put! 'hey!) (lambda (old) (s.return old))))))) + (42 . hey!) + + > (using (s (make-state 41) : MonadState) + (s.run (s.state (lambda (s_) ['!yeh (+ s_ 1) ...])))) + (!yeh . 42) +#+end_src + + +* The =du= syntax: sugar for =>>== and =>>= +:PROPERTIES: +:CUSTOM_ID: the-du-syntax-sugar-for--and- +:END: + +Binding variables is a big part of programming. Sequential steps down +a path are also important, as well as knowing what has passed. + +In =Lisp= the =let*= form is kinda what I'm talking about. The =begin= +form plays a role as well. + +In =Haskell= this is called =do= but that's already taken and as luck +would have it =using= is a part of our "Do Using" (aka =du=) so it all +works out. + +Here's an example. + +#+begin_src scheme :noweb-ref first-du-test + > (du (m 'identity : Monad) + n <- (m.return 41) + v <- (m.return (+ n 1)) + (= v 42)) + #t +#+end_src + + +So =n <- mv ...= is just ~(m.>>= mv (n) ...)~ in short form, + +#+begin_src scheme :noweb-ref first-du-test + > (using (m 'identity : Monad) + (m.>>= + (m.return 41) + (lambda (n) + (m.>>= + (m.return (+ n 1)) + (lambda (v) (= v 42)))))) + #t +#+end_src + +There are a few ways to go about using =du=. Because things inside it +are basically inside the monad "wrapper" that means that, for example, +the =Run= interface needs to be used outside of it. + +The first attempt is just to use it to bind an identifier to run. + +#+begin_src scheme :noweb-ref first-du-test + > (let* ((s (make-state 0)) + (ms (du (s : MonadState) + (s.put! 41) + (s.state (lambda (s_) ['!yeh (+ s_ 1) ...]))))) + (Run-run s ms)) + (!yeh . 42) +#+end_src + +This is such a simple task that all we are doing is using the =>>= +operation. We could just use that with =using= and not =du= at all. + +#+begin_src scheme :noweb-ref first-du-test + > (using (s (make-state 0) : MonadState) + (s.run (s.>> + (s.put! 41) + (s.state (lambda (s_) ['!yeh (+ s_ 1) ...]))))) + (!yeh . 42) +#+end_src + +But most things are not as simple as a single =>>= or even a binding +=>>== . + +Do that there's an "inline" =(du id ...)= syntax. We use both of those +operations hidden here. +#+begin_src scheme :noweb-ref first-du-test + > (def (testme n) + (using (s (make-state n) : MonadState) + (s.run (du s + n <- (s.get) + (let (v (+ n 1)) + (if (eqv? v 42) (s.put! v) (s.put! error:))) + (s.return '!yeh))))) + > (testme 41) + (!yeh . 42) + > (testme 46) + (!yeh . error:) +#+end_src + +* =Fail=-ure IS an option +:PROPERTIES: +:CUSTOM_ID: fail-ure-is-an-option +:END: + +There are times when things fail. + +#+begin_src scheme :noweb-ref fail-interface + (interface Fail (fail)) + (interface (MonadFail Monad Fail)) +#+end_src + +** The =maybe= test type +:PROPERTIES: +:CUSTOM_ID: the-maybe-test-type +:END: + +For example there could be the abstract =maybe= and =nothing= +concepts. + +#+begin_src scheme :noweb-ref maybe-test + > (defstruct maybe (nothing)) + > (instance MonadFail (m maybe) + ((return a) a) + ((>>= ma f) + (if (eqv? m.nothing ma) ma (f ma))) + ((fail) m.nothing)) + + > (def (testme o (no #f)) + (du (mf (maybe no) : MonadFail) + n <- 1 + m <- (if (even? o) (mf.fail) o) + (+ m n))) + + > (testme 4) + #f + > (testme 5) + 6 + > (testme 2 'huh) + huh + > (testme 3) + 4 +#+end_src + + +** =Fail= and =:list= are natural friends +:PROPERTIES: +:CUSTOM_ID: fail-and-list-are-natural-friends +:END: + +#+begin_src scheme :noweb-ref fail-list + (instance MonadFail :list ((fail) [])) +#+end_src + +#+begin_src scheme :noweb-ref fail-list-test + > (def (testl lst) + (du (mf [] : MonadFail) + n <- lst + m <- (if (even? n) (mf.fail) (mf.return (+ 41 n))) + (mf.return (eqv? 42 m)))) + + > (testl [1 2 3 4 5 6]) + (#t #f #f) +#+end_src + + +* =Monad[Plus|Zero|Or]= +:PROPERTIES: +:CUSTOM_ID: monadpluszeroor +:END: + +For a list there should be a way to add items. =MonadPlus= is just +that. An Empty list is =Zero=. And =Or= is like a deterministic +version of =Plus=. + +For the Haskell foreground read [[https://wiki.haskell.org/MonadPlus_reform_proposal][here]]. + +#+begin_src scheme :noweb-ref zpo-interfaces + (interface Plus (plus a b)) + (interface (MonadPlus Monad Plus)) + (interface Zero (zero)) + (interface (MonadZero Monad Zero)) + (interface (MonadZeroPlus Monad Zero Plus)) + (interface Or (or x y)) + (interface (MonadZeroOrPlus Monad Or Plus Zero)) +#+end_src + +So a =:list= is of those three... + +#+begin_src scheme :noweb-ref zpo-list + (instance Plus :list ((plus a b) (append a b))) + (instance Zero :list ((zero) [])) + (instance Or :list ((or l1 l2) (if (null? l1) l2 l1))) +#+end_src + +... and because it's already a monad we can play with it like that. + +#+begin_src scheme :noweb-ref test-list-zpo + > (du (m [] : MonadPlus) + (m.plus (m.return 42) [42])) + (42 42) + > (du (m [] : MonadZero) + (m.zero)) + () + > (du (m [] : MonadZeroOrPlus) + (m.or (m.plus (m.return 42) (m.zero)) (m.zero))) + (42) + + +#+end_src + +* Transformers + +Different transformers may need to tranform one another or some +such. Beyond that there's =lift=. + +#+begin_src scheme :noweb-ref trans-interfaces + (interface Transformer (lift c)) + (interface (MonadTrans Monad Transformer)) +#+end_src + +** A =stateT= for wrapping state +:PROPERTIES: +:CUSTOM_ID: transformers-statet-redo-state-with-others-in-mind +:END: + +Previously, all the monad instances are separate. What if we want to +combine them? + +That's where transformers come in. + +A transformer is something with something else inside. + + +** The =Identity= identity monad +:PROPERTIES: +:CUSTOM_ID: the-identity-identity-monad +:END: + +Even though, or perhaps because =:t= does "nothing" there is a simple +"inner that does nothing" we can create. + + +#+begin_src scheme :noweb-ref Identity-struct + (defstruct Identity ()) + + (instance (m Monad) Identity + ((return a) a) + ((>>= ma f) (f ma)) + ((>> ma mb) (m.>>= ma (lambda _ mb)))) +#+end_src + +** The =Monad= for =stateT= +:PROPERTIES: +:CUSTOM_ID: the-monad-for-statet +:END: + +#+begin_src scheme :noweb-ref stateT-struct + (defstruct stateT (inner)) + + (instance (m Monad) (st stateT) + ((return a) + (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) + ((>>= ma f) + (using (inner st.inner : Monad) + (lambda (s) + (du inner + pair <- (ma s) + (with ((cons v s!) pair) ((f v) s!))))))) + +#+end_src + + +#+begin_src scheme :noweb-ref test-stateT + > (def (test-stateT-monad state) + (du (m state : Monad) + n <- (m.return 42) + (m.return [n (= n 42)]))) + > ((test-stateT-monad (make-stateT (Monad (Identity)))) 'state) + ((42 #t) . state) + > ((test-stateT-monad (make-stateT (Monad []))) 'state) + (((42 #t) . state)) +#+end_src + +** =Run= it +:PROPERTIES: +:CUSTOM_ID: run-it +:END: + + +#+begin_src scheme :noweb-ref stateT-struct + (instance Run (st stateT) + ((run mv (state (void))) (mv state))) +#+end_src + +** The =State= for =stateT= +:PROPERTIES: +:CUSTOM_ID: the-state-for-statet +:END: + +The state transformer is for state after all. + +#+begin_src scheme :noweb-ref stateT-struct + (instance State (st stateT) + ((get) (lambda (s) (du (m st.inner : Monad) (m.return [s . s])))) + ((put! s!) (lambda (s) (du (m st.inner : Monad) (m.return [s . s!]))))) + (instance MonadState (st stateT) + ((state f) (using (m st.inner : Monad) (lambda (s) (let (ret (f s)) (m.return ret)))))) + +#+end_src + +First the =get= and =put!=. + +#+begin_src scheme :noweb-ref test-stateT + > (def (test-stateT-State statet first-state) + (def run (du (m statet : MonadState) + first <- (m.put! 42) + second <- (m.get) + (m.put! 'final) + (m.return [first second]))) + (run first-state)) + > (test-stateT-State (make-stateT (Monad (Identity))) 'first) + ((first 42) . final) + > (test-stateT-State (make-stateT (Monad [])) 'second) + (((second 42) . final)) +#+end_src + +And the =state= procedure. + +#+begin_src scheme :noweb-ref test-stateT + > (def (test-stateT-state statet) + (du (m statet : MonadState) + (m.state (lambda (s!) [s! . 42])))) + > (using (s (make-stateT (Monad (Identity))) : Run) + (s.run (test-stateT-state s) 'first)) + (first . 42) + > (using (s (make-stateT (Monad [])) : Run) + (s.run (test-stateT-state s) 'second)) + ((second . 42)) +#+end_src + +** The =ZeroOrPlus= and =Fail= transformations + +Choices can be a big part of programming. + +#+begin_src scheme :noweb-ref stateT-struct + (instance Or (st stateT) + ((or x y) (lambda (s) (du (inner st.inner : Or) + (inner.or (x s) (y s)))))) + (instance Plus (st stateT) + ((plus x y) (lambda (s) (du (inner st.inner : Plus) + (inner.plus (x s) (y s)))))) + (instance Zero (st stateT) + ((zero) (lambda (s) (du (inner st.inner : Zero) + (inner.zero))))) + (instance Fail (st stateT) + ((fail) (lambda (s) (du (inner st.inner : Fail) + (inner.fail))))) + + +#+end_src + +#+begin_src scheme :noweb-ref test-stateT + > ((du (m (make-stateT []) : MonadZeroOrPlus) + ab <- (m.plus (m.return 'a) (m.return 'b)) + + (m.return ab)) + 42) + ((a . 42) (b . 42)) + > ((du (m (make-stateT []) : MonadZeroOrPlus) + a <- (m.or (m.return 'a) (m.return 'b)) + + (m.return a)) + 42) + ((a . 42)) + > ((du (m (make-stateT []) : MonadZeroOrPlus) + b <- (m.or (m.zero) (m.return 'b)) + + (m.return b)) + 42) + ((b . 42)) + +#+end_src + +** How to =lift= from inner + +Just because we're tried to make the =stateT= wrap most monadic +computations does not mean that we can trivially wrap any monadic +value from the =inner=. + +#+begin_src scheme :noweb-ref stateT-struct + (instance MonadTrans (st stateT) + ((lift c) (lambda (s) + (du (inner st.inner : Monad) + x <- c + (inner.return [x . s]))))) +#+end_src + +So we can take a function for the inner monad and lift it up! +#+begin_src scheme :noweb-ref test-stateT + > (def (listM-fn) + (du (m (MonadPlus []) :- MonadPlus) + (m.plus (m.return 41) (m.return 43)))) + > (listM-fn) + (41 43) + > ((du (m (make-stateT []) : MonadTrans) + foo <- (m.lift (listM-fn)) + (m.return (+ foo 1))) "state") + ((42 . "state") (44 . "state")) +#+end_src + +* Monadic Parser Combinators - Part 0.1 + +I'm not at all sure how I came across this but after writing my first +=Org Mode= parser, I found this quote: + + #+begin_quote + A Parser for Things is a function from Strings to Lists of Pairs of + Things and Strings! + + --Fritz Ruehr + #+end_quote + + +... along with the [[https://nottingham-repository.worktribe.com/output/1024440/monadic-parser-combinators][Monadic Parser Combinators]] paper. So we arrive at +this point. + +With the state transformer as is this should just work! + +Very simple. We'll just parse strings. + + +#+begin_src scheme :noweb-ref first-parser-test + > (interface (Parser MonadState Fail Or Plus) (item)) + > (defstruct (parser stateT) ()) + > (def current-parsee (make-parameter "42")) + > (def current-parser (make-parameter (make-parser (Monad [])))) + + > (instance (P Parser) (p parser) + ((item) (du P + idx <- (P.get) + len <- (P.return (string-length (current-parsee))) + (P.put! (1+ idx)) + (if (>= idx len) (P.fail) + (P.return (string-ref (current-parsee) idx)))))) + + > ((using (p (current-parser) : Parser) (p.item)) 0) + ((#\4 . 1)) + > ((using (p (current-parser) : Parser) (p.plus (p.item) (p.item))) 0) + ((#\4 . 1)(#\4 . 1)) +#+end_src + + +All we really need is a =char​=?= and we could have a parser! + + +#+begin_src scheme :noweb-ref first-parser-test + > (interface (Parsec Parser) (char=? char)) + > (defstruct (parsec parser) ()) + > (instance (P Parsec) (p parsec) + ((char=? char) + (du P + c <- (P.item) + (if (char=? char c) (P.return c) (P.fail))))) + > (current-parser (make-parsec (Monad []))) + > ((using (p (current-parser) : Parsec) + (p.char=? #\4)) 0) + ((#\4 . 1)) + > ((using (p (current-parser) : Parsec) + (p.char=? #\4)) 1) + () + + > (def (test-ltuae str) + (parameterize ((current-parsee str)) + (let (ret ((du (p (current-parser) : Parsec) + (p.>> (p.char=? #\4) (p.char=? #\2))) 0)) + (not (null? ret))))) + > (test-ltuae "42") + #t + > (test-ltuae "41") + #f +#+end_src + +* Catch/Throw equals =ErrorHandler= from =errorT= + +We all know about throwing and catching errors. The =...Handler= is +there to play nice with =:std/error= of course. + +#+begin_src scheme :noweb-ref Error-interface + (interface ErrorHandler (throw . args) (catch thunk handler) (error? thing)) + (interface (MonadError Monad ErrorHandler)) +#+end_src + +The Error wrapper can be another transformer. + +#+begin_src scheme :noweb-ref errorT-struct + (defstruct errorT (inner) constructor: :init!) + (defmethod {:init! errorT} + (lambda (self (inner (Monad (Identity)))) + (unless (Monad? inner) + (set! inner (Monad inner))) + (set! self.inner inner))) + +#+end_src + +Making the handler is easy. + +#+begin_src scheme :noweb-ref errorT-struct + (instance (me MonadError) (et errorT) + ((return a) (du (inner et.inner :- Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner :- Monad) + a <- ma + (if (me.error? a) (inner.return a) (f a)))) + ((error? thing) (Error? thing)) + ((throw msg . irritants) + (du (inner et.inner :- Monad) + (inner.return (Error msg irritants: irritants)))) + ((catch exp handler) + (du (inner et.inner :- Monad) + val <- exp + (if (me.error? val) (handler val) (inner.return val))))) + +#+end_src + +Testing it makes it clear. + +#+begin_src scheme :noweb-ref errorT-test + > (du (m (make-errorT) : MonadError) (m.return 42)) + 42 + > (du (m (make-errorT []) : MonadError) (m.return 42)) + (42) + > (def (test mo n (m (make-errorT mo))) + (du (m : MonadError) + foo <- (m.return n) + (if (eqv? 42 foo) (m.throw "LTUAE") (m.return foo)) + (m.return (- foo 1)))) + + > (Error-message (test (Identity) 42)) + "LTUAE" + > (map Error-message (test [] 42)) + ("LTUAE") + > (test (Identity) 43) + 42 + > (test [] 43) + (42) + > (using (m (make-errorT []) : MonadError) + (m.catch (test #f 43 m) (lambda (e) 'fourtwo))) + (42) + > (using (m (make-errorT []) : MonadError) + (m.catch (test #f 42 m) (lambda (e) 'fourtwo))) + fourtwo +#+end_src + +** Transform =State= to =errorT= + +This is after state in the train of thought so is defined here. + +#+begin_src scheme :noweb-ref errorT-struct + (instance MonadState (et errorT) + ((get) (du (inner et.inner :- MonadState) (inner.get))) + ((put! s) (du (inner et.inner :- MonadState) (inner.put! s))) + ((state f)(du (inner et.inner :- MonadState) (inner.state f)))) + + (instance MonadError (st stateT) + ((error? e?) (lambda (s) + (du (inner st.inner :- MonadError) + [(inner.error? e?) s ...]))) + ((throw message . args) + (lambda (s) + [ (apply MonadError-throw st.inner message args) s ...])) + ((catch exp handler) (lambda (s) (du (inner st.inner :- MonadError) + [(inner.catch exp handler) s ...])))) +#+end_src + +#+begin_src scheme :noweb-ref errorT-test + > (def parsr (make-stateT (make-errorT []))) + > (def parse (make-errorT (make-stateT []))) + > ((Monad-return parsr 42) 42) + ((42 . 42)) + > ((Monad-return parse 42) 42) + ((42 . 42)) + > (with ([[e . s]] ((MonadError-throw parse "Here") 42)) + [(cons (Error-message e) s)]) + (("Here" . 42)) + +#+end_src + + + +* Emacs +:PROPERTIES: +:CUSTOM_ID: emacs +:END: + +#+begin_src emacs-lisp + (require 'gerbil-mode) + (gerbil-put-indent '(du) 1) + (gerbil-put-indent '(instance) 2) +#+end_src + + +* /File/ interface.ss +:PROPERTIES: +:CUSTOM_ID: file-interfacess +:END: +#+begin_src scheme :noweb yes :tangle interface.ss + (import :std/interface :std/contract + #;"../instance":std/instance) + (export + #t + (interface-out unchecked: #t Monad Run State)) + + <> + + <> + + <> + + <> + + <> + + <> + + <> + +#+end_src + +* /File/ identity.ss +:PROPERTIES: +:CUSTOM_ID: file-identityss +:END: + +#+begin_src scheme :noweb yes :tangle identity.ss + (import ../instance #;"../instance" + ./interface #;"interface" + :std/interface :std/srfi/1) + (export #t) + + <> +#+end_src + +* /File/ list.ss +:PROPERTIES: +:CUSTOM_ID: file-listss +:END: + +#+begin_src scheme :noweb yes :tangle list.ss + (import ../instance #;"../instance" + ../interface #;"interface" + :std/interface :std/srfi/1) + (export #t) + + <> + + <> + + <> + +#+end_src + +* /File/ state.ss +:PROPERTIES: +:CUSTOM_ID: file-statess +:END: + +#+begin_src scheme :noweb yes :tangle state.ss + (import ../instance #;"../instance" + ./interface #;"interface" + ./syntax #;"syntax" + ./identity #;"identity" + :std/interface :std/srfi/1) + (export #t) + + <> + + <> + + <> + + <> + + <> + +#+end_src + +* /File/ error.ss +:PROPERTIES: +:CUSTOM_ID: file-errorss +:END: + +#+begin_src scheme :noweb yes :tangle error.ss + (import ../instance #;"../instance" + ./interface "interface" + ./syntax #;"syntax" + ./identity #;"identity" + ./state #;"state" + :std/interface :std/srfi/1 :std/error) + (export #t) + + <> +#+end_src + +* /File/ syntax.ss +:PROPERTIES: +:CUSTOM_ID: file-syntaxss +:END: + +#+begin_src scheme :noweb-ref du-syntax + (defsyntax (du stx) + (def (expand-bind id stx) + (with-syntax* ((id id) + (bind (stx-identifier #'id #'id ".>>=")) + (seq (stx-identifier #'id #'id ".>>"))) + (syntax-case stx (<-) + ((var <- from body ... end) + #'(bind from (lambda (var) (du id body ... end)))) + ((>> body ... end) + #'(seq >> (du id body ... end))) + ((end) #'end)))) + + (syntax-case stx () + ((_ id body ...) + (identifier? #'id) + (with-syntax ((bindings (expand-bind #'id #'(body ...)))) + #'bindings)) + ((_ (id expr ~ Monad) body ...) + (and (identifier? #'id) + (identifier? #'~)) + #'(using (id expr ~ Monad) + (du id body ...))) + ((_ (id ~ Monad) body ...) + (and (identifier? #'id) + (identifier? #'~)) + #'(using (id ~ Monad) + (du id body ...))) + ((_ ((id this ...) rest ...) body ...) + #'(using ((id this ...) rest ...) + (du id body ...))))) + + +#+end_src + +#+begin_src scheme :noweb yes :tangle syntax.ss + (import :std/contract) + (export #t) + + <> +#+end_src + +* /File/ ../monad.ss +:PROPERTIES: +:CUSTOM_ID: file-monadss +:END: + +#+begin_src scheme :tangle ../monad.ss + (import + ./monad/interface + ./monad/identity + ./monad/list + ./monad/state + ./monad/syntax + ./monad/error) + (export + (import: ./monad/interface) + (import: ./monad/identity) + (import: ./monad/list) + (import: ./monad/state) + (import: ./monad/syntax) + (import: ./monad/error)) +#+end_src + +* The Test File :noexport: +:PROPERTIES: +:CUSTOM_ID: the-test-file +:END: + +#+begin_src scheme :noweb yes :tangle ../../../src/std/monad-test.ss + ;;; -*- Gerbil -*- + ;;; (C) me at drewc.ca + ;;; :std/monad unit-tests + + (import :std/test + :std/error + :std/interactive + :srfi/13 + ; :std/instance + "instance" + "monad/interface" + "monad/identity" + "monad/list" + "monad/state" + "monad/syntax" + "monad/error" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) + (export monad-test) + + (defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + + (set-test-verbose! #t) + + (def monad-test + (test-suite "Test :std/monad" + + (test-inline + test-case: ":t as Identity" + <> + > #t #t) + (test-inline + test-case: ":list as List" + <>) + + (test-inline + test-case: ":list and :t Left and Right identity" + <>) + + (test-inline + test-case: "First State Tests" + <>) + + (test-inline + test-case: "First du Tests" + <>) + + (test-inline + test-case: "Fail Tests" + <> + <>) + + (test-inline + test-case: "ZPO tests" + <>) + + + (test-inline + test-case: "StateT tests" + <>) + + (test-inline + test-case: "First Parser Tests" + <>) + + (test-inline + test-case: "ErrorT tests" + <>))) + + + + + + + +#+end_src diff --git a/src/std/monad/build.ss b/src/std/monad/build.ss new file mode 100755 index 000000000..de5be0270 --- /dev/null +++ b/src/std/monad/build.ss @@ -0,0 +1,16 @@ +#!/usr/bin/env gxi +;;; -*- Gerbil -*- +(import :std/build-script) + +(defbuild-script + '("instance" + "monad/syntax" + "monad/interface" + "monad/util" + "monad/Identity" + "monad/List" + "monad/State" + "Monad/Error" + + "monad" + )) diff --git a/src/std/monad/error.ss b/src/std/monad/error.ss new file mode 100644 index 000000000..d6dd7dcee --- /dev/null +++ b/src/std/monad/error.ss @@ -0,0 +1,43 @@ +(import ../instance #;"../instance" + ./interface "interface" + ./syntax #;"syntax" + ./identity #;"identity" + ./state #;"state" + :std/interface :std/srfi/1 :std/error) +(export #t) + +(defstruct errorT (inner) constructor: :init!) +(defmethod {:init! errorT} + (lambda (self (inner (Monad (Identity)))) + (unless (Monad? inner) + (set! inner (Monad inner))) + (set! self.inner inner))) + +(instance (me MonadError) (et errorT) + ((return a) (du (inner et.inner :- Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner :- Monad) + a <- ma + (if (me.error? a) (inner.return a) (f a)))) + ((error? thing) (Error? thing)) + ((throw msg . irritants) + (du (inner et.inner :- Monad) + (inner.return (Error msg irritants: irritants)))) + ((catch exp handler) + (du (inner et.inner :- Monad) + val <- exp + (if (me.error? val) (handler val) (inner.return val))))) + +(instance MonadState (et errorT) + ((get) (du (inner et.inner :- MonadState) (inner.get))) + ((put! s) (du (inner et.inner :- MonadState) (inner.put! s))) + ((state f)(du (inner et.inner :- MonadState) (inner.state f)))) + +(instance MonadError (st stateT) + ((error? e?) (lambda (s) + (du (inner st.inner :- MonadError) + [(inner.error? e?) s ...]))) + ((throw message . args) + (lambda (s) + [ (apply MonadError-throw st.inner message args) s ...])) + ((catch exp handler) (lambda (s) (du (inner st.inner :- MonadError) + [(inner.catch exp handler) s ...])))) diff --git a/src/std/monad/identity.ss b/src/std/monad/identity.ss new file mode 100644 index 000000000..9cdb09900 --- /dev/null +++ b/src/std/monad/identity.ss @@ -0,0 +1,11 @@ +(import ../instance #;"../instance" + ./interface #;"interface" + :std/interface :std/srfi/1) +(export #t) + + (defstruct Identity ()) + + (instance (m Monad) Identity + ((return a) a) + ((>>= ma f) (f ma)) + ((>> ma mb) (m.>>= ma (lambda _ mb)))) diff --git a/src/std/monad/instance-:list b/src/std/monad/instance-:list new file mode 100644 index 000000000..0c5d2fcfa --- /dev/null +++ b/src/std/monad/instance-:list @@ -0,0 +1,2 @@ +(instance Monad :list + ((return a) [a]) ((>>= ma f) (append-map f ma))) diff --git a/src/std/monad/instance.ss b/src/std/monad/instance.ss new file mode 100644 index 000000000..ef74edfcd --- /dev/null +++ b/src/std/monad/instance.ss @@ -0,0 +1,126 @@ +(import (for-syntax :std/stxutil) + :std/contract) +(export #t) +(defsyntax (instance stx) + + (def (wrap-using-klass self klass body) + (with-syntax (((body ...) body) (self self) (klass klass)) + (if (identifier? #'self) + #'(using (self :- klass) body ...) + #'(begin body ...)))) + + (def (wrap-using-interface var self Interface body) + (with-syntax ((body body) (self self) (var var) (Interface Interface)) + (if (and (identifier? #'var) (identifier? #'self) (identifier? #'Interface)) + #'(using (var self : Interface) body) + #'(begin body)))) + + (def (wrap-next-method type::t self id fn) + (let ((next-sym (gensym))) + (with-syntax* + ((type::t type::t) (id id) (fn fn) (self self) + (next-sym (datum->syntax #'id next-sym)) + (@next-method (stx-identifier #'id '@next-method)) + (new-body + (syntax-case #'fn () + ((lm args body ...) + #'(lm args + (let-syntax + ((@next-method + (syntax-rules () + ((_ arg (... ...)) + (next-sym self arg (... ...)))))) + body ...)))))) + #'(let* ((next-cache (make-hash-table-eq weak-keys: #t)) + (next-sym + (lambda (obj . args) + (def cached (hash-get next-cache obj)) + (if cached (apply cached obj args) + (let (next-meth + (or cached + (let (nm (next-method type::t obj 'id)) + (or nm (error "Cannot find next method" 'id type::t))))) + (when (not cached) (hash-put! next-cache obj next-meth)) + (apply next-meth obj args)))))) + new-body)))) + + (def (expand-method-lambda id self klass type::t Interface var args body (dot-arg #f)) + (with-syntax* ((args args) ((body ...) body) (klass klass) (type::t type::t) + (self self) (var var) (id id) (Interface Interface) + (tbody (wrap-using-klass #'self #'klass #'(body ...))) + (real-self (if (identifier? #'self) #'self + (datum->syntax #'id (gensym)))) + (ibody (wrap-using-interface #'var #'real-self #'Interface #'tbody)) + ((real-body ...) (cond ((identifier? #'var) + #'(ibody)) + ((identifier? #'self) #'(tbody)) + (else #'(body ...)))) + (real-args (if (stx-list? #'args) + (with-syntax (((args ...) #'args) (darg dot-arg)) + (if dot-arg #'(real-self args ... . daarg) + #'(real-self args ...))) + #'(real-self . args))) + (real-lambda #'(lambda real-args real-body ...)) + (method-fn (wrap-next-method #'type::t #'real-self #'id #'real-lambda))) + + #'method-fn)) + + (def (expand-bind-method type::t id rebind? + self klass Interface var args body) + (with-syntax* ((type::t type::t) (id id) (self self) (klass klass) + (Interface Interface) (var var) (args args) (body body) (rebind? rebind?) + (fn (expand-method-lambda #'id #'self #'klass #'type::t #'Interface + #'var #'args #'body))) + #'(bind-method! type::t 'id fn rebind?))) + + (def (expand-method klass type::t self Interface var mdef rebind?) + (with-syntax ((self self) (klass klass) (mdef mdef) (type::t type::t) (var var) + (Interface Interface) (rebind? rebind?)) + (syntax-case #'mdef () + (((id args ...) body ...) + (expand-bind-method #'type::t #'id #'rebind? #'self #'klass + #'Interface #'var #'(args ...) #'(body ...))) + (((id . arg) body ...) + (expand-bind-method #'type::t #'id #'rebind? #'self #'klass + #'Interface #'var #'arg #'(body ...))) + (((id args ... . dot-arg) body ...) + (expand-bind-method #'type::t #'id #'rebind? #'self #'klass + #'Interface #'var #'(args ...) #'(body ...) #'dot-arg))))) + + (def (expand-methods Interface var klass type::t self methods rebind?) + (with-syntax (((methods ...) methods) (var var) (Interface Interface) (rebind? rebind?) + (klass klass) (self self) (type::t type::t)) + + (let lp ((rest #'(methods ...)) (body [])) + (syntax-case rest () + ((mdef . rest) + (lp #'rest (cons ;#'mdef + (expand-method #'klass #'type::t #'self #'Interface #'var #'mdef #'rebind?) + body))) + (() (cons 'begin (reverse body))))))) + + (syntax-case stx () + ((_ Interface: Interface var: var Klass: klass self: self TypeT: type::t rebind: rebind? + methods ...) + (expand-methods #'Interface #'var #'klass #'type::t #'self #'(methods ...) #'rebind?)) + ((macro (Interface var) (Klass self) methods ... rebind: rebind?) + (let ((singleton? (not (syntax-local-type-info? #'Klass)))) + (with-syntax* (((values klass) + (if singleton? + (syntax-local-value (format-id #'Klass "~a::class" #'Klass)) + (syntax-local-value #'Klass))) + (type::t (runtime-type-identifier klass))) + + + #'(macro Interface: Interface var: var + Klass: Klass self: self + TypeT: type::t rebind: rebind? + methods ...)))) + ((macro Interface (Klass self) rest ... rebind: r) + #'(macro (Interface #f) (Klass self) rest ... rebind: r)) + ((macro (Interface var) Klass rest ... rebind: r) + #'(macro (Interface var) (Klass #f) rest ... rebind: r)) + ((macro Interface Klass rest ... rebind: r) + #'(macro (Interface #f) (Klass #f) rest ... rebind: r)) + ((macro no-rebind ...) + #'(macro no-rebind ... rebind: #f)))) diff --git a/src/std/monad/interface.ss b/src/std/monad/interface.ss new file mode 100644 index 000000000..a87bd77c6 --- /dev/null +++ b/src/std/monad/interface.ss @@ -0,0 +1,37 @@ +(import :std/interface :std/contract + #;"../instance":std/instance) + (export + #t + (interface-out unchecked: #t Monad Run State)) + + (begin + ;; class Monad m + (interface Monad (return a) (>>= ma f) (>> ma mb)) + ;; -- Minimal complete definition: + ;; -- return, >>= and >> + (instance (m Monad) :t + ((return a) a) + ((>>= ma f) (f ma)) + ((>> ma mb) (m.>>= ma (lambda (_) mb))))) + + (interface Run (run . args)) + + (interface (State Run) (get) (put! s) (state f)) + (interface (MonadState Monad State) (state f)) + + (interface Fail (fail)) + (interface (MonadFail Monad Fail)) + + (interface Plus (plus a b)) + (interface (MonadPlus Monad Plus)) + (interface Zero (zero)) + (interface (MonadZero Monad Zero)) + (interface (MonadZeroPlus Monad Zero Plus)) + (interface Or (or x y)) + (interface (MonadZeroOrPlus Monad Or Plus Zero)) + + (interface ErrorHandler (throw . args) (catch thunk handler) (error? thing)) + (interface (MonadError Monad ErrorHandler)) + + (interface Transformer (lift c)) + (interface (MonadTrans Monad Transformer)) diff --git a/src/std/monad/list.ss b/src/std/monad/list.ss new file mode 100644 index 000000000..584029816 --- /dev/null +++ b/src/std/monad/list.ss @@ -0,0 +1,13 @@ +(import ../instance #;"../instance" + ../interface #;"interface" + :std/interface :std/srfi/1) +(export #t) + +(instance Monad :list + ((return a) [a]) ((>>= ma f) (append-map f ma))) + +(instance MonadFail :list ((fail) [])) + +(instance Plus :list ((plus a b) (append a b))) +(instance Zero :list ((zero) [])) +(instance Or :list ((or l1 l2) (if (null? l1) l2 l1))) diff --git a/src/std/monad/state.ss b/src/std/monad/state.ss new file mode 100644 index 000000000..ad5ff8984 --- /dev/null +++ b/src/std/monad/state.ss @@ -0,0 +1,69 @@ +(import ../instance #;"../instance" + ./interface #;"interface" + ./syntax #;"syntax" + ./identity #;"identity" + :std/interface :std/srfi/1) +(export #t) + +(defstruct state (e)) + +(instance (m Monad) state + ((return a) (lambda (s) [a . s])) + ((>>= ma f) (lambda (s) (with ((cons v new-state) (ma s)) ((f v) new-state))))) + +(instance (r Run) (s state) + ((run m . _) (m s.e))) + +(instance State state + ((get) (lambda (s) [s . s])) + ((put! new) (lambda (s) [s . new]))) + + +(instance (m MonadState) state + ((state f) + (m.>>= (m.get) + (lambda (s) + (with ((cons a _s) (f s)) + (m.>> (m.put! _s) + (m.return a))))))) + + +(defstruct stateT (inner)) + +(instance (m Monad) (st stateT) + ((return a) + (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) + ((>>= ma f) + (using (inner st.inner : Monad) + (lambda (s) + (du inner + pair <- (ma s) + (with ((cons v s!) pair) ((f v) s!))))))) + +(instance Run (st stateT) + ((run mv (state (void))) (mv state))) +(instance State (st stateT) + ((get) (lambda (s) (du (m st.inner : Monad) (m.return [s . s])))) + ((put! s!) (lambda (s) (du (m st.inner : Monad) (m.return [s . s!]))))) +(instance MonadState (st stateT) + ((state f) (using (m st.inner : Monad) (lambda (s) (let (ret (f s)) (m.return ret)))))) + +(instance Or (st stateT) + ((or x y) (lambda (s) (du (inner st.inner : Or) + (inner.or (x s) (y s)))))) + (instance Plus (st stateT) + ((plus x y) (lambda (s) (du (inner st.inner : Plus) + (inner.plus (x s) (y s)))))) + (instance Zero (st stateT) + ((zero) (lambda (s) (du (inner st.inner : Zero) + (inner.zero))))) + (instance Fail (st stateT) + ((fail) (lambda (s) (du (inner st.inner : Fail) + (inner.fail))))) + + +(instance MonadTrans (st stateT) + ((lift c) (lambda (s) + (du (inner st.inner : Monad) + x <- c + (inner.return [x . s]))))) diff --git a/src/std/monad/syntax.ss b/src/std/monad/syntax.ss new file mode 100644 index 000000000..e4a88755e --- /dev/null +++ b/src/std/monad/syntax.ss @@ -0,0 +1,33 @@ +(import :std/contract) +(export #t) + +(defsyntax (du stx) + (def (expand-bind id stx) + (with-syntax* ((id id) + (bind (stx-identifier #'id #'id ".>>=")) + (seq (stx-identifier #'id #'id ".>>"))) + (syntax-case stx (<-) + ((var <- from body ... end) + #'(bind from (lambda (var) (du id body ... end)))) + ((>> body ... end) + #'(seq >> (du id body ... end))) + ((end) #'end)))) + + (syntax-case stx () + ((_ id body ...) + (identifier? #'id) + (with-syntax ((bindings (expand-bind #'id #'(body ...)))) + #'bindings)) + ((_ (id expr ~ Monad) body ...) + (and (identifier? #'id) + (identifier? #'~)) + #'(using (id expr ~ Monad) + (du id body ...))) + ((_ (id ~ Monad) body ...) + (and (identifier? #'id) + (identifier? #'~)) + #'(using (id ~ Monad) + (du id body ...))) + ((_ ((id this ...) rest ...) body ...) + #'(using ((id this ...) rest ...) + (du id body ...))))) From e9c49cfc8be6d06d3d61c8f04fdf4ad1972aa28f Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Fri, 16 Aug 2024 17:43:42 -0700 Subject: [PATCH 11/23] WIP: Update toc in monad readme --- src/std/monad/README.org | 84 +++++++++++++++++----------------------- 1 file changed, 36 insertions(+), 48 deletions(-) diff --git a/src/std/monad/README.org b/src/std/monad/README.org index e8c26986e..34fc7af10 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -23,56 +23,26 @@ attempt to Monadize Gerbil. - [[#fail-ure-is-an-option][Fail-ure IS an option]] - [[#the-maybe-test-type][The maybe test type]] - [[#fail-and-list-are-natural-friends][Fail and :list are natural friends]] -- [[#monadpluszeroor][Monad[Plus|Zero|Or]​]] -- [[#transformers-statet-redo-state-with-others-in-mind][Transformers! stateT Redo state with others in mind.]] +- [[#monadzeroorplus][MonadZeroOrPlus]] +- [[#transformers][Transformers]] + - [[#a-statet-for-wrapping-state][A stateT for wrapping state]] - [[#the-identity-identity-monad][The Identity identity monad]] - [[#the-monad-for-statet][The Monad for stateT]] - [[#run-it][Run it]] - [[#the-state-for-statet][The State for stateT]] -- [[#first-parser-manual-tranform][First Parser, manual tranform]] -- [[#hindleymilner-types-and-instances][Hindley–Milner type's and instance's]] -- [[#the-monad-interface][The Monad interface]] -- [[#the-identity-singleton][The Identity singleton.]] -- [[#the-list-singleton][The List singleton]] -- [[#monadfail-another-monad-interface][MonadFail, another monad interface.]] - - [[#empty-list-is-a-failure][Empty List is a failure.]] -- [[#the-interface-aka-typeclass-monad-definition][The interface AKA typeclass monad definition]] -- [[#the-type-form-for-a-singleton][The type form for a singleton]] -- [[#state-and-statet-transformer][State and StateT Transformer]] -- [[#failure-is-always-an-option-the-maybe-monad][Failure is always an option! The Maybe monad]] -- [[#the-monaderror-exception-handling][The MonadError: exception handling]] -- [[#wrapping-with-errort][Wrapping with ErrorT]] -- [[#next-method-vs-inner][Next Method vs inner]] -- [[#general-monad-utility-functions][General Monad Utility Functions]] -- [[#the-monad-module][The Monad module]] -- [[#gerbil][Gerbil]] + - [[#the-zeroorplus-and-fail-transformations][The ZeroOrPlus and Fail transformations]] + - [[#how-to-lift-from-inner][How to lift from inner]] +- [[#monadic-parser-combinators---part-01][Monadic Parser Combinators - Part 0.1]] +- [[#catchthrow-equals-errorhandler-from-errort][Catch/Throw equals ErrorHandler from errorT]] + - [[#transform-state-to-errort][Transform State to errorT]] - [[#emacs][Emacs]] -- [[#append-me][Append me]] - - [[#type-class-aka-interface]["Type Class", AKA Interface]] - - [[#an-instance-of-an-type-class-is-a-struct-that-has-the-interface-described][An Instance of an "Type Class" is a Struct that has the Interface described.]] - - [[#the-identity-monad][The Identity Monad.]] -- [[#instance-syntax][Instance Syntax]] -- [[#syntax-test-repl][Syntax test-repl]] - - [[#the-standard-testing-library][The Standard Testing Library]] - - [[#test-suite][test-suite]] - - [[#test-case][test-case]] - - [[#check][check]] - - [[#checkf][checkf]] - - [[#check-eq][check-eq?]] - - [[#check-not-eq][check-not-eq?]] - - [[#check-eqv][check-eqv?]] - - [[#check-not-eqv][check-not-eqv?]] - - [[#check-equal][check-equal?]] - - [[#check-not-equal][check-not-equal?]] - - [[#check-output][check-output]] - - [[#check-predicate][check-predicate]] - - [[#check-exception][check-exception]] - - [[#file-interfacess][File interface.ss]] - - [[#file-identityss][File identity.ss]] - - [[#file-listss][File list.ss]] - - [[#file-statess][File state.ss]] - - [[#file-syntaxss][File syntax.ss]] - - [[#file-monadss][File ../monad.ss]] +- [[#file-interfacess][File interface.ss]] +- [[#file-identityss][File identity.ss]] +- [[#file-listss][File list.ss]] +- [[#file-statess][File state.ss]] +- [[#file-errorss][File error.ss]] +- [[#file-syntaxss][File syntax.ss]] +- [[#file-monadss][File ../monad.ss]] :END: @@ -499,9 +469,9 @@ concepts. #+end_src -* =Monad[Plus|Zero|Or]= +* =MonadZeroOrPlus= :PROPERTIES: -:CUSTOM_ID: monadpluszeroor +:CUSTOM_ID: monadzeroorplus :END: For a list there should be a way to add items. =MonadPlus= is just @@ -545,6 +515,9 @@ So a =:list= is of those three... #+end_src * Transformers +:PROPERTIES: +:CUSTOM_ID: transformers +:END: Different transformers may need to tranform one another or some such. Beyond that there's =lift=. @@ -556,7 +529,7 @@ such. Beyond that there's =lift=. ** A =stateT= for wrapping state :PROPERTIES: -:CUSTOM_ID: transformers-statet-redo-state-with-others-in-mind +:CUSTOM_ID: a-statet-for-wrapping-state :END: Previously, all the monad instances are separate. What if we want to @@ -675,6 +648,9 @@ And the =state= procedure. #+end_src ** The =ZeroOrPlus= and =Fail= transformations +:PROPERTIES: +:CUSTOM_ID: the-zeroorplus-and-fail-transformations +:END: Choices can be a big part of programming. @@ -718,6 +694,9 @@ Choices can be a big part of programming. #+end_src ** How to =lift= from inner +:PROPERTIES: +:CUSTOM_ID: how-to-lift-from-inner +:END: Just because we're tried to make the =stateT= wrap most monadic computations does not mean that we can trivially wrap any monadic @@ -745,6 +724,9 @@ So we can take a function for the inner monad and lift it up! #+end_src * Monadic Parser Combinators - Part 0.1 +:PROPERTIES: +:CUSTOM_ID: monadic-parser-combinators---part-01 +:END: I'm not at all sure how I came across this but after writing my first =Org Mode= parser, I found this quote: @@ -817,6 +799,9 @@ All we really need is a =char​=?= and we could have a parser! #+end_src * Catch/Throw equals =ErrorHandler= from =errorT= +:PROPERTIES: +:CUSTOM_ID: catchthrow-equals-errorhandler-from-errort +:END: We all know about throwing and catching errors. The =...Handler= is there to play nice with =:std/error= of course. @@ -887,6 +872,9 @@ Testing it makes it clear. #+end_src ** Transform =State= to =errorT= +:PROPERTIES: +:CUSTOM_ID: transform-state-to-errort +:END: This is after state in the train of thought so is defined here. From 1ca4e8a19a63c2c7dcaae5cb41a98449e6ac18f7 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 17 Aug 2024 11:00:13 -0700 Subject: [PATCH 12/23] WIP: Monad tests should work with lib --- src/std/monad-test.ss | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/std/monad-test.ss b/src/std/monad-test.ss index 1ede35493..801c650fa 100644 --- a/src/std/monad-test.ss +++ b/src/std/monad-test.ss @@ -6,14 +6,13 @@ :std/error :std/interactive :srfi/13 - ; :std/instance - "instance" - "monad/interface" - "monad/identity" - "monad/list" - "monad/state" - "monad/syntax" - "monad/error" + :std/instance #;"instance" + :std/monad/interface #;"monad/interface" + :std/monad/identity #;"monad/identity" + :std/monad/list #;"monad/list" + :std/monad/state #;"monad/state" + :std/monad/syntax #;"monad/syntax" + :std/monad/error #;"monad/error" (only-in :std/sugar hash try) (only-in :gerbil/core error-object? with-catch)) (export monad-test) From c604a66d0d2a2b595d77cd9d4faf25b3fdd0617f Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 17 Aug 2024 12:28:26 -0700 Subject: [PATCH 13/23] WIP: Remove uneeded build --- src/std/monad/build.ss | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100755 src/std/monad/build.ss diff --git a/src/std/monad/build.ss b/src/std/monad/build.ss deleted file mode 100755 index de5be0270..000000000 --- a/src/std/monad/build.ss +++ /dev/null @@ -1,16 +0,0 @@ -#!/usr/bin/env gxi -;;; -*- Gerbil -*- -(import :std/build-script) - -(defbuild-script - '("instance" - "monad/syntax" - "monad/interface" - "monad/util" - "monad/Identity" - "monad/List" - "monad/State" - "Monad/Error" - - "monad" - )) From 0f3a3174f77fc101069da1bc4467d72b0b40312f Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Thu, 22 Aug 2024 11:09:46 -0700 Subject: [PATCH 14/23] WIP: Monad tests pass FFS! --- src/std/monad-test.ss | 14 +++++++------- src/std/monad/README.org | 35 ++++++++++++++++++++--------------- src/std/monad/error.ss | 6 ++++++ src/std/monad/state.ss | 12 ++++++------ 4 files changed, 39 insertions(+), 28 deletions(-) diff --git a/src/std/monad-test.ss b/src/std/monad-test.ss index 801c650fa..a9d10ad2a 100644 --- a/src/std/monad-test.ss +++ b/src/std/monad-test.ss @@ -6,13 +6,13 @@ :std/error :std/interactive :srfi/13 - :std/instance #;"instance" - :std/monad/interface #;"monad/interface" - :std/monad/identity #;"monad/identity" - :std/monad/list #;"monad/list" - :std/monad/state #;"monad/state" - :std/monad/syntax #;"monad/syntax" - :std/monad/error #;"monad/error" + :std/instance + :std/monad/interface + :std/monad/identity + :std/monad/list + :std/monad/state + :std/monad/syntax + :std/monad/error (only-in :std/sugar hash try) (only-in :gerbil/core error-object? with-catch)) (export monad-test) diff --git a/src/std/monad/README.org b/src/std/monad/README.org index 34fc7af10..6449f3415 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -568,13 +568,13 @@ Even though, or perhaps because =:t= does "nothing" there is a simple (instance (m Monad) (st stateT) ((return a) - (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) + (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) ((>>= ma f) - (using (inner st.inner : Monad) - (lambda (s) - (du inner - pair <- (ma s) - (with ((cons v s!) pair) ((f v) s!))))))) + (using (inner st.inner : Monad) + (lambda (s) + (du inner + pair <- (ma s) + (with ((cons v s!) pair) ((f v) s!))))))) #+end_src @@ -839,7 +839,13 @@ Making the handler is easy. (du (inner et.inner :- Monad) val <- exp (if (me.error? val) (handler val) (inner.return val))))) - + + (instance Fail (et errorT) ((fail) (using (i et.inner : Fail) (i.fail)))) + (instance Or (et errorT) ((or a b) (using (i et.inner : Or) (i.or a b)))) + (instance Plus (et errorT) ((plus a b) (using (i et.inner : Plus) (i.plus a b)))) + (instance Run (et errorT) ((run fn arg) (using (i et.inner : Run) (i.run fn arg)))) + (instance Zero (et errorT) ((zero) (using (i et.inner : Zero) (i.zero)))) + #+end_src Testing it makes it clear. @@ -1107,14 +1113,13 @@ This is after state in the train of thought so is defined here. :std/error :std/interactive :srfi/13 - ; :std/instance - "instance" - "monad/interface" - "monad/identity" - "monad/list" - "monad/state" - "monad/syntax" - "monad/error" + :std/instance + :std/monad/interface + :std/monad/identity + :std/monad/list + :std/monad/state + :std/monad/syntax + :std/monad/error (only-in :std/sugar hash try) (only-in :gerbil/core error-object? with-catch)) (export monad-test) diff --git a/src/std/monad/error.ss b/src/std/monad/error.ss index d6dd7dcee..123e4024b 100644 --- a/src/std/monad/error.ss +++ b/src/std/monad/error.ss @@ -27,6 +27,12 @@ val <- exp (if (me.error? val) (handler val) (inner.return val))))) +(instance Fail (et errorT) ((fail) (using (i et.inner : Fail) (i.fail)))) +(instance Or (et errorT) ((or a b) (using (i et.inner : Or) (i.or a b)))) +(instance Plus (et errorT) ((plus a b) (using (i et.inner : Plus) (i.plus a b)))) +(instance Run (et errorT) ((run fn arg) (using (i et.inner : Run) (i.run fn arg)))) +(instance Zero (et errorT) ((zero) (using (i et.inner : Zero) (i.zero)))) + (instance MonadState (et errorT) ((get) (du (inner et.inner :- MonadState) (inner.get))) ((put! s) (du (inner et.inner :- MonadState) (inner.put! s))) diff --git a/src/std/monad/state.ss b/src/std/monad/state.ss index ad5ff8984..02890c174 100644 --- a/src/std/monad/state.ss +++ b/src/std/monad/state.ss @@ -32,13 +32,13 @@ (instance (m Monad) (st stateT) ((return a) - (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) + (using (inner st.inner : Monad) (lambda (s) (inner.return [a . s])))) ((>>= ma f) - (using (inner st.inner : Monad) - (lambda (s) - (du inner - pair <- (ma s) - (with ((cons v s!) pair) ((f v) s!))))))) + (using (inner st.inner : Monad) + (lambda (s) + (du inner + pair <- (ma s) + (with ((cons v s!) pair) ((f v) s!))))))) (instance Run (st stateT) ((run mv (state (void))) (mv state))) From a198b5d09a638a201d55017fa20b4b4c03f2907a Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Thu, 22 Aug 2024 12:51:32 -0700 Subject: [PATCH 15/23] WIP: Does rebind fix tests? --- src/std/monad/README.org | 3 ++- src/std/monad/interface.ss | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/std/monad/README.org b/src/std/monad/README.org index 6449f3415..76d473d5d 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -105,7 +105,8 @@ definition. (instance (m Monad) :t ((return a) a) ((>>= ma f) (f ma)) - ((>> ma mb) (m.>>= ma (lambda (_) mb))))) + ((>> ma mb) (m.>>= ma (lambda (_) mb)))) + rebind: #t) #+end_src Essentially that's that! a monad is just an abstraction around those diff --git a/src/std/monad/interface.ss b/src/std/monad/interface.ss index a87bd77c6..ffd989ca4 100644 --- a/src/std/monad/interface.ss +++ b/src/std/monad/interface.ss @@ -12,7 +12,8 @@ (instance (m Monad) :t ((return a) a) ((>>= ma f) (f ma)) - ((>> ma mb) (m.>>= ma (lambda (_) mb))))) + ((>> ma mb) (m.>>= ma (lambda (_) mb)))) + rebind: #t) (interface Run (run . args)) From 72a2e1f28b02e35a3c5f94c3ae2fe1bd533d61a9 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Thu, 22 Aug 2024 17:06:50 -0700 Subject: [PATCH 16/23] Rebind in the right place, no :- where type is anything --- src/std/monad.ss | 28 +++++++++++++--------------- src/std/monad/README.org | 22 +++++++++++----------- src/std/monad/error.ss | 18 +++++++++--------- src/std/monad/interface.ss | 4 ++-- 4 files changed, 35 insertions(+), 37 deletions(-) diff --git a/src/std/monad.ss b/src/std/monad.ss index ff1d5683b..bc14f423d 100644 --- a/src/std/monad.ss +++ b/src/std/monad.ss @@ -1,16 +1,14 @@ (import - ./monad/interface - ./monad/identity - ./monad/list - ./monad/state - ./monad/syntax - ./monad/error) - (export - (import: - ./monad/interface) - ;; ./monad/identity - ;; ./monad/list - ;; ./monad/state - ;; ./monad/syntax - ;; ./monad/error -) + ./monad/interface + ./monad/identity + ./monad/list + ./monad/state + ./monad/syntax + ./monad/error) +(export + (import: ./monad/interface) + (import: ./monad/identity) + (import: ./monad/list) + (import: ./monad/state) + (import: ./monad/syntax) + (import: ./monad/error)) diff --git a/src/std/monad/README.org b/src/std/monad/README.org index 76d473d5d..123c649d7 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -105,8 +105,8 @@ definition. (instance (m Monad) :t ((return a) a) ((>>= ma f) (f ma)) - ((>> ma mb) (m.>>= ma (lambda (_) mb)))) - rebind: #t) + ((>> ma mb) (m.>>= ma (lambda (_) mb))) + rebind: #t)) #+end_src Essentially that's that! a monad is just an abstraction around those @@ -828,16 +828,16 @@ Making the handler is easy. #+begin_src scheme :noweb-ref errorT-struct (instance (me MonadError) (et errorT) - ((return a) (du (inner et.inner :- Monad) (inner.return a))) - ((>>= ma f) (du (inner et.inner :- Monad) + ((return a) (du (inner et.inner : Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner : Monad) a <- ma (if (me.error? a) (inner.return a) (f a)))) ((error? thing) (Error? thing)) ((throw msg . irritants) - (du (inner et.inner :- Monad) + (du (inner et.inner : Monad) (inner.return (Error msg irritants: irritants)))) ((catch exp handler) - (du (inner et.inner :- Monad) + (du (inner et.inner : Monad) val <- exp (if (me.error? val) (handler val) (inner.return val))))) @@ -887,18 +887,18 @@ This is after state in the train of thought so is defined here. #+begin_src scheme :noweb-ref errorT-struct (instance MonadState (et errorT) - ((get) (du (inner et.inner :- MonadState) (inner.get))) - ((put! s) (du (inner et.inner :- MonadState) (inner.put! s))) - ((state f)(du (inner et.inner :- MonadState) (inner.state f)))) + ((get) (du (inner et.inner : MonadState) (inner.get))) + ((put! s) (du (inner et.inner : MonadState) (inner.put! s))) + ((state f)(du (inner et.inner : MonadState) (inner.state f)))) (instance MonadError (st stateT) ((error? e?) (lambda (s) - (du (inner st.inner :- MonadError) + (du (inner st.inner : MonadError) [(inner.error? e?) s ...]))) ((throw message . args) (lambda (s) [ (apply MonadError-throw st.inner message args) s ...])) - ((catch exp handler) (lambda (s) (du (inner st.inner :- MonadError) + ((catch exp handler) (lambda (s) (du (inner st.inner : MonadError) [(inner.catch exp handler) s ...])))) #+end_src diff --git a/src/std/monad/error.ss b/src/std/monad/error.ss index 123e4024b..8b461bbc8 100644 --- a/src/std/monad/error.ss +++ b/src/std/monad/error.ss @@ -14,16 +14,16 @@ (set! self.inner inner))) (instance (me MonadError) (et errorT) - ((return a) (du (inner et.inner :- Monad) (inner.return a))) - ((>>= ma f) (du (inner et.inner :- Monad) + ((return a) (du (inner et.inner : Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner : Monad) a <- ma (if (me.error? a) (inner.return a) (f a)))) ((error? thing) (Error? thing)) ((throw msg . irritants) - (du (inner et.inner :- Monad) + (du (inner et.inner : Monad) (inner.return (Error msg irritants: irritants)))) ((catch exp handler) - (du (inner et.inner :- Monad) + (du (inner et.inner : Monad) val <- exp (if (me.error? val) (handler val) (inner.return val))))) @@ -34,16 +34,16 @@ (instance Zero (et errorT) ((zero) (using (i et.inner : Zero) (i.zero)))) (instance MonadState (et errorT) - ((get) (du (inner et.inner :- MonadState) (inner.get))) - ((put! s) (du (inner et.inner :- MonadState) (inner.put! s))) - ((state f)(du (inner et.inner :- MonadState) (inner.state f)))) + ((get) (du (inner et.inner : MonadState) (inner.get))) + ((put! s) (du (inner et.inner : MonadState) (inner.put! s))) + ((state f)(du (inner et.inner : MonadState) (inner.state f)))) (instance MonadError (st stateT) ((error? e?) (lambda (s) - (du (inner st.inner :- MonadError) + (du (inner st.inner : MonadError) [(inner.error? e?) s ...]))) ((throw message . args) (lambda (s) [ (apply MonadError-throw st.inner message args) s ...])) - ((catch exp handler) (lambda (s) (du (inner st.inner :- MonadError) + ((catch exp handler) (lambda (s) (du (inner st.inner : MonadError) [(inner.catch exp handler) s ...])))) diff --git a/src/std/monad/interface.ss b/src/std/monad/interface.ss index ffd989ca4..ab7e491b9 100644 --- a/src/std/monad/interface.ss +++ b/src/std/monad/interface.ss @@ -12,8 +12,8 @@ (instance (m Monad) :t ((return a) a) ((>>= ma f) (f ma)) - ((>> ma mb) (m.>>= ma (lambda (_) mb)))) - rebind: #t) + ((>> ma mb) (m.>>= ma (lambda (_) mb))) + rebind: #t)) (interface Run (run . args)) From 667855b47994cf24a70474b6587bace77a1e8e8f Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 00:46:42 -0700 Subject: [PATCH 17/23] WIP: Major start on Parsec! --- src/std/parsec-test.ss | 320 ++++++++++ src/std/parsec/README.org | 1134 +++++++++++++++++++++++++++++++++ src/std/parsec/char.ss | 38 ++ src/std/parsec/combinators.ss | 78 +++ src/std/parsec/stream.ss | 169 +++++ src/std/parsec/syntax.ss | 34 + src/std/parsec/transformer.ss | 85 +++ 7 files changed, 1858 insertions(+) create mode 100644 src/std/parsec-test.ss create mode 100644 src/std/parsec/README.org create mode 100644 src/std/parsec/char.ss create mode 100644 src/std/parsec/combinators.ss create mode 100644 src/std/parsec/stream.ss create mode 100644 src/std/parsec/syntax.ss create mode 100644 src/std/parsec/transformer.ss diff --git a/src/std/parsec-test.ss b/src/std/parsec-test.ss new file mode 100644 index 000000000..2ff4892ba --- /dev/null +++ b/src/std/parsec-test.ss @@ -0,0 +1,320 @@ +;;; -*- Gerbil -*- +;;; (C) me at drewc.ca +;;; :std/parsec unit-tests + +(import :std/test + :std/error + :std/interactive + :srfi/13 + :std/parser/stream + :std/parser/base + :std/monad/interface + ;;:std/monad/error + :std/monad/list + :std/monad/syntax + "instance" + "parsec/stream" + "parsec/transformer" + "parsec/combinators" + "parsec/char" + "parsec/syntax" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) +(export parsec-test) +(begin-foreign (include "~~lib/_gambit#.scm")) +(defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + +(set-test-verbose! #t) + +(def parsec-test + (test-suite "Test :std/parsec" + (test-inline + test-case: "Char Reader tests" + > (def rdr (open-input-string "42!")) + > (CharReader-peek-char rdr) + #\4 + > (using (rdr : CharReader) + (let ((one (rdr.read-char)) + (two (rdr.read-char))) + (string->number (list->string [one two])))) + 42 + > (CharReader-read-char rdr) + #\! + > (CharReader-read-char rdr) + #!eof + ) + + (test-inline + test-case: "Location tests" + > (interface (testLoc CharReader Location)) + > (def rdr (open-input-string "42\n!")) + > (using (l rdr : Location) (location-line (l.location))) + 0 + > (testLoc-read-char rdr) + #\4 + > (using ((r rdr : testLoc) + (loc (r.location) : location)) + loc.xoff) + 1 + > (using ((r rdr : testLoc) + (loc (r.location) : location)) + (let* ((a (r.read-char)) + (l0 loc.line) + (off1 (r.xoff)) + (c2 (location-col (r.location))) + (b (r.read-char)) + (l1 (location-line (r.location))) + (c (r.peek-char)) + (_ (r.read-char)) + (off2 (r.xoff)) + (eof (r.read-char)) + (off3 (r.xoff))) + + [a off1 l0 c2 b l1 c off2 eof off3])) + (#\2 2 0 2 #\newline 1 #\! 4 #!eof 4) + + ) + (test-inline + test-case: "Token tests" + > (interface (testTok Token Location)) + > (def port (open-input-string "(def ltuae 42)")) + + > (testTok-xoff port) + 0 + > (testTok-token port char-alphabetic?) + #f + > (testTok-xoff port) + 0 + > (testTok-token port (? (or char-alphabetic? char-numeric?))) + #f + > (testTok-token port (? (not (or char-alphabetic? char-numeric?)))) + #\( + > (testTok-xoff port) + 1 + ) + + + (test-inline + test-case: "Stream tests" + > (def port (open-input-string "42\n is the answer")) + > (def stream (make-buffered-char-reader port)) + > [(Location-xoff port) (Location-xoff stream)] + (0 0) + > (location-col (Location-location stream)) + 0 + > (CharReader-peek-char stream) + #\4 + > [(Location-xoff port) (Location-xoff stream)] + (0 0) + > (CharReader-read-char stream) + #\4 + > [(Location-xoff port) (Location-xoff stream)] + (1 1) + > (BufferedCharReader-put-back stream #\4) + > [(Location-xoff port) (Location-xoff stream)] + (1 0) + + > (Token-token stream) + #\4 + > (Token-token stream char-numeric?) + #\2 + > [(Location-xoff port) (Location-xoff stream)] + (2 2) + > (Token-token stream) + #\newline + > (using (stream :- buffered-char-reader) stream.lines) + (3) + > (Token-token stream) + #\space + > (location-line (Location-location stream)) + 1 + > (location-col (Location-location stream)) + 1 + > (Token-token stream) + #\i + > (location-col (Location-location stream)) + 2 + > (using (stream : BufferedCharReader) + (stream.put-back #\f) + (stream.put-back #\f)) + > (location-line (Location-location stream)) + 0 + > (location-col (Location-location stream)) + 1 + + ) + (test-inline + test-case: "Lookahead Stream tests" + > (def port (open-input-string "42\n is the answer")) + > (def bstream (make-buffered-char-reader port)) + > (def stream (make-lookahead-char-stream bstream)) + + > (lookahead-char-stream-lo stream) + 0 + > (buffered-char-reader-hi bstream) + 0 + > (Token-token stream) + #\4 + > (Token-token (make-lookahead-char-stream bstream)) + #\4 + > (lookahead-char-stream-lo stream) + 1 + > (Token-token stream) + #\2 + > (Token-token (make-lookahead-char-stream bstream)) + #\4 + > (Location-xoff stream) + 2 + > (Location-xoff bstream) + 0 + > (Token-token bstream) + #\4 + ) + (test-inline + test-case: "ParsecT tests" + > (defrule (use p body ...) (using (p (make-parsecT) : ParsecT) body ...)) + > (with ([[ret . state]] (use p (p.run (p.return 42) ""))) + (check-eqv? ret 42) + (Location-xoff state)) + 0 + > (map car (use p (p.run (p.return 42) "as"))) + (42) + > (map car (use p (p.run (p.plus (p.return 42) (p.return 42)) "42"))) + (42 42) + > (map car (use p (p.run (p.or (p.return 42) (p.return 42)) "42"))) + (42) + > (caar (use p (p.run (p.or (p.return 42) (p.return 43)) ""))) + 42 + > (caar (use p (p.run (p.or (p.>> (p.return 42) (p.fail)) (p.return 43)) ""))) + 43 + > (caar (use p (p.run (p.or (p.read-char) (p.return 43)) "heh"))) + #\h + > (use p (p.run (p.or (p.>> (p.read-char) (p.fail)) (p.return 43)) "heh")) + () + + ) + + (test-inline + test-case: "Combinator tests" + > (def current-parser (make-parameter (ParsecCombinators (make-parsecT)))) + > (defrule (parse id body ...) (using (id (current-parser) :- ParsecCombinators) body ...)) + > (def (test-or a b input) + (parse _ (_.run (_.or a b) input))) + > (caar (parse _ (test-or (_.any-token) (_.return 42) "asdf"))) + #\a + > (caar (parse _ (test-or (_.satisfy char-numeric?) (_.return 42) "asdf"))) + 42 + > (parse _ (test-or (du _ c <- (_.any-token) + (if (char-numeric? c) (_.return c) (_.zero))) + (_.return 42) + "fourty-two")) + () ;; null is one message that represent failure and what zero does by + ;; default + ;> (def current-parser (make-parameter (ParsecCombinators (make-parsecT)))) + > (defrule (u id body ...) (using (id (current-parser) :- ParsecCombinators) body ...)) + > (caar ((u t (let (la #f) + (t.or + (t.try (du t second <- (t.>> (t.any-token) (t.any-token)) + (begin (set! la second)(t.throw "This Failed")))) + (du t first <- (t.any-token) + (t.return [la first]))))) (open-input-string "asdf"))) + (#\s #\a) + > (def token + (du (_ (make-parsecT) : ParsecChar) + c <- (_.letter) + cs <- (_.many (_.or (_.letter) (_.char #\_))) + (_.return (list->string (cons c cs))))) + + > (caar (parse _ (_.run token "foo_bar bad"))) + "foo_bar" + > (caar (parse _ (_.run token "x+y"))) + "x" + + + + > (caar (parse _ (_.run (_.many (_.any-token)) "asd"))) + (#\a #\s #\d) + + > (caar (parse _ (_.run (_.many1 (_.any-token)) "asd"))) + (#\a #\s #\d) + + > (caar (parse _ (_.run (_.many-till (_.any-token) (_.satisfy (cut char=? <> #\:))) "asd:"))) + (#\a #\s #\d) + > (u parse (caar (parse.run (parse.any-token) "a"))) + #\a + > (u parse (parse.run (parse.any-token) "")) + () + > (u parse (caar (parse.run (parse.or (parse.any-token) (parse.return 42)) ""))) + 42 + + + > (caar (parse _ (_.run (_.satisfy char-numeric?) "42"))) + #\4 + > (parse _ (_.run (_.satisfy char-numeric?) "fourtwo")) + () + + + + ) + + (test-inline + test-case: "Dot tests" + > (caar (do-parse (.run (.return 42) ""))) + 42 + > (def-parse FourTwo (.char #\4) (.char #\2) (.return 42)) + > (caar (do-parse (.run FourTwo "42"))) + 42 + + ) + (test-inline + test-case: "Character Parsing tests" + > (caar (do-parse (.run (.string "asd") "asdfjkl;"))) + "asd" + > (caar (do-parse (.run (.string "asd" char-ci=?) "AsDfjkl;"))) + "AsD" + + ) + (test-inline + test-case: "Org Syntax Parsing tests" + > (def-parse EOL (.or (.eof) (.newline))) + + > (def-parse KEY + (.>> (.string "#+") + (.many-till + (.satisfy (? (not char-whitespace?))) + (.string ": ")))) + > (def-parse VALUE (.many-till (.any-token) EOL)) + + > (def-parse KEYWORD + key <- (.liftM list->string KEY) + value <- (.liftM list->string VALUE) + (.return ['keyword key: key value: value])) + + > (run-parser KEYWORD "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") + + + + + + + + + + + ) + + + + + + + )) diff --git a/src/std/parsec/README.org b/src/std/parsec/README.org new file mode 100644 index 000000000..ea88c34bd --- /dev/null +++ b/src/std/parsec/README.org @@ -0,0 +1,1134 @@ + +#+TITLE: Parsec, a library for writing parsers +Recursive descent higher-order parser combinators? + +So a complicated parser can be made out of many smaller ones? + +Let's do it! + +* Cable of Toncents +:PROPERTIES: +:TOC: :include siblings :depth 5 :ignore (this) +:END: +:CONTENTS: + +:END: + + +* Introduction + +Parsing is such a common task that there are many different ways to go +about it. While =LL(1)= and =PEG= are the common and thus most +documented and implemented technique there are times when looking +ahead and falling back with =LL(k)= is nice. + +Sometimes being informal is easier, non? :) + +** Parsing Org Mode + +According to [[https://orgmode.org/worg/org-syntax.html][Org Syntax]] "Any Org document is represented by a sequence +of elements, that can recursively contain other elements and/or +objects". + +Here's a quick example... + +#+begin_src org + ,#+TITLE: This is an outline + ,#+this is not a keyword + This is the zeroth section + + ,* Heading 1 + This is the first section + ,**this is not a headline or section. + + This is an object: https://orgmode.org/worg/dev/org-element-api.html + + ,** Heading 1.1 + This is the first nested section + + ,* Heading 2 + + This is the end +#+end_src + +... with the first two lines helping to explain the exact reasoning +behind recursive descent and backtracking. + +The first line is a keyword. + + Keywords are structured according to the following pattern: + + =#+KEY: VALUE= + + - KEY :: A string consisting of any non-whitespace characters, other + than call (which would forms a babel call element). + - VALUE :: A string consisting of any characters but a newline. + +#+begin_src scheme :noweb-ref org-mode-parser-test + > (def-parse EOL (.or (.eof) (.newline))) + + > (def-parse KEY + (.>> (.string "#+") + (.many-till + (.satisfy (? (not char-whitespace?))) + (.string ": ")))) + > (def-parse VALUE (.many-till (.any-token) EOL)) + + > (def-parse KEYWORD + key <- (.liftM list->string KEY) + value <- (.liftM list->string VALUE) + (.return ['keyword key: key value: value])) + + > (run-parser KEYWORD "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") + + + + + + + + + +#+end_src + + +* Emacs +:PROPERTIES: +:CUSTOM_ID: emacs +:END: + +#+begin_src emacs-lisp + (require 'gerbil-mode) + (gerbil-put-indent '(parse) 1) + (gerbil-put-indent '(def-parse) 1) +#+end_src + +* The dotted syntax + +Here's the thing. I want gerbil parsec to be popular and short form. + +#+begin_src scheme :noweb-ref dot-test + > (caar (do-parse (.run (.return 42) ""))) + 42 + > (def-parse FourTwo (.char #\4) (.char #\2) (.return 42)) + > (caar (do-parse (.run FourTwo "42"))) + 42 +#+end_src + +#+begin_src scheme :tangle "syntax.ss" + (import :std/monad/syntax :std/sugar + (for-syntax :gerbil/expander :std/sugar) + #;./transformer "transformer" + #;./mid-level "char") + (export #t) + (begin-syntax (def (dot-identifier? id) + (and (identifier? id) + (let (id-str (symbol->string (stx-e id))) + (eqv? (string-ref id-str 0) #\.))))) + + (defsyntax (do-parse stx) + (syntax-case stx () + ((_ parser ps ...) + (with-syntax ((@app (syntax-local-introduce '%%app)) + (@parser (syntax-local-introduce '%%parse))) + #'(let-syntax ((__app + (syntax-rules () + ((_ rator rand (... ...)) + (@app rator rand (... ...)))))) + (let-syntax ((@app + (lambda (stx) + (syntax-case stx () + ((_ rator . args) + (dot-identifier? #'rator) + (with-syntax ((method (stx-identifier #'rator '@parser #'rator))) + (syntax/loc stx + (method . args)))) + ((_ . args) + (syntax/loc stx + (__app . args))))))) + (du (@parser (current-parsec) : Parsec) + parser ps ...))))))) + + (defrule (def-parse id forms ...) (def id (do-parse forms ...))) +#+end_src + + +* Character Parsers + + +** =.letter= :: Parse a single letter + +Parses an alphabetic Unicode characters (lower-case, upper-case and +title-case letters, plus letters of caseless scripts and modifiers +letters according to =char-alphabetic?=). Returns the parsed +character. + +#+begin_src scheme :noweb-ref char-impl + ((letter) (P.satisfy char-alphabetic?)) +#+end_src + +** =.char= :: Parse a specific character + +#+begin_src scheme :noweb-ref char-impl + ((char c) (P.satisfy (cut char=? c <>))) +#+end_src + +** =.newline= :: + +#+begin_src scheme :noweb-ref char-impl + ((newline) (P.char #\newline)) +#+end_src + +** =.string= :: Parse and return a string + +There are times when we want to match against a string rather than +single characters. + +#+begin_src scheme :noweb-ref char-test + > (caar (do-parse (.run (.string "asd") "asdfjkl;"))) + "asd" +#+end_src + +Case is often not a concern so insensitive is sometimes prefered and not +frowed upon. + +#+begin_src scheme :noweb-ref char-test + > (caar (do-parse (.run (.string "asd" char-ci=?) "AsDfjkl;"))) + "AsD" +#+end_src + +#+begin_src scheme :noweb-ref char-impl + ((string str (c=? char=?)) + (def lst (if (list? str) str (string->list str))) + (def (pchars cs) + (if (null? cs) (P.return []) + (du P + x <- (P.satisfy (cut c=? <> (car cs))) + xs <- (pchars (cdr cs)) + (P.return (cons x xs))))) + (du P + cs <- (pchars lst) + (P.return (list->string cs)))) +#+end_src + +#+begin_src scheme :tangle char.ss :noweb yes + (import + :std/interface :std/contract :std/instance :std/monad/syntax + #;./transformer "transformer" + #;./stream "stream" + #;./combinators "combinators") + (export #t) + + (interface (ParsecChar ParsecCombinators) + (letter) (char c) (string str (c=? char=?)) (newline)) + + (instance (P ParsecChar) (pt parsecT) + <>) + + + ;; TODO: This is here for now but should be higher level. + + (interface (Parsec ParsecChar)) + + (def current-parsec (make-parameter (Parsec (make-parsecT)))) + + (def (run-parser p inp) + (using (P (current-parsec) : Parsec) + (let (res (P.run p inp)) + (if (null? res) res + (caar res))))) + + + +#+end_src + + +* Combinators + +These are the building blocks of parsers. + +#+begin_src scheme :noweb-ref test-parse-syntax + > (def current-parser (make-parameter (ParsecCombinators (make-parsecT)))) + > (defrule (parse id body ...) (using (id (current-parser) :- ParsecCombinators) body ...)) +#+end_src + + +** Predictive =or= :: a or b ? + +This combinator is a primitive that implements choice. The parser +~(p.or a b)~ first applies =a=. If it succeeds, the value of =a= is +returned. If p fails without consuming any input, parser =b= is +tried. + +This combinator is also a member of the =MonadOr= +interface. + +The parser is called predictive since =a= is only tried when parser p +didn't =read= anything but may have =peek='d. + +This non-backtracking behaviour allows for an efficient implementation +of the parser combinators as we are still =LL(1)=. + +#+begin_src scheme :noweb-ref comb-test + > (def (test-or a b input) + (parse _ (_.run (_.or a b) input))) + > (caar (parse _ (test-or (_.any-token) (_.return 42) "asdf"))) + #\a + > (caar (parse _ (test-or (_.satisfy char-numeric?) (_.return 42) "asdf"))) + 42 + > (parse _ (test-or (du _ c <- (_.any-token) + (if (char-numeric? c) (_.return c) (_.zero))) + (_.return 42) + "fourty-two")) + () ;; null is one message that represent failure and what zero does by + ;; default +#+end_src + +** TODO The =?= operator + +Sometimes where the parser fails and how it does so is not helpful +when trying to express that error to the user. + +So ~(_.? p "This is where it fails")~, for instance, will error with +that message if =p= fails /without consuming any input/. + + +** The =try= operation : LL(+inf) + +To make things efficient and "normal" by default Parsec is predictive +and non-backtracking AKA =LL(1)=. + +There's a number of reasons to be that way. Effeciancy and errors at +the "right place right time" for reporting, simple use of uncached +streams, etc. + +But occaisionally there's a need to look further ahead, consume input, +fail, and backtrack! + +#+begin_src scheme :noweb-ref comb-impl + ((try parser (Nothing (gensym))) + (def (ret-stream la (p? #t)) + (using ((la :- lookahead-char-stream) + (b la.bcr :- buffered-char-reader)) + (if (= b.lo b.hi) b.port b))) + (using (inner pt.inner : ParsecT-inner) + (du P + orig-stream <- (P.state (lambda (s) [s (make-lookahead-char-stream s) ...])) + ret <- (P.catch (inner.or parser (P.return [Nothing])) (lambda (e) (P.return [Nothing e ...]))) + new <- (P.state + (lambda (s) + (if (not (and (pair? ret) (eq? (car ret) Nothing))) + ;; success! + [ret orig-stream ...] + ;; failure + [Nothing (ret-stream s) ...]))) + (if (eq? new Nothing) + (if (pair? ret) (P.return (cdr ret)) (P.fail)) + (P.return new))))) + +#+end_src + +#+begin_src scheme :noweb-ref comb-test + ;> (def current-parser (make-parameter (ParsecCombinators (make-parsecT)))) + > (defrule (u id body ...) (using (id (current-parser) :- ParsecCombinators) body ...)) + > (caar ((u t (let (la #f) + (t.or + (t.try (du t second <- (t.>> (t.any-token) (t.any-token)) + (begin (set! la second)(t.throw "This Failed")))) + (du t first <- (t.any-token) + (t.return [la first]))))) (open-input-string "asdf"))) + (#\s #\a) +#+end_src + +** =many= :: parse zero or more times + +~(parse.many p)~ applies the parser p zero or more times. Returns a +list of the returned values of p. + +#+begin_src scheme :noweb-ref comb-test + > (def token + (du (_ (make-parsecT) : ParsecChar) + c <- (_.letter) + cs <- (_.many (_.or (_.letter) (_.char #\_))) + (_.return (list->string (cons c cs))))) + + > (caar (parse _ (_.run token "foo_bar bad"))) + "foo_bar" + > (caar (parse _ (_.run token "x+y"))) + "x" + + + +#+end_src + +#+begin_src scheme :noweb-ref manyAccum + (def (manyAccum P p) + (using (P : ParsecCombinators) + (def (parse?) + (du P + soff <- (P.xoff) + ret <- (P.or p (P.return Nothing)) + (if (Nothing? ret) (P.return ret) + (du P eoff <- (P.xoff) + (if (= soff eoff) + (P.fail "combinator 'many' is applied to a parser that does not consume") + (P.return ret)))))) + (def (recur) + (du P kar <- (parse?) + (if (Nothing? kar) (P.return '()) + (du P kdr <- (recur) + (P.return (cons kar kdr)))))) + (recur))) + +#+end_src + +#+begin_src scheme :noweb-ref comb-test + > (caar (parse _ (_.run (_.many (_.any-token)) "asd"))) + (#\a #\s #\d) + +#+end_src +#+begin_src scheme :noweb-ref comb-impl + ((many p) (manyAccum P p)) +#+end_src + +** =many1= :: Many, but at least one +#+begin_src scheme :noweb-ref comb-impl + ((many1 p) (du P + x <- p + xs <- (manyAccum P p) + (P.return (cons x xs)))) +#+end_src + +#+begin_src scheme :noweb-ref comb-test + > (caar (parse _ (_.run (_.many1 (_.any-token)) "asd"))) + (#\a #\s #\d) + +#+end_src + +** =many-till= + +#+begin_src scheme :noweb-ref comb-test + > (caar (parse _ (_.run (_.many-till (_.any-token) (_.satisfy (cut char=? <> #\:))) "asd:"))) + (#\a #\s #\d) +#+end_src + +#+begin_src scheme :noweb-ref comb-impl + ((many-till p end) + (def scan + (P.or (P.>> end (P.return [])) + (du P + x <- p + xs <- scan + (P.return (cons x xs))))) + + scan) + +#+end_src + + +** =.eof= + +#+begin_src scheme :noweb-ref comb-impl + ((eof) + (du P tok? <- (P.token eof-object? Nothing: Nothing) + (if (Nothing? tok?) (P.zero) (P.return tok?)))) +#+end_src + + +** =any-token= => + +The parser =any-token= accepts any kind of token that is not an +=eof-object?=. It returns the token. + +#+begin_src scheme :noweb-ref comb-impl + ((any-token (Nothing #f)) + (du P tok? <- (P.token (? (not eof-object?))) + (if (eq? tok? Nothing) (P.zero) (P.return tok?)))) +#+end_src + +#+begin_src scheme :noweb-ref comb-test + > (u parse (caar (parse.run (parse.any-token) "a"))) + #\a + > (u parse (parse.run (parse.any-token) "")) + () + > (u parse (caar (parse.run (parse.or (parse.any-token) (parse.return 42)) ""))) + 42 + + +#+end_src + +** =satisfy f= => or fail + +The parser =satisfy= accepts any kind of token that passes the =f= +predicate. + +#+begin_src scheme :noweb-ref comb-impl + ((satisfy f) + (du P tok? <- (P.token f Nothing: Nothing) + (if (eq? tok? Nothing) (P.zero) (P.return tok?)))) +#+end_src + +#+begin_src scheme :noweb-ref comb-test + > (caar (parse _ (_.run (_.satisfy char-numeric?) "42"))) + #\4 + > (parse _ (_.run (_.satisfy char-numeric?) "fourtwo")) + () + + +#+end_src + +#+begin_src scheme :tangle combinators.ss :noweb yes + (import + :std/interface :std/contract :std/instance :std/monad/syntax + :std/sugar + #;./transformer "transformer" + #;./stream "stream") + (export #t) + (def Nothing (gensym)) + (def (Nothing? t) (eq? Nothing t)) + (interface (ParsecCombinators ParsecT) + (satisfy f) (eof) + (any-token) + (try p) + (many p) (many1 p) (many-till p end)) + + <> + + (instance (P ParsecCombinators) (pt parsecT) + <>) +#+end_src + + +* A =[P|p]arsecT= transformer + +The =Parsec= concept is a =Monad= so we must transform into that. + +Essentially, =ParsecT= takes our streams and unites them with a monad. + + + +** =only-at xoff= for =.or= and =.plus= + +The only time the second parser can run is if the first parser fails +AND the first parser does not consume any characters. + +#+begin_src scheme :noweb-ref parsect-only-at-test + > (defrule (use p body ...) (using (p (ParsecT (make-parsecT)) :- ParsecT) body ...)) + > (map car (use p (p.run (p.plus (p.return 42) (p.return 43)) ""))) + (42 43) + + > (map car (use p (p.run (p.plus (p.>> (p.return 42) (p.fail)) (p.return 43)) ""))) + (43) + > (map car (use p (p.run (p.plus (p.read-char) (p.return 43)) "heh"))) + > (map car (use p (p.run (p.or (p.return 42) (p.return 42)) "42"))) + (42) + > (caar (use p (p.run (p.or (p.return 42) (p.return 43)) ""))) + 42 + > (caar (use p (p.run (p.or (p.>> (p.return 42) (p.fail)) (p.return 43)) ""))) + 43 + > (caar (use p (p.run (p.or (p.read-char) (p.return 43)) "heh"))) + #\h + > (use p (p.run (p.or (p.>> (p.read-char) (p.fail)) (p.return 43)) "heh")) + () + +#+end_src +*** The source code +#+begin_src scheme :noweb-ref only-at-or-plus + ((only-at xoff parser) + (du P yoff <- (P.xoff) + (if (= yoff xoff) + parser + (P.zero)))) + ((or a b (Nothing (gensym))) + (using (inner pt.inner :- ParsecT-inner) + (du P + soff <- (P.xoff) + ret <- (P.catch (inner.or a (P.return Nothing)) (lambda (e) (P.return Nothing))) + (begin #;(displayln "In or, a was" ret " xoff " soff) + (if (eq? ret Nothing) (P.only-at soff b) (P.return ret)))))) + ((plus a b) (using (inner pt.inner :- ParsecT-inner) + (du P + soff <- (P.xoff) + (inner.plus a (P.only-at soff b))))) +#+end_src + +** Interface and instance +#+begin_src scheme :noweb-ref ParsecT :noweb yes + + (instance (me MonadError) (et errorT) + ((return a) (du (inner et.inner : Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner : Monad) + a <- ma + (if (me.error? a) (inner.return a) (f a)))) + ((error? thing) (Error? thing)) + ((throw msg . irritants) + (du (inner et.inner : Monad) + (inner.return (Error msg irritants: irritants)))) + ((catch exp handler) + (du (inner et.inner : Monad) + val <- exp + ret <- (if (me.error? val) + (handler val) + (inner.return val)) + (inner.return ret))) + rebind: #t) + + + + (interface (ParsecT-inner MonadState ErrorHandler Zero Or Plus Fail)) + (interface (ParsecT CharReader Token Location ParsecT-inner) + (only-at xoff parser) (liftM fn . args)) + (defstruct parsecT (inner) constructor: :init!) + (defmethod {:init! parsecT} + (lambda (self (inner (ParsecT-inner (make-errorT (make-stateT []))))) + (struct-instance-init! self inner))) + + (instance (P ParsecT) (pt parsecT) + ((return a) (using (inner pt.inner :- ParsecT-inner) (inner.return a))) + ((>>= ma f) (using (inner pt.inner :- ParsecT-inner) (inner.>>= ma f))) + ((liftM fn ma) + (using (inner pt.inner :- ParsecT-inner) + (du inner x1 <- ma (inner.return (fn x1))))) + ((fail (msg "ParserError") . irritants) + (apply ParsecT-throw P msg irritants)) + ((get) (using (inner pt.inner :- ParsecT-inner) (inner.get))) + ((put! s) (using (inner pt.inner :- ParsecT-inner) (inner.put! s))) + ((run fn s) (using (inner pt.inner :- ParsecT-inner) + (def state (if (string? s) (open-input-string s) s)) + (inner.run fn state))) + ((state f) (using (inner pt.inner :- ParsecT-inner) (inner.state f))) + ((location) + (du P stream <- (P.get) + (P.return (Location-location stream)))) + ((xoff) (P.>>= (P.get) (lambda (stream) + (P.return (Location-xoff stream))))) + ((peek-char) (du P stream <- (P.get) (P.return (CharReader-peek-char stream)))) + ((read-char) (du P stream <- (P.get) (P.return (CharReader-read-char stream)))) + ((token (test identity) Nothing: (Nothing #f) . args) + (du P tok? <- (P.peek-char) + (if (not (test tok?)) (P.return Nothing) + (P.read-char)))) + <> + ((zero) (using (inner pt.inner :- ParsecT-inner) (inner.zero))) + ((catch e h) (using (inner pt.inner :- ParsecT-inner) (inner.catch e h))) + ((throw msg . irritants) (apply ParsecT-inner-throw pt.inner msg irritants)) + ((error? e?) (using (inner pt.inner :- ParsecT-inner) (inner.error? e?)))) +#+end_src + + +#+begin_src scheme :noweb-ref parsect-test + > (defrule (use p body ...) (using (p (make-parsecT) : ParsecT) body ...)) + > (with ([[ret . state]] (use p (p.run (p.return 42) ""))) + (check-eqv? ret 42) + (Location-xoff state)) + 0 + > (map car (use p (p.run (p.return 42) "as"))) + (42) + > (map car (use p (p.run (p.plus (p.return 42) (p.return 42)) "42"))) + (42 42) + > (map car (use p (p.run (p.or (p.return 42) (p.return 42)) "42"))) + (42) + > (caar (use p (p.run (p.or (p.return 42) (p.return 43)) ""))) + 42 + > (caar (use p (p.run (p.or (p.>> (p.return 42) (p.fail)) (p.return 43)) ""))) + 43 + > (caar (use p (p.run (p.or (p.read-char) (p.return 43)) "heh"))) + #\h + > (use p (p.run (p.or (p.>> (p.read-char) (p.fail)) (p.return 43)) "heh")) + () + +#+end_src + + +** /File/ parsec.ss + +#+begin_src scheme :tangle transformer.ss :noweb yes + (import :std/monad/error + :std/error + :std/monad/state + :std/monad/list + :std/monad/interface + :std/monad/syntax + :std/interface + :std/instance + #;./stream "stream") + (export #t (import: :std/monad/state)) + + <> +#+end_src + +* Streams + +** The minimal =CharReader= interface + +All parsers start with this front end. + +#+begin_src scheme :noweb-ref CharReader + (interface CharReader (peek-char) (read-char)) + (interface (BufferedCharReader CharReader) (put-back previous-input)) +#+end_src + +A character port is all we need to start. +#+begin_src scheme :noweb-ref CharReader + (instance CharReader (p :character-port) + ((read-char) (read-char p)) + ((peek-char) (peek-char p))) +#+end_src + +#+begin_src scheme :noweb-ref char-reader-test + > (def rdr (open-input-string "42!")) + > (CharReader-peek-char rdr) + #\4 + > (using (rdr : CharReader) + (let ((one (rdr.read-char)) + (two (rdr.read-char))) + (string->number (list->string [one two])))) + 42 + > (CharReader-read-char rdr) + #\! + > (CharReader-read-char rdr) + #!eof +#+end_src + +** Locations + +For parsing it's often very important to know where you are. + +#+begin_src scheme + (defstruct location (port line col off xoff)) +#+end_src + +There may be a few times where we only care about the number of the +current char AKA =xoff=. + +#+begin_src scheme :noweb-ref Location + (interface Location (location) (xoff)) +#+end_src + +#+begin_src scheme :noweb-ref Location + (instance Location (p :character-port) + ((location) (port-location p)) + ((xoff) (##fx+ (macro-character-port-rchars p) + (macro-character-port-rlo p)))) + +#+end_src + +#+begin_src scheme :noweb-ref location-test + > (interface (testLoc CharReader Location)) + > (def rdr (open-input-string "42\n!")) + > (using (l rdr : Location) (location-line (l.location))) + 0 + > (testLoc-read-char rdr) + #\4 + > (using ((r rdr : testLoc) + (loc (r.location) : location)) + loc.xoff) + 1 + > (using ((r rdr : testLoc) + (loc (r.location) : location)) + (let* ((a (r.read-char)) + (l0 loc.line) + (off1 (r.xoff)) + (c2 (location-col (r.location))) + (b (r.read-char)) + (l1 (location-line (r.location))) + (c (r.peek-char)) + (_ (r.read-char)) + (off2 (r.xoff)) + (eof (r.read-char)) + (off3 (r.xoff))) + + [a off1 l0 c2 b l1 c off2 eof off3])) + (#\2 2 0 2 #\newline 1 #\! 4 #!eof 4) + +#+end_src + +** Tokens? Tokenizer! + +Regardless of the fact that with enough specialization a =CharReader= +can be made from anything there may be other tokens beyond characters. + +Even with chars there's a simple reason: combine peek and read! + +#+begin_src scheme :noweb-ref Token + (interface Token + (token (test identity) Nothing: (Nothing #f) . args)) + (interface (TokenCharReader Token CharReader)) + + (instance (t TokenCharReader) :t + ((token (test identity) Nothing: (Nothing #f) . args) + (if (not (test (t.peek-char))) Nothing + (t.read-char)))) + + (instance Token (p :character-port) + ((token (test identity) Nothing: (Nothing #f) . args) + (if (not (test (peek-char p))) Nothing + (read-char p)))) +#+end_src + + +#+begin_src scheme :noweb-ref token-test + > (interface (testTok Token Location)) + > (def port (open-input-string "(def ltuae 42)")) + + > (testTok-xoff port) + 0 + > (testTok-token port char-alphabetic?) + #f + > (testTok-xoff port) + 0 + > (testTok-token port (? (or char-alphabetic? char-numeric?))) + #f + > (testTok-token port (? (not (or char-alphabetic? char-numeric?)))) + #\( + > (testTok-xoff port) + 1 +#+end_src + +*** Buffered Streams + +If we allow infinite lookahead we need to copy and on the new +one. + +#+begin_src scheme :noweb-ref stream-test + > (def port (open-input-string "42\n is the answer")) + > (def stream (make-buffered-char-reader port)) + > [(Location-xoff port) (Location-xoff stream)] + (0 0) + > (location-col (Location-location stream)) + 0 + > (CharReader-peek-char stream) + #\4 + > [(Location-xoff port) (Location-xoff stream)] + (0 0) + > (CharReader-read-char stream) + #\4 + > [(Location-xoff port) (Location-xoff stream)] + (1 1) + > (BufferedCharReader-put-back stream #\4) + > [(Location-xoff port) (Location-xoff stream)] + (1 0) + + > (Token-token stream) + #\4 + > (Token-token stream char-numeric?) + #\2 + > [(Location-xoff port) (Location-xoff stream)] + (2 2) + > (Token-token stream) + #\newline + > (using (stream :- buffered-char-reader) stream.lines) + (3) + > (Token-token stream) + #\space + > (location-line (Location-location stream)) + 1 + > (location-col (Location-location stream)) + 1 + > (Token-token stream) + #\i + > (location-col (Location-location stream)) + 2 + > (using (stream : BufferedCharReader) + (stream.put-back #\f) + (stream.put-back #\f)) + > (location-line (Location-location stream)) + 0 + > (location-col (Location-location stream)) + 1 + + #+end_src + +*** Lookahead streams + +What if we want/need to be anywhere in the stream? With +non-determanistic parsers that's a possibility! + +#+begin_src scheme :noweb-ref lstream-test + > (def port (open-input-string "42\n is the answer")) + > (def bstream (make-buffered-char-reader port)) + > (def stream (make-lookahead-char-stream bstream)) + + > (lookahead-char-stream-lo stream) + 0 + > (buffered-char-reader-hi bstream) + 0 + > (Token-token stream) + #\4 + > (Token-token (make-lookahead-char-stream bstream)) + #\4 + > (lookahead-char-stream-lo stream) + 1 + > (Token-token stream) + #\2 + > (Token-token (make-lookahead-char-stream bstream)) + #\4 + > (Location-xoff stream) + 2 + > (Location-xoff bstream) + 0 + > (Token-token bstream) + #\4 +#+end_src + + +*** /File/ parsec/stream.ss + +#+begin_src scheme :tangle stream.ss :noweb yes + (import :std/parser/stream :std/parser/base + :std/contract :std/srfi/1 :std/srfi/13 + :std/error :std/instance) + (export + #t + (import: :std/parser/base)) + + (extern namespace: std/parser/stream + char-stream-buf char-stream-port char-stream-lines + char-stream-buf-set! location-getc) + + (begin-foreign (include "~~lib/_gambit#.scm")) + (extern namespace: #f + macro-character-input-port? + macro-character-port-rlines + macro-character-port-rchars + macro-character-port-rcurline + macro-character-port-rlo) + + <> + + <> + + <> + + (defstruct buffered-char-reader (port start buf lo hi lines) + constructor: :init! + final: #t) + + (def default-buffered-char-reader-buffer-size 1024) + (defmethod {:init! buffered-char-reader} + (lambda (self port) + (unless (macro-character-input-port? port) + (raise-bad-argument + make-buffered-char-reader "input source; character-input-port" port)) + (let (start (using (l port : Location) (l.location))) + (struct-instance-init! + self port start + (make-string default-buffered-char-reader-buffer-size) + 0 0 [])))) + + (def (buffered-char-reader-getc bcr (unbuffered #f)) + (using (bcr :- buffered-char-reader) + (if (or unbuffered (eqv? bcr.lo bcr.hi)) + (let (c (read-char bcr.port)) + (when (eq? #\newline c) + (set! (buffered-char-reader-lines bcr) + [(Location-xoff bcr.port) bcr.lines ...])) + c) + (let (c (string-ref bcr.buf bcr.lo)) + (set! bcr.lo (1+ bcr.lo)) + c)))) + + (def (buffered-char-reader-peekc bcr) + (using (bcr :- buffered-char-reader) + (if (eqv? bcr.lo bcr.hi) + (peek-char bcr.port) + (string-ref bcr.buf bcr.lo)))) + + (def (buffered-char-reader-ungetc bcr char) + (using (bcr :- buffered-char-reader) + (let* ((new-hi (1+ bcr.hi)) + (len (string-length bcr.buf)) + (str (if (not (= bcr.hi len)) bcr.buf + (let (str (make-string (* 2 len))) + (string-copy! str 0 bcr.buf) + (set! bcr.buf str) + str)))) + (set! (string-ref str bcr.hi) char) + (set! bcr.hi new-hi)))) + + (instance BufferedCharReader (bcr buffered-char-reader) + ((read-char) (buffered-char-reader-getc bcr)) + ((peek-char) (buffered-char-reader-peekc bcr)) + ((put-back char) (buffered-char-reader-ungetc bcr char))) + + (instance (L Location) (bcr buffered-char-reader) + ((xoff) (if (= bcr.lo bcr.hi) + (Location-xoff bcr.port) + (+ (location-xoff bcr.start) + bcr.lo))) + ((location) + (if (= bcr.lo bcr.hi) + (Location-location bcr.port) + (let* ((xoff (L.xoff)) + (lines (find-tail (cut < <> xoff) bcr.lines)) + (base (if lines (car lines) -1)) + (col (##fx- xoff base 1)) + (line (if lines (length lines) 0))) + (make-location bcr.port line col 1 xoff))))) + + ;; lookahead-char-stream + ;; bcr : a buffered char reader + ;; lo : The starting xoff + ;; hi : Either #f or the end xoff (for delimit!) + + (defstruct lookahead-char-stream (bcr lo hi) + constructor: :init! + final: #t) + + (defmethod {:init! lookahead-char-stream} + (lambda (self reader (lo 0) (hi #f)) + (unless (buffered-char-reader? reader) + (set! reader (make-buffered-char-reader reader))) + (struct-instance-init! self reader lo hi))) + + (def (lookahead-char-stream-getc lcs) + (using ((lcs :- lookahead-char-stream) + (bcr lcs.bcr :- buffered-char-reader)) + (cond ((eqv? lcs.lo lcs.hi) (eof-object)) + ((= lcs.lo bcr.hi) + (let (c (buffered-char-reader-getc lcs.bcr #t)) + (buffered-char-reader-ungetc lcs.bcr c) + (set! lcs.lo (1+ lcs.lo)) + c)) + (else (let (c (string-ref bcr.buf lcs.lo)) + (set! lcs.lo (1+ lcs.lo)) + c))))) + + (def (lookahead-char-stream-peekc lcs) + (using ((lcs :- lookahead-char-stream) + (bcr lcs.bcr :- buffered-char-reader)) + (cond ((eqv? lcs.lo lcs.hi) (eof-object)) + ((= lcs.lo bcr.hi) + (peek-char bcr.port)) + (else (string-ref bcr.buf lcs.lo))))) + + (instance CharReader (lcs lookahead-char-stream) + ((read-char) (lookahead-char-stream-getc lcs)) + ((peek-char) (lookahead-char-stream-peekc lcs))) + + + (instance (L Location) (lcs lookahead-char-stream) + ((xoff) (using (bcr lcs.bcr :- buffered-char-reader) + (if (= lcs.lo bcr.hi) + (Location-xoff bcr.port) + (+ (location-xoff bcr.start) + lcs.lo)))) + ((location) + (using (bcr lcs.bcr :- buffered-char-reader) + (if (= lcs.lo bcr.hi) + (Location-location bcr.port) + (let* ((xoff (L.xoff)) + (lines (find-tail (cut < <> xoff) bcr.lines)) + (base (if lines (car lines) -1)) + (col (##fx- xoff base 1)) + (line (if lines (length lines) 0))) + (make-location bcr.port line col 1 xoff)))))) +#+end_src + + +* /File/ parsec-test.ss +#+begin_src scheme :noweb yes :tangle ../../../src/std/parsec-test.ss + ;;; -*- Gerbil -*- + ;;; (C) me at drewc.ca + ;;; :std/parsec unit-tests + + (import :std/test + :std/error + :std/interactive + :srfi/13 + :std/parser/stream + :std/parser/base + :std/monad/interface + ;;:std/monad/error + :std/monad/list + :std/monad/syntax + "instance" + "parsec/stream" + "parsec/transformer" + "parsec/combinators" + "parsec/char" + "parsec/syntax" + (only-in :std/sugar hash try) + (only-in :gerbil/core error-object? with-catch)) + (export parsec-test) + (begin-foreign (include "~~lib/_gambit#.scm")) + (defsyntax (test-inline stx) + (syntax-case stx (>) + ((_ test-case: name rest ...) + #'(test-case name (test-inline rest ...))) + ((_ > form > rest ...) + #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + ((_ > test result rest ...) + #'(begin (check test => 'result) (test-inline rest ...))) + ((_) #!void))) + + (set-test-verbose! #t) + + (def parsec-test + (test-suite "Test :std/parsec" + (test-inline + test-case: "Char Reader tests" + <> + ) + + (test-inline + test-case: "Location tests" + <> + ) + (test-inline + test-case: "Token tests" + <> + ) + + + (test-inline + test-case: "Stream tests" + <> + ) + (test-inline + test-case: "Lookahead Stream tests" + <> + ) + (test-inline + test-case: "ParsecT tests" + <> + ) + + (test-inline + test-case: "Combinator tests" + <> + <> + + ) + + (test-inline + test-case: "Dot tests" + <> + + ) + (test-inline + test-case: "Character Parsing tests" + <> + + ) + (test-inline + test-case: "Org Syntax Parsing tests" + <> + + ) + + + + + + + )) + + + + + + + +#+end_src diff --git a/src/std/parsec/char.ss b/src/std/parsec/char.ss new file mode 100644 index 000000000..54cdde1ac --- /dev/null +++ b/src/std/parsec/char.ss @@ -0,0 +1,38 @@ +(import + :std/interface :std/contract :std/instance :std/monad/syntax + #;./transformer "transformer" + #;./stream "stream" + #;./combinators "combinators") + (export #t) + +(interface (ParsecChar ParsecCombinators) + (letter) (char c) (string str (c=? char=?)) (newline)) + +(instance (P ParsecChar) (pt parsecT) + ((letter) (P.satisfy char-alphabetic?)) + ((char c) (P.satisfy (cut char=? c <>))) + ((newline) (P.char #\newline)) + ((string str (c=? char=?)) + (def lst (if (list? str) str (string->list str))) + (def (pchars cs) + (if (null? cs) (P.return []) + (du P + x <- (P.satisfy (cut c=? <> (car cs))) + xs <- (pchars (cdr cs)) + (P.return (cons x xs))))) + (du P + cs <- (pchars lst) + (P.return (list->string cs))))) + + +;; TODO: This is here for now but should be higher level. + +(interface (Parsec ParsecChar)) + +(def current-parsec (make-parameter (Parsec (make-parsecT)))) + +(def (run-parser p inp) + (using (P (current-parsec) : Parsec) + (let (res (P.run p inp)) + (if (null? res) res + (caar res))))) diff --git a/src/std/parsec/combinators.ss b/src/std/parsec/combinators.ss new file mode 100644 index 000000000..0f2e8610c --- /dev/null +++ b/src/std/parsec/combinators.ss @@ -0,0 +1,78 @@ +(import + :std/interface :std/contract :std/instance :std/monad/syntax + :std/sugar + #;./transformer "transformer" + #;./stream "stream") + (export #t) +(def Nothing (gensym)) +(def (Nothing? t) (eq? Nothing t)) +(interface (ParsecCombinators ParsecT) + (satisfy f) (eof) + (any-token) + (try p) + (many p) (many1 p) (many-till p end)) + +(def (manyAccum P p) +(using (P : ParsecCombinators) + (def (parse?) + (du P + soff <- (P.xoff) + ret <- (P.or p (P.return Nothing)) + (if (Nothing? ret) (P.return ret) + (du P eoff <- (P.xoff) + (if (= soff eoff) + (P.fail "combinator 'many' is applied to a parser that does not consume") + (P.return ret)))))) + (def (recur) + (du P kar <- (parse?) + (if (Nothing? kar) (P.return '()) + (du P kdr <- (recur) + (P.return (cons kar kdr)))))) + (recur))) + + +(instance (P ParsecCombinators) (pt parsecT) + ((try parser (Nothing (gensym))) + (def (ret-stream la (p? #t)) + (using ((la :- lookahead-char-stream) + (b la.bcr :- buffered-char-reader)) + (if (= b.lo b.hi) b.port b))) + (using (inner pt.inner : ParsecT-inner) + (du P + orig-stream <- (P.state (lambda (s) [s (make-lookahead-char-stream s) ...])) + ret <- (P.catch (inner.or parser (P.return [Nothing])) (lambda (e) (P.return [Nothing e ...]))) + new <- (P.state + (lambda (s) + (if (not (and (pair? ret) (eq? (car ret) Nothing))) + ;; success! + [ret orig-stream ...] + ;; failure + [Nothing (ret-stream s) ...]))) + (if (eq? new Nothing) + (if (pair? ret) (P.return (cdr ret)) (P.fail)) + (P.return new))))) + + ((many p) (manyAccum P p)) + ((many1 p) (du P + x <- p + xs <- (manyAccum P p) + (P.return (cons x xs)))) + ((many-till p end) + (def scan + (P.or (P.>> end (P.return [])) + (du P + x <- p + xs <- scan + (P.return (cons x xs))))) + + scan) + + ((eof) + (du P tok? <- (P.token eof-object? Nothing: Nothing) + (if (Nothing? tok?) (P.zero) (P.return tok?)))) + ((any-token (Nothing #f)) + (du P tok? <- (P.token (? (not eof-object?))) + (if (eq? tok? Nothing) (P.zero) (P.return tok?)))) + ((satisfy f) + (du P tok? <- (P.token f Nothing: Nothing) + (if (eq? tok? Nothing) (P.zero) (P.return tok?))))) diff --git a/src/std/parsec/stream.ss b/src/std/parsec/stream.ss new file mode 100644 index 000000000..d044294c2 --- /dev/null +++ b/src/std/parsec/stream.ss @@ -0,0 +1,169 @@ +(import :std/parser/stream :std/parser/base + :std/contract :std/srfi/1 :std/srfi/13 + :std/error :std/instance) +(export + #t + (import: :std/parser/base)) + +(extern namespace: std/parser/stream + char-stream-buf char-stream-port char-stream-lines + char-stream-buf-set! location-getc) + +(begin-foreign (include "~~lib/_gambit#.scm")) + (extern namespace: #f +macro-character-input-port? +macro-character-port-rlines +macro-character-port-rchars +macro-character-port-rcurline +macro-character-port-rlo) + +(interface CharReader (peek-char) (read-char)) +(interface (BufferedCharReader CharReader) (put-back previous-input)) +(instance CharReader (p :character-port) + ((read-char) (read-char p)) + ((peek-char) (peek-char p))) + +(interface Location (location) (xoff)) +(instance Location (p :character-port) + ((location) (port-location p)) + ((xoff) (##fx+ (macro-character-port-rchars p) + (macro-character-port-rlo p)))) + + +(interface Token + (token (test identity) Nothing: (Nothing #f) . args)) +(interface (TokenCharReader Token CharReader)) + +(instance (t TokenCharReader) :t + ((token (test identity) Nothing: (Nothing #f) . args) + (if (not (test (t.peek-char))) Nothing + (t.read-char)))) + +(instance Token (p :character-port) + ((token (test identity) Nothing: (Nothing #f) . args) + (if (not (test (peek-char p))) Nothing + (read-char p)))) + +(defstruct buffered-char-reader (port start buf lo hi lines) + constructor: :init! + final: #t) + +(def default-buffered-char-reader-buffer-size 1024) +(defmethod {:init! buffered-char-reader} + (lambda (self port) + (unless (macro-character-input-port? port) + (raise-bad-argument + make-buffered-char-reader "input source; character-input-port" port)) + (let (start (using (l port : Location) (l.location))) + (struct-instance-init! + self port start + (make-string default-buffered-char-reader-buffer-size) + 0 0 [])))) + +(def (buffered-char-reader-getc bcr (unbuffered #f)) + (using (bcr :- buffered-char-reader) + (if (or unbuffered (eqv? bcr.lo bcr.hi)) + (let (c (read-char bcr.port)) + (when (eq? #\newline c) + (set! (buffered-char-reader-lines bcr) + [(Location-xoff bcr.port) bcr.lines ...])) + c) + (let (c (string-ref bcr.buf bcr.lo)) + (set! bcr.lo (1+ bcr.lo)) + c)))) + +(def (buffered-char-reader-peekc bcr) + (using (bcr :- buffered-char-reader) + (if (eqv? bcr.lo bcr.hi) + (peek-char bcr.port) + (string-ref bcr.buf bcr.lo)))) + +(def (buffered-char-reader-ungetc bcr char) + (using (bcr :- buffered-char-reader) + (let* ((new-hi (1+ bcr.hi)) + (len (string-length bcr.buf)) + (str (if (not (= bcr.hi len)) bcr.buf + (let (str (make-string (* 2 len))) + (string-copy! str 0 bcr.buf) + (set! bcr.buf str) + str)))) + (set! (string-ref str bcr.hi) char) + (set! bcr.hi new-hi)))) + +(instance BufferedCharReader (bcr buffered-char-reader) + ((read-char) (buffered-char-reader-getc bcr)) + ((peek-char) (buffered-char-reader-peekc bcr)) + ((put-back char) (buffered-char-reader-ungetc bcr char))) + +(instance (L Location) (bcr buffered-char-reader) + ((xoff) (if (= bcr.lo bcr.hi) + (Location-xoff bcr.port) + (+ (location-xoff bcr.start) + bcr.lo))) + ((location) + (if (= bcr.lo bcr.hi) + (Location-location bcr.port) + (let* ((xoff (L.xoff)) + (lines (find-tail (cut < <> xoff) bcr.lines)) + (base (if lines (car lines) -1)) + (col (##fx- xoff base 1)) + (line (if lines (length lines) 0))) + (make-location bcr.port line col 1 xoff))))) + +;; lookahead-char-stream +;; bcr : a buffered char reader +;; lo : The starting xoff +;; hi : Either #f or the end xoff (for delimit!) + +(defstruct lookahead-char-stream (bcr lo hi) + constructor: :init! + final: #t) + +(defmethod {:init! lookahead-char-stream} + (lambda (self reader (lo 0) (hi #f)) + (unless (buffered-char-reader? reader) + (set! reader (make-buffered-char-reader reader))) + (struct-instance-init! self reader lo hi))) + +(def (lookahead-char-stream-getc lcs) + (using ((lcs :- lookahead-char-stream) + (bcr lcs.bcr :- buffered-char-reader)) + (cond ((eqv? lcs.lo lcs.hi) (eof-object)) + ((= lcs.lo bcr.hi) + (let (c (buffered-char-reader-getc lcs.bcr #t)) + (buffered-char-reader-ungetc lcs.bcr c) + (set! lcs.lo (1+ lcs.lo)) + c)) + (else (let (c (string-ref bcr.buf lcs.lo)) + (set! lcs.lo (1+ lcs.lo)) + c))))) + +(def (lookahead-char-stream-peekc lcs) + (using ((lcs :- lookahead-char-stream) + (bcr lcs.bcr :- buffered-char-reader)) + (cond ((eqv? lcs.lo lcs.hi) (eof-object)) + ((= lcs.lo bcr.hi) + (peek-char bcr.port)) + (else (string-ref bcr.buf lcs.lo))))) + +(instance CharReader (lcs lookahead-char-stream) + ((read-char) (lookahead-char-stream-getc lcs)) + ((peek-char) (lookahead-char-stream-peekc lcs))) + + +(instance (L Location) (lcs lookahead-char-stream) + ((xoff) (using (bcr lcs.bcr :- buffered-char-reader) + (if (= lcs.lo bcr.hi) + (Location-xoff bcr.port) + (+ (location-xoff bcr.start) + lcs.lo)))) + ((location) + (using (bcr lcs.bcr :- buffered-char-reader) + (if (= lcs.lo bcr.hi) + (Location-location bcr.port) + (let* ((xoff (L.xoff)) + (lines (find-tail (cut < <> xoff) bcr.lines)) + (base (if lines (car lines) -1)) + (col (##fx- xoff base 1)) + (line (if lines (length lines) 0))) + (make-location bcr.port line col 1 xoff)))))) diff --git a/src/std/parsec/syntax.ss b/src/std/parsec/syntax.ss new file mode 100644 index 000000000..bdc70df71 --- /dev/null +++ b/src/std/parsec/syntax.ss @@ -0,0 +1,34 @@ +(import :std/monad/syntax :std/sugar + (for-syntax :gerbil/expander :std/sugar) + #;./transformer "transformer" + #;./mid-level "char") +(export #t) +(begin-syntax (def (dot-identifier? id) + (and (identifier? id) + (let (id-str (symbol->string (stx-e id))) + (eqv? (string-ref id-str 0) #\.))))) + +(defsyntax (do-parse stx) + (syntax-case stx () + ((_ parser ps ...) + (with-syntax ((@app (syntax-local-introduce '%%app)) + (@parser (syntax-local-introduce '%%parse))) + #'(let-syntax ((__app + (syntax-rules () + ((_ rator rand (... ...)) + (@app rator rand (... ...)))))) + (let-syntax ((@app + (lambda (stx) + (syntax-case stx () + ((_ rator . args) + (dot-identifier? #'rator) + (with-syntax ((method (stx-identifier #'rator '@parser #'rator))) + (syntax/loc stx + (method . args)))) + ((_ . args) + (syntax/loc stx + (__app . args))))))) + (du (@parser (current-parsec) : Parsec) + parser ps ...))))))) + +(defrule (def-parse id forms ...) (def id (do-parse forms ...))) diff --git a/src/std/parsec/transformer.ss b/src/std/parsec/transformer.ss new file mode 100644 index 000000000..6cdee026e --- /dev/null +++ b/src/std/parsec/transformer.ss @@ -0,0 +1,85 @@ +(import :std/monad/error + :std/error + :std/monad/state + :std/monad/list + :std/monad/interface + :std/monad/syntax + :std/interface + :std/instance + #;./stream "stream") +(export #t (import: :std/monad/state)) + + +(instance (me MonadError) (et errorT) + ((return a) (du (inner et.inner : Monad) (inner.return a))) + ((>>= ma f) (du (inner et.inner : Monad) + a <- ma + (if (me.error? a) (inner.return a) (f a)))) + ((error? thing) (Error? thing)) + ((throw msg . irritants) + (du (inner et.inner : Monad) + (inner.return (Error msg irritants: irritants)))) + ((catch exp handler) + (du (inner et.inner : Monad) + val <- exp + ret <- (if (me.error? val) + (handler val) + (inner.return val)) + (inner.return ret))) + rebind: #t) + + + +(interface (ParsecT-inner MonadState ErrorHandler Zero Or Plus Fail)) +(interface (ParsecT CharReader Token Location ParsecT-inner) + (only-at xoff parser) (liftM fn . args)) +(defstruct parsecT (inner) constructor: :init!) +(defmethod {:init! parsecT} + (lambda (self (inner (ParsecT-inner (make-errorT (make-stateT []))))) + (struct-instance-init! self inner))) + +(instance (P ParsecT) (pt parsecT) + ((return a) (using (inner pt.inner :- ParsecT-inner) (inner.return a))) + ((>>= ma f) (using (inner pt.inner :- ParsecT-inner) (inner.>>= ma f))) + ((liftM fn ma) + (using (inner pt.inner :- ParsecT-inner) + (du inner x1 <- ma (inner.return (fn x1))))) + ((fail (msg "ParserError") . irritants) + (apply ParsecT-throw P msg irritants)) + ((get) (using (inner pt.inner :- ParsecT-inner) (inner.get))) + ((put! s) (using (inner pt.inner :- ParsecT-inner) (inner.put! s))) + ((run fn s) (using (inner pt.inner :- ParsecT-inner) + (def state (if (string? s) (open-input-string s) s)) + (inner.run fn state))) + ((state f) (using (inner pt.inner :- ParsecT-inner) (inner.state f))) + ((location) + (du P stream <- (P.get) + (P.return (Location-location stream)))) + ((xoff) (P.>>= (P.get) (lambda (stream) + (P.return (Location-xoff stream))))) + ((peek-char) (du P stream <- (P.get) (P.return (CharReader-peek-char stream)))) + ((read-char) (du P stream <- (P.get) (P.return (CharReader-read-char stream)))) + ((token (test identity) Nothing: (Nothing #f) . args) + (du P tok? <- (P.peek-char) + (if (not (test tok?)) (P.return Nothing) + (P.read-char)))) + ((only-at xoff parser) + (du P yoff <- (P.xoff) + (if (= yoff xoff) + parser + (P.zero)))) + ((or a b (Nothing (gensym))) + (using (inner pt.inner :- ParsecT-inner) + (du P + soff <- (P.xoff) + ret <- (P.catch (inner.or a (P.return Nothing)) (lambda (e) (P.return Nothing))) + (begin #;(displayln "In or, a was" ret " xoff " soff) + (if (eq? ret Nothing) (P.only-at soff b) (P.return ret)))))) + ((plus a b) (using (inner pt.inner :- ParsecT-inner) + (du P + soff <- (P.xoff) + (inner.plus a (P.only-at soff b))))) + ((zero) (using (inner pt.inner :- ParsecT-inner) (inner.zero))) + ((catch e h) (using (inner pt.inner :- ParsecT-inner) (inner.catch e h))) + ((throw msg . irritants) (apply ParsecT-inner-throw pt.inner msg irritants)) + ((error? e?) (using (inner pt.inner :- ParsecT-inner) (inner.error? e?)))) From f6758b59a3b4e7029c2ad9e722855a28172ddc6a Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 12:15:59 -0700 Subject: [PATCH 18/23] fix test verbosity for monad/parsec --- src/std/monad-test.ss | 4 ++-- src/std/monad/README.org | 4 ++-- src/std/parsec-test.ss | 13 +++++++------ src/std/parsec/README.org | 5 +++-- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/std/monad-test.ss b/src/std/monad-test.ss index a9d10ad2a..9b05717a7 100644 --- a/src/std/monad-test.ss +++ b/src/std/monad-test.ss @@ -22,12 +22,12 @@ ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + #'(begin (when *test-versbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) -(set-test-verbose! #t) +;; (set-test-verbose! #t) (def monad-test (test-suite "Test :std/monad" diff --git a/src/std/monad/README.org b/src/std/monad/README.org index 123c649d7..1c2395206 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -1130,12 +1130,12 @@ This is after state in the train of thought so is defined here. ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + #'(begin (when std/test#*test-versbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) - (set-test-verbose! #t) + ;; (set-test-verbose! #t) (def monad-test (test-suite "Test :std/monad" diff --git a/src/std/parsec-test.ss b/src/std/parsec-test.ss index 2ff4892ba..d5ff4b4f2 100644 --- a/src/std/parsec-test.ss +++ b/src/std/parsec-test.ss @@ -27,13 +27,13 @@ ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + #'(begin (when std/test#*test-verbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) -(set-test-verbose! #t) +;; (set-test-verbose! #f) (def parsec-test (test-suite "Test :std/parsec" (test-inline @@ -287,10 +287,11 @@ > (def-parse EOL (.or (.eof) (.newline))) > (def-parse KEY - (.>> (.string "#+") - (.many-till - (.satisfy (? (not char-whitespace?))) - (.string ": ")))) + (.>> (.string "#+") + (.many-till + (.satisfy (? (not char-whitespace?))) + (.string ": ")))) + > (def-parse VALUE (.many-till (.any-token) EOL)) > (def-parse KEYWORD diff --git a/src/std/parsec/README.org b/src/std/parsec/README.org index ea88c34bd..0da7483f1 100644 --- a/src/std/parsec/README.org +++ b/src/std/parsec/README.org @@ -72,6 +72,7 @@ The first line is a keyword. (.many-till (.satisfy (? (not char-whitespace?))) (.string ": ")))) + > (def-parse VALUE (.many-till (.any-token) EOL)) > (def-parse KEYWORD @@ -1058,13 +1059,13 @@ non-determanistic parsers that's a possibility! ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (displayln "... " 'form) form (test-inline > rest ...))) + #'(begin (when std/test#*test-verbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) - (set-test-verbose! #t) + ;; (set-test-verbose! #f) (def parsec-test (test-suite "Test :std/parsec" (test-inline From 0449a12061a4511d01e67c0da8373d5a1a87ba4d Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 13:03:40 -0700 Subject: [PATCH 19/23] Fix monad-test imports --- src/std/monad-test.ss | 2 +- src/std/monad/README.org | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/std/monad-test.ss b/src/std/monad-test.ss index 9b05717a7..69274e2bc 100644 --- a/src/std/monad-test.ss +++ b/src/std/monad-test.ss @@ -22,7 +22,7 @@ ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (when *test-versbose* (displayln "... " 'form)) form (test-inline > rest ...))) + #'(begin (when std/test#*test-verbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) diff --git a/src/std/monad/README.org b/src/std/monad/README.org index 1c2395206..e5c4e585d 100644 --- a/src/std/monad/README.org +++ b/src/std/monad/README.org @@ -1130,7 +1130,7 @@ This is after state in the train of thought so is defined here. ((_ test-case: name rest ...) #'(test-case name (test-inline rest ...))) ((_ > form > rest ...) - #'(begin (when std/test#*test-versbose* (displayln "... " 'form)) form (test-inline > rest ...))) + #'(begin (when std/test#*test-verbose* (displayln "... " 'form)) form (test-inline > rest ...))) ((_ > test result rest ...) #'(begin (check test => 'result) (test-inline rest ...))) ((_) #!void))) From ba40c39c7882e9172afd63a32e3391912d7d4b1b Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 14:03:17 -0700 Subject: [PATCH 20/23] gxtest: Add --quiet flag --- src/tools/gxtest.ss | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/src/tools/gxtest.ss b/src/tools/gxtest.ss index 74b0e858c..e1eef0720 100644 --- a/src/tools/gxtest.ss +++ b/src/tools/gxtest.ss @@ -5,6 +5,7 @@ :gerbil/gambit :std/cli/getopt :std/format + :std/misc/ports :std/iter :std/pregexp :std/sort @@ -14,13 +15,18 @@ ./env) (export main) +(extern namespace: "std/test" + !test-suite-error) + (def (main . args) (call-with-getopt gxtest-main args program: "gxtest" help: "run Gerbil tests in the command line" global-env-flag (flag 'verbose "-v" - help: "run in verbose mode where all test execution progress is displayed in stdout.") + help: "run in verbose mode where all test execution progress is displayed in stdout.") + (flag 'quiet "--quiet" + help: "run in in quiet mode where only errors are displayed") (option 'run "-r" "--run" help: "only run test suites whose name matches a given regex") ;; TODO this should be a multi-option for multiple features @@ -34,11 +40,11 @@ (let-hash opt (cond ((null? .args) - (run-tests ["."] .run .features .?verbose)) + (run-tests ["."] .run .features .?verbose .?quiet)) (else - (run-tests .args .run .features .?verbose))))) + (run-tests .args .run .features .?verbose .?quiet))))) -(def (run-tests args filter features verbose?) +(def (run-tests args filter features verbose? quiet?) (def import-errors []) (def filter-rx (and filter (pregexp filter))) @@ -67,7 +73,15 @@ (setup!)) (for ([name . suite] suites) (displayln ">>> run " name) - (run-test-suite! suite)) + (let (buf (and quiet? (open-string ""))) + (parameterize ((current-error-port + (or buf (current-error-port))) + (current-output-port + (or buf (current-output-port)))) + (run-test-suite! suite)) + (when buf (close-port buf)) + (when (and quiet? (!test-suite-error suite)) + (copy-port buf (current-output-port))))) (finally (when cleanup! (displayln ">>> cleanup") From fd188f1a3fedf8658a24b63ce986efc30ac18af8 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 14:08:13 -0700 Subject: [PATCH 21/23] Add --quiet to ci workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3f2774fbb..eaaaa110f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,4 +66,4 @@ jobs: - name: Run Gerbil tests run: | export PATH=/opt/gerbil/bin:$PATH - gxtest src/gerbil/test/... src/std/... src/lang/... + gxtest --quiet src/gerbil/test/... src/std/... src/lang/... From 36a2d91742492d216ffc077e32ada805c7d7a496 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Sat, 31 Aug 2024 17:16:20 -0700 Subject: [PATCH 22/23] Fix the try combinator/lookahead at EOF --- src/std/parsec-test.ss | 56 ++++---- src/std/parsec/README.org | 240 ++++++++++++++++++++++------------ src/std/parsec/char.ss | 2 +- src/std/parsec/combinators.ss | 8 +- src/std/parsec/stream.ss | 4 +- src/std/parsec/syntax.ss | 41 +++++- src/std/parsec/transformer.ss | 5 +- 7 files changed, 243 insertions(+), 113 deletions(-) diff --git a/src/std/parsec-test.ss b/src/std/parsec-test.ss index d5ff4b4f2..e084793ed 100644 --- a/src/std/parsec-test.ss +++ b/src/std/parsec-test.ss @@ -267,49 +267,59 @@ (test-inline test-case: "Dot tests" - > (caar (do-parse (.run (.return 42) ""))) + > (caar (do-parsec (.run (.return 42) ""))) 42 - > (def-parse FourTwo (.char #\4) (.char #\2) (.return 42)) - > (caar (do-parse (.run FourTwo "42"))) + > (def-parsec FourTwo (.char #\4) (.char #\2) (.return 42)) + > (caar (do-parsec (.run FourTwo "42"))) 42 ) (test-inline test-case: "Character Parsing tests" - > (caar (do-parse (.run (.string "asd") "asdfjkl;"))) + > (caar (do-parsec (.run (.string "asd") "asdfjkl;"))) "asd" - > (caar (do-parse (.run (.string "asd" char-ci=?) "AsDfjkl;"))) + > (caar (do-parsec (.run (.string "asd" char-ci=?) "AsDfjkl;"))) "AsD" ) (test-inline test-case: "Org Syntax Parsing tests" - > (def-parse EOL (.or (.eof) (.newline))) + > (def-parsec-bnf + EOL ::= (.or (.eof) (.newline)) - > (def-parse KEY - (.>> (.string "#+") - (.many-till - (.satisfy (? (not char-whitespace?))) - (.string ": ")))) + KEY ::= (.>> (.string "#+") + (.many-till + (.satisfy (? (not char-whitespace?))) + (.string ": "))) - > (def-parse VALUE (.many-till (.any-token) EOL)) + VALUE ::= (.many-till (.any-token) EOL) - > (def-parse KEYWORD - key <- (.liftM list->string KEY) - value <- (.liftM list->string VALUE) - (.return ['keyword key: key value: value])) + KEYWORD ::= key <- (.liftM list->string KEY) + value <- (.liftM list->string VALUE) + (.return ['keyword key: key value: value])) > (run-parser KEYWORD "#+TITLE: Org Mode keyword!") (keyword key: "TITLE" value: "Org Mode keyword!") - + > (def-parsec-bnf + GENERIC-LINE ::= (.many-till (.any-token) EOL) + + LINE-NO-TRY ::= (.or KEYWORD GENERIC-LINE)) - - + > (run-parser LINE-NO-TRY "asd\njkl") + (#\a #\s #\d) + > (run-parser LINE-NO-TRY "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") + > (run-parser LINE-NO-TRY "#+heh yeah!") + #f + > (def-parsec-bnf + LINE ::= (.or (.try KEYWORD) GENERIC-LINE)) - - - - + > (run-parser LINE "asd\njkl") + (#\a #\s #\d) + > (run-parser LINE "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") + > (list->string (run-parser LINE "#+heh yeah!")) + "#+heh yeah!" ) diff --git a/src/std/parsec/README.org b/src/std/parsec/README.org index 0da7483f1..982136d89 100644 --- a/src/std/parsec/README.org +++ b/src/std/parsec/README.org @@ -52,7 +52,9 @@ Here's a quick example... #+end_src ... with the first two lines helping to explain the exact reasoning -behind recursive descent and backtracking. +behind recursive descent and backtracking +. +*** First Line The first line is a keyword. @@ -65,32 +67,64 @@ The first line is a keyword. - VALUE :: A string consisting of any characters but a newline. #+begin_src scheme :noweb-ref org-mode-parser-test - > (def-parse EOL (.or (.eof) (.newline))) + > (def-parsec-bnf + EOL ::= (.or (.eof) (.newline)) - > (def-parse KEY - (.>> (.string "#+") - (.many-till - (.satisfy (? (not char-whitespace?))) - (.string ": ")))) + KEY ::= (.>> (.string "#+") + (.many-till + (.satisfy (? (not char-whitespace?))) + (.string ": "))) - > (def-parse VALUE (.many-till (.any-token) EOL)) + VALUE ::= (.many-till (.any-token) EOL) - > (def-parse KEYWORD - key <- (.liftM list->string KEY) - value <- (.liftM list->string VALUE) - (.return ['keyword key: key value: value])) + KEYWORD ::= key <- (.liftM list->string KEY) + value <- (.liftM list->string VALUE) + (.return ['keyword key: key value: value])) > (run-parser KEYWORD "#+TITLE: Org Mode keyword!") (keyword key: "TITLE" value: "Org Mode keyword!") - +#+end_src - - +*** Second Line :: =.try= a keyword! - - - - +The second line is not a keyword but the first 6 characters means it +could be. =LL(6)=? Infinite lookahead! + +Without a conflicting prefix it works great. + +#+begin_src scheme :noweb-ref org-mode-parser-test + > (def-parsec-bnf + GENERIC-LINE ::= (.many-till (.any-token) EOL) + + LINE-NO-TRY ::= (.or KEYWORD GENERIC-LINE)) + + > (run-parser LINE-NO-TRY "asd\njkl") + (#\a #\s #\d) + > (run-parser LINE-NO-TRY "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") +#+end_src + +But we run into issues if we have a conflict. + +#+begin_src scheme :noweb-ref org-mode-parser-test + > (run-parser LINE-NO-TRY "#+heh yeah!") + #f +#+end_src + +That's because =.or= is always =LL(1)=. + +We can fix that using =.try=. + +#+begin_src scheme :noweb-ref org-mode-parser-test + > (def-parsec-bnf + LINE ::= (.or (.try KEYWORD) GENERIC-LINE)) + + > (run-parser LINE "asd\njkl") + (#\a #\s #\d) + > (run-parser LINE "#+TITLE: Org Mode keyword!") + (keyword key: "TITLE" value: "Org Mode keyword!") + > (list->string (run-parser LINE "#+heh yeah!")) + "#+heh yeah!" #+end_src @@ -110,51 +144,90 @@ The first line is a keyword. Here's the thing. I want gerbil parsec to be popular and short form. #+begin_src scheme :noweb-ref dot-test - > (caar (do-parse (.run (.return 42) ""))) + > (caar (do-parsec (.run (.return 42) ""))) 42 - > (def-parse FourTwo (.char #\4) (.char #\2) (.return 42)) - > (caar (do-parse (.run FourTwo "42"))) + > (def-parsec FourTwo (.char #\4) (.char #\2) (.return 42)) + > (caar (do-parsec (.run FourTwo "42"))) 42 #+end_src #+begin_src scheme :tangle "syntax.ss" - (import :std/monad/syntax :std/sugar - (for-syntax :gerbil/expander :std/sugar) - #;./transformer "transformer" - #;./mid-level "char") - (export #t) - (begin-syntax (def (dot-identifier? id) - (and (identifier? id) - (let (id-str (symbol->string (stx-e id))) - (eqv? (string-ref id-str 0) #\.))))) - - (defsyntax (do-parse stx) - (syntax-case stx () - ((_ parser ps ...) - (with-syntax ((@app (syntax-local-introduce '%%app)) - (@parser (syntax-local-introduce '%%parse))) - #'(let-syntax ((__app - (syntax-rules () - ((_ rator rand (... ...)) - (@app rator rand (... ...)))))) - (let-syntax ((@app - (lambda (stx) - (syntax-case stx () - ((_ rator . args) - (dot-identifier? #'rator) - (with-syntax ((method (stx-identifier #'rator '@parser #'rator))) - (syntax/loc stx - (method . args)))) - ((_ . args) - (syntax/loc stx - (__app . args))))))) - (du (@parser (current-parsec) : Parsec) - parser ps ...))))))) - - (defrule (def-parse id forms ...) (def id (do-parse forms ...))) + (import :std/monad/syntax :std/sugar + (for-syntax :gerbil/expander :std/sugar) + #;./transformer "transformer" + #;./mid-level "char") + (export #t) + (begin-syntax (def (dot-identifier? id) + (and (identifier? id) + (let (id-str (symbol->string (stx-e id))) + (eqv? (string-ref id-str 0) #\.))))) + + (defsyntax (do-parsec stx) + (syntax-case stx () + ((_ parser ps ...) + (with-syntax ((@app (syntax-local-introduce '%%app)) + (@parser (syntax-local-introduce '%%parse))) + #'(let-syntax ((__app + (syntax-rules () + ((_ rator rand (... ...)) + (@app rator rand (... ...)))))) + (let-syntax ((@app + (lambda (stx) + (syntax-case stx () + ((_ rator . args) + (dot-identifier? #'rator) + (with-syntax ((method (stx-identifier #'rator '@parser #'rator))) + (syntax/loc stx + (method . args)))) + ((_ . args) + (syntax/loc stx + (__app . args))))))) + (du (@parser (current-parsec) : Parsec) + parser ps ...))))))) + + (defrule (def-parsec id forms ...) (def id (do-parsec forms ...))) + + (defsyntax (def-parsec-bnf stx) + (def (expand-body forms) + (with-syntax (((frms ...) + (stx-foldr + (lambda (x xs) + (if (and + (stx-pair? xs) + (identifier? (stx-car xs)) + (free-identifier=? + (stx-car xs) #'::=)) + [] + (cons x xs))) + [] forms)) + (forms forms)) + (syntax-case #'(frms ...) () + ((foo ...) + #'(do-parsec frms ...))))) + (def (expand-tail forms) + (with-syntax* (((rest ...) + (let lp ((forms forms)) + (if (stx-null? forms) forms + (let (id (and (not (stx-null? (stx-cdr forms))) + (stx-car (stx-cdr forms)))) + (if (and (identifier? id) + (free-identifier=? + id #'::=)) + forms + (lp (stx-cdr forms)))))))) + (if (stx-null? #'(rest ...)) + #'(begin rest ...) + #'(def-parsec-bnf rest ...)))) + (syntax-case stx (:body :tail) + ((macro ID cceq forms ...) + (with-syntax ((body (expand-body #'(forms ...))) + (tail (expand-tail #'(forms ...)))) + #'(begin (def ID body) tail))))) + #+end_src + * Character Parsers @@ -187,7 +260,7 @@ There are times when we want to match against a string rather than single characters. #+begin_src scheme :noweb-ref char-test - > (caar (do-parse (.run (.string "asd") "asdfjkl;"))) + > (caar (do-parsec (.run (.string "asd") "asdfjkl;"))) "asd" #+end_src @@ -195,7 +268,7 @@ Case is often not a concern so insensitive is sometimes prefered and not frowed upon. #+begin_src scheme :noweb-ref char-test - > (caar (do-parse (.run (.string "asd" char-ci=?) "AsDfjkl;"))) + > (caar (do-parsec (.run (.string "asd" char-ci=?) "AsDfjkl;"))) "AsD" #+end_src @@ -237,7 +310,7 @@ frowed upon. (def (run-parser p inp) (using (P (current-parsec) : Parsec) (let (res (P.run p inp)) - (if (null? res) res + (if (null? res) #f (caar res))))) @@ -308,25 +381,27 @@ But occaisionally there's a need to look further ahead, consume input, fail, and backtrack! #+begin_src scheme :noweb-ref comb-impl - ((try parser (Nothing (gensym))) - (def (ret-stream la (p? #t)) - (using ((la :- lookahead-char-stream) - (b la.bcr :- buffered-char-reader)) - (if (= b.lo b.hi) b.port b))) - (using (inner pt.inner : ParsecT-inner) - (du P - orig-stream <- (P.state (lambda (s) [s (make-lookahead-char-stream s) ...])) - ret <- (P.catch (inner.or parser (P.return [Nothing])) (lambda (e) (P.return [Nothing e ...]))) - new <- (P.state - (lambda (s) - (if (not (and (pair? ret) (eq? (car ret) Nothing))) - ;; success! - [ret orig-stream ...] - ;; failure - [Nothing (ret-stream s) ...]))) - (if (eq? new Nothing) - (if (pair? ret) (P.return (cdr ret)) (P.fail)) - (P.return new))))) + ((try parser (Nothing (gensym))) + (def (ret-stream la (p? #t)) + (using ((la :- lookahead-char-stream) + (b la.bcr :- buffered-char-reader)) + (if (= b.lo b.hi) b.port b))) + (using (inner pt.inner : ParsecT-inner) + (du P + orig-stream <- (P.state (lambda (s) [s (make-lookahead-char-stream s) ...])) + ret <- (P.catch (inner.or parser (P.return [Nothing])) (lambda (e) (P.return [Nothing e ...]))) + new <- (P.state + (lambda (s) + (if (not (and (pair? ret) (eq? (car ret) Nothing))) + ;; success! + [ret orig-stream ...] + ;; failure + [Nothing (ret-stream s) ...]))) + (begin #;(displayln "In try new: " ret new (eq? new Nothing)) + (if (eq? new Nothing) + (if (and (pair? ret) (not (null? (cdr ret)))) + (P.return (cdr ret)) (P.fail)) + (P.return new)))))) #+end_src @@ -538,9 +613,10 @@ AND the first parser does not consume any characters. #+begin_src scheme :noweb-ref only-at-or-plus ((only-at xoff parser) (du P yoff <- (P.xoff) + (begin #;(displayln "End or @ xoff " yoff) (if (= yoff xoff) parser - (P.zero)))) + (P.zero))))) ((or a b (Nothing (gensym))) (using (inner pt.inner :- ParsecT-inner) (du P @@ -551,7 +627,7 @@ AND the first parser does not consume any characters. ((plus a b) (using (inner pt.inner :- ParsecT-inner) (du P soff <- (P.xoff) - (inner.plus a (P.only-at soff b))))) + (inner.plus a (P.only-at soff b))))) #+end_src ** Interface and instance @@ -945,6 +1021,7 @@ non-determanistic parsers that's a possibility! (string-copy! str 0 bcr.buf) (set! bcr.buf str) str)))) + #;(displayln "ungetc " bcr.hi " C " char) (set! (string-ref str bcr.hi) char) (set! bcr.hi new-hi)))) @@ -989,8 +1066,9 @@ non-determanistic parsers that's a possibility! (cond ((eqv? lcs.lo lcs.hi) (eof-object)) ((= lcs.lo bcr.hi) (let (c (buffered-char-reader-getc lcs.bcr #t)) + (when (not (eof-object? c)) (buffered-char-reader-ungetc lcs.bcr c) - (set! lcs.lo (1+ lcs.lo)) + (set! lcs.lo (1+ lcs.lo))) c)) (else (let (c (string-ref bcr.buf lcs.lo)) (set! lcs.lo (1+ lcs.lo)) diff --git a/src/std/parsec/char.ss b/src/std/parsec/char.ss index 54cdde1ac..261e475de 100644 --- a/src/std/parsec/char.ss +++ b/src/std/parsec/char.ss @@ -34,5 +34,5 @@ (def (run-parser p inp) (using (P (current-parsec) : Parsec) (let (res (P.run p inp)) - (if (null? res) res + (if (null? res) #f (caar res))))) diff --git a/src/std/parsec/combinators.ss b/src/std/parsec/combinators.ss index 0f2e8610c..6e4375862 100644 --- a/src/std/parsec/combinators.ss +++ b/src/std/parsec/combinators.ss @@ -48,9 +48,11 @@ [ret orig-stream ...] ;; failure [Nothing (ret-stream s) ...]))) - (if (eq? new Nothing) - (if (pair? ret) (P.return (cdr ret)) (P.fail)) - (P.return new))))) + (begin #;(displayln "In try new: " ret new (eq? new Nothing)) + (if (eq? new Nothing) + (if (and (pair? ret) (not (null? (cdr ret)))) + (P.return (cdr ret)) (P.fail)) + (P.return new)))))) ((many p) (manyAccum P p)) ((many1 p) (du P diff --git a/src/std/parsec/stream.ss b/src/std/parsec/stream.ss index d044294c2..1e9e1e6cd 100644 --- a/src/std/parsec/stream.ss +++ b/src/std/parsec/stream.ss @@ -87,6 +87,7 @@ macro-character-port-rlo) (string-copy! str 0 bcr.buf) (set! bcr.buf str) str)))) + #;(displayln "ungetc " bcr.hi " C " char) (set! (string-ref str bcr.hi) char) (set! bcr.hi new-hi)))) @@ -131,8 +132,9 @@ macro-character-port-rlo) (cond ((eqv? lcs.lo lcs.hi) (eof-object)) ((= lcs.lo bcr.hi) (let (c (buffered-char-reader-getc lcs.bcr #t)) + (when (not (eof-object? c)) (buffered-char-reader-ungetc lcs.bcr c) - (set! lcs.lo (1+ lcs.lo)) + (set! lcs.lo (1+ lcs.lo))) c)) (else (let (c (string-ref bcr.buf lcs.lo)) (set! lcs.lo (1+ lcs.lo)) diff --git a/src/std/parsec/syntax.ss b/src/std/parsec/syntax.ss index bdc70df71..7e8aa50a6 100644 --- a/src/std/parsec/syntax.ss +++ b/src/std/parsec/syntax.ss @@ -8,7 +8,7 @@ (let (id-str (symbol->string (stx-e id))) (eqv? (string-ref id-str 0) #\.))))) -(defsyntax (do-parse stx) +(defsyntax (do-parsec stx) (syntax-case stx () ((_ parser ps ...) (with-syntax ((@app (syntax-local-introduce '%%app)) @@ -31,4 +31,41 @@ (du (@parser (current-parsec) : Parsec) parser ps ...))))))) -(defrule (def-parse id forms ...) (def id (do-parse forms ...))) +(defrule (def-parsec id forms ...) (def id (do-parsec forms ...))) + +(defsyntax (def-parsec-bnf stx) + (def (expand-body forms) + (with-syntax (((frms ...) + (stx-foldr + (lambda (x xs) + (if (and + (stx-pair? xs) + (identifier? (stx-car xs)) + (free-identifier=? + (stx-car xs) #'::=)) + [] + (cons x xs))) + [] forms)) + (forms forms)) + (syntax-case #'(frms ...) () + ((foo ...) + #'(do-parsec frms ...))))) + (def (expand-tail forms) + (with-syntax* (((rest ...) + (let lp ((forms forms)) + (if (stx-null? forms) forms + (let (id (and (not (stx-null? (stx-cdr forms))) + (stx-car (stx-cdr forms)))) + (if (and (identifier? id) + (free-identifier=? + id #'::=)) + forms + (lp (stx-cdr forms)))))))) + (if (stx-null? #'(rest ...)) + #'(begin rest ...) + #'(def-parsec-bnf rest ...)))) + (syntax-case stx (:body :tail) + ((macro ID cceq forms ...) + (with-syntax ((body (expand-body #'(forms ...))) + (tail (expand-tail #'(forms ...)))) + #'(begin (def ID body) tail))))) diff --git a/src/std/parsec/transformer.ss b/src/std/parsec/transformer.ss index 6cdee026e..4de2b9c0a 100644 --- a/src/std/parsec/transformer.ss +++ b/src/std/parsec/transformer.ss @@ -65,9 +65,10 @@ (P.read-char)))) ((only-at xoff parser) (du P yoff <- (P.xoff) + (begin #;(displayln "End or @ xoff " yoff) (if (= yoff xoff) parser - (P.zero)))) + (P.zero))))) ((or a b (Nothing (gensym))) (using (inner pt.inner :- ParsecT-inner) (du P @@ -78,7 +79,7 @@ ((plus a b) (using (inner pt.inner :- ParsecT-inner) (du P soff <- (P.xoff) - (inner.plus a (P.only-at soff b))))) + (inner.plus a (P.only-at soff b))))) ((zero) (using (inner pt.inner :- ParsecT-inner) (inner.zero))) ((catch e h) (using (inner pt.inner :- ParsecT-inner) (inner.catch e h))) ((throw msg . irritants) (apply ParsecT-inner-throw pt.inner msg irritants)) From 997fb08ec646f371a80bae31b71c4c899221bd82 Mon Sep 17 00:00:00 2001 From: Drew Crampsie Date: Wed, 15 Jan 2025 00:33:19 +0000 Subject: [PATCH 23/23] Fix gxhttpd bug: No content length when chunking --- src/tools/gxhttpd.ss | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/tools/gxhttpd.ss b/src/tools/gxhttpd.ss index ef35ab50e..8d30104a3 100644 --- a/src/tools/gxhttpd.ss +++ b/src/tools/gxhttpd.ss @@ -478,11 +478,11 @@ => :procedure (let* ((content-type (path-extension->mime-type-name path)) (headers - [(if content-type + [["Content-Length" :: (number->string (file-info-size info))] + (if content-type ["Content-Type" :: content-type] ["Content-Type" :: "application/octet-stream"]) - ["Last-Modified" :: (number->string (exact (floor (time->seconds (file-info-last-modification-time info)))))] - ["Content-Length" :: (number->string (file-info-size info))]])) + ["Last-Modified" :: (number->string (exact (floor (time->seconds (file-info-last-modification-time info)))))]])) (if (fx<= (file-info-size info) max-file-cache-size) ;; cache the content @@ -501,12 +501,16 @@ (using (req :- http-request) (case req.method ((GET) - (http-response-file res headers path)) + ;; RFC 9112 states that "a sender (server) MUST NOT send + ;; a Content-Length header field in any message that + ;; contains a Transfer-Encoding header field.". + (http-response-file res (cdr headers) path)) ((HEAD) (http-response-write res 200 headers #f)) (else (http-response-write-condition res Forbidden)))))))) + (def (find-handler tab server-path) (let loop ((path server-path)) (cond