Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Regex schemas #312

Closed
wants to merge 32 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
b720f6f
wip
ikitommi Nov 13, 2020
355277b
wip
ikitommi Nov 13, 2020
619f7a1
Initial NFA code drop (from my Seqexp 'perf' branch).
Nov 24, 2020
ccc1fd6
Reimplement regex `validator`.
Nov 24, 2020
05eb30d
Seq regex parsing, first draft.
Nov 25, 2020
b4e1e5c
Make regex parser behave passably and add `malli.regex/parse(r)`.
Nov 26, 2020
99766f4
re/fn -> re/is
Nov 26, 2020
f71beba
Disable some broken stuff.
Nov 26, 2020
ebba0e0
Add some regex `explainer` sketches.
Nov 26, 2020
95fa6e7
Push `path` and `in` into ExplanatoryVM.
Nov 27, 2020
5de3895
Make regex schemas work with regular `validator` and `explainer`.
Nov 27, 2020
a18f729
Add `explain` instruction.
Nov 30, 2020
873c350
Disallow trailing seq via `end` instruction.
Nov 30, 2020
7c5a0e7
Fix ::end-of-input and ::input-remaining schema args.
Nov 30, 2020
9390282
Add missing regex validation `end` clause.
Nov 30, 2020
8d6876c
Extract `regex-validator` and `regex-explainer`.
Nov 30, 2020
dfc13b5
Improve regex LensSchema impls.
Nov 30, 2020
3a935e1
Add regex `validate` tests (imitating Seqexp tests).
Nov 30, 2020
680e669
Move bool coercion inside `exec-recognizer`.
Nov 30, 2020
ed62e6b
Add seqexp generators.
Dec 1, 2020
cd2a84b
Remove fixed FIXME.
Dec 1, 2020
75a1cf1
Move regex macros to separate namespace.
Dec 1, 2020
d392ea5
Move regex compiler to separate namespace.
Dec 1, 2020
8b32557
Make everything compile on cljs.
Dec 1, 2020
cdf92d8
Fix regex VM on cljs.
Dec 1, 2020
046169d
Add :nested schema for preventing regex schema 'inlining'.
Dec 1, 2020
7168669
Use list for regex parse stack.
Dec 2, 2020
9c329d4
Add seqexp transformers.
Dec 2, 2020
2efd016
Fix regex-transformer self-enter/leave.
Dec 3, 2020
aae9cb7
Unify encoder-regex and decoder-regex into transformer-regex.
Dec 3, 2020
c900902
opt: use ^:const
Dec 3, 2020
32d700c
Optimize regex decoder space (and time) usage.
Dec 3, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions deps.edn
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
lambdaisland/kaocha-junit-xml {:mvn/version "0.0-70"}
metosin/spec-tools {:mvn/version "0.10.3"}
metosin/schema-tools {:mvn/version "0.12.2"}
minimallist/minimallist {:mvn/version "0.0.6"}
net.cgrand/seqexp {:mvn/version "0.6.2"}
borkdude/sci {:git/url "https://github.com/borkdude/sci.git"
:sha "b310358cd41f761d7bbd50227a36d1160938ce71"}
prismatic/schema {:mvn/version "1.1.12"}
Expand Down
315 changes: 314 additions & 1 deletion src/malli/core.cljc
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
(ns malli.core
(:refer-clojure :exclude [eval type -deref deref -lookup -key])
#?(:cljs (:require-macros [malli.regex.macros :as rem]))
(:require [malli.sci :as ms]
[malli.regex :as re]
[malli.regex.compiler :as rec]
#?(:clj [malli.regex.macros :as rem])
[malli.registry :as mr])
#?(:clj (:import (java.util.regex Pattern)
(clojure.lang IDeref MapEntry))))
Expand Down Expand Up @@ -48,6 +52,11 @@
(-ref [this] "returns the reference name")
(-deref [this] "returns the referenced schema"))

(defprotocol RegexSchema
(-regex [this] "returns the regex")
(-explainer-regex [this path])
(-transformer-regex [this transformer method options]))

(defprotocol Walker
(-accept [this schema path options])
(-inner [this schema path options])
Expand Down Expand Up @@ -242,6 +251,29 @@
(and max f) (fn [x] (<= (f x) max))
max (fn [x] (<= x max)))))

(defn -into-regex [x]
(cond
(satisfies? RegexSchema x) (-regex x)
#_#_
(= :ref (-type x)) (re/lazy (fn []
(prn "lazy!" (-form x))
(if (false? (:inlined (-properties x)))
(re/is (-validator x))
(-into-regex (-deref x)))))
:else (re/is (-validator x))))

(defn -into-explainer-regex [x path]
(if (satisfies? RegexSchema x)
(-explainer-regex x path)
(re/explain-item x (-explainer x path))))

(defn -into-transformer-regex [x transformer method options]
(if (satisfies? RegexSchema x)
(-transformer-regex x transformer method options)
(case method
:encode (re/item-transformer (-validator x) (-transformer x transformer method options))
:decode (re/decoded-item (-transformer x transformer method options) (-validator x)))))

;;
;; Protocol Cache
;;
Expand Down Expand Up @@ -1020,6 +1052,161 @@
(-get [_ key default] (get children key default))
(-set [this key value] (-set-assoc-children this key value)))))))

(defn- regex-validator [schema]
(let [automaton (rec/compile (rem/asm
include (-regex schema)
end schema))]
(fn [x] (and (sequential? x) (re/exec-recognizer automaton x)))))

(defn- regex-explainer [schema path]
(let [automaton (rec/compile (rem/asm
include (-explainer-regex schema path)
end schema))]
(fn [x in acc]
(if (sequential? x)
(if-some [errors (re/exec-explainer automaton path x in -error)]
(into acc errors)
acc)
(conj acc (-error path in schema x ::invalid-type))))))

(defn- regex-transformer [schema transformer method options]
(let [this-transformer (-value-transformer transformer schema method options)
enter-this (or (:enter this-transformer) identity)
leave-this (or (:leave this-transformer) identity)
automaton (rec/compile (rem/asm
include (-transformer-regex schema transformer method options)
end schema))]
(if (= method :encode)
{:enter (fn [coll]
(let [coll (enter-this coll)
ts (re/exec-encoder-assignment automaton coll)]
(if ts
[(map (fn [{:keys [enter]} v] (if enter (enter v) v)) ts coll) ts]
[coll])))
:leave (fn [[coll ts]]
(leave-this (if ts
(map (fn [{:keys [leave]} v] (if leave (leave v) v)) ts coll)
coll)))}
{:enter (fn [coll]
(let [coll (enter-this coll)]
(or (re/exec-decoder-assignment automaton coll) [coll])))
:leave (fn [[coll ts]]
(leave-this (if ts
(map (fn [{:keys [leave]} v] (if leave (leave v) v)) ts coll)
coll)))})))

(defn -sequence-schema [{:keys [type re]}]
^{:type ::into-schema}
(reify IntoSchema
(-into-schema [_ properties children options]
;(-check-children! type properties children {:min 1, :max 1})
(let [[child :as children] (mapv #(schema % options) children)
form (-create-form type properties (mapv -form children))]
^{:type ::schema}
(reify
Schema
(-type [_] type)
(-type-properties [_])
(-validator [this] (regex-validator this))
(-explainer [this path] (regex-explainer this path))
(-transformer [this transformer method options] (regex-transformer this transformer method options))
(-walk [this walker path options]
(if (-accept walker this path options)
(-outer walker this path [(-inner walker child path options)] options)))
(-properties [_] properties)
(-options [_] options)
(-children [_] children)
(-form [_] form)

LensSchema
(-keep [_] true)
(-get [_ key default] (get children key default))
(-set [this key value] (-set-assoc-children this key value))

RegexSchema
(-regex [_] (re properties (map -into-regex children)))
(-explainer-regex [_ path]
(re properties (map-indexed (fn [i s] (-into-explainer-regex s (conj path i)))
children)))
(-transformer-regex [_ transformer method options]
(re properties (map #(-into-transformer-regex % transformer method options) children))))))))

(defn -sequence-entry-schema [{:keys [type re]}]
^{:type ::into-schema}
(reify IntoSchema
(-into-schema [_ properties children options]
;(-check-children! type properties children {:min 1, :max 1})
(let [[child :as children] (mapv (fn [[k c]] [k (schema c options)]) children)
form (-create-form type properties (mapv (fn [[k s]] [k (-form s)]) children))]
^{:type ::schema}
(reify
Schema
(-type [_] type)
(-type-properties [_])
(-validator [this] (regex-validator this))
(-explainer [this path] (regex-explainer this path))
(-transformer [this transformer method options] (regex-transformer this transformer method options))
(-walk [this walker path options]
(if (-accept walker this path options)
(-outer walker this path [(-inner walker child path options)] options)))
(-properties [_] properties)
(-options [_] options)
(-children [_] children)
(-form [_] form)

LensSchema
(-keep [_] true)
(-get [_ key default]
(or (some (fn [[k c]] (when (= k key) c)) children)
default))
(-set [this key value] (-set-entries this key value))

RegexSchema
(-regex [_] (re properties (map (fn [[k s]] [k (-into-regex s)]) children)))
(-explainer-regex [_ path]
(re properties (map (fn [[k s]] [k (-into-explainer-regex s (conj path k))])
children)))
(-transformer-regex [_ transformer method options]
(re properties (map (fn [[k s]] [k (-into-transformer-regex s transformer method options)])
children))))))))

(defn -nested-schema []
^{:type ::into-schema}
(reify IntoSchema
(-into-schema [_ properties children options]
(-check-children! :nested properties children {:min 1, :max 1})
(let [[schema :as children] (map #(schema % options) children)
form (-create-form :nested properties (map -form children))]
^{:type ::schema}
(reify
Schema
(-type [_] :nested)
(-type-properties [_])
(-validator [_]
(let [validator' (regex-validator schema)]
(fn [x] (or (nil? x) (validator' x)))))
(-explainer [_ path]
(let [explainer' (regex-explainer schema (conj path 0))]
(fn explain [x in acc]
(if (nil? x) acc (explainer' x in acc)))))
(-transformer [this transformer method options]
(-parent-children-transformer this children transformer method options))
(-walk [this walker path options]
(if (-accept walker this path options)
(-outer walker this path (-inner-indexed walker path children options) options)))
(-properties [_] properties)
(-options [_] options)
(-children [_] children)
(-parent [_] (-nested-schema))
(-form [_] form)

LensSchema
(-keep [_])
(-get [_ key default] (if (= 0 key) schema default))
(-set [this key value] (if (= 0 key)
(-set-children this [value])
(-fail! ::index-out-of-bounds {:schema this, :key key}))))))))

;;
;; public api
;;
Expand Down Expand Up @@ -1299,6 +1486,20 @@
:qualified-symbol (-qualified-symbol-schema)
:uuid (-uuid-schema)})

(defn sequence-schemas []
{:+ (-sequence-schema {:type :+, :re (fn [_ [child]] (re/+ child))})
:* (-sequence-schema {:type :*, :re (fn [_ [child]] (re/* child))})
:? (-sequence-schema {:type :?, :re (fn [_ [child]] (re/? child))})
;; FIXME: ##Inf blows up:
:repeat (-sequence-schema {:type :repeat, :re (fn [{:keys [min max] :or {min 0, max ##Inf}} [child]]
(re/repeat min max child))})
:alt (-sequence-schema {:type :alt, :re (fn [_ children] (apply re/alt children))})
:cat (-sequence-schema {:type :cat, :re (fn [_ children] (apply re/cat children))})
:cat* (-sequence-entry-schema {:type :cat*, :re (fn [_ children] (apply re/cat children))})
:alt* (-sequence-entry-schema {:type :alt*, :re (fn [_ children] (apply re/alt children))})

:nested (-nested-schema)})

(defn base-schemas []
{:and (-and-schema)
:or (-or-schema)
Expand All @@ -1319,7 +1520,7 @@
::schema (-schema-schema {:raw true})})

(defn default-schemas []
(merge (predicate-schemas) (class-schemas) (comparator-schemas) (type-schemas) (base-schemas)))
(merge (predicate-schemas) (class-schemas) (comparator-schemas) (type-schemas) (sequence-schemas) (base-schemas)))

(def default-registry
(mr/registry (cond (identical? mr/type "default") (default-schemas)
Expand Down Expand Up @@ -1353,3 +1554,115 @@
:meta ~(meta name)
:ns ns#
:name ~name'}))))

(defn << [success & ss]
(str (if success "\u001B[32m" "\u001B[31m") (str/join " " ss) "\u001B[0m"))
#_(
(defn << [success & ss]
(str (if success "\u001B[32m" "\u001B[31m") (str/join " " ss) "\u001B[0m"))

(doseq [[s [pass fail]] [[[:+ int?] [[[1] [1 2 3]] [[] ["1"]]]]
[[:* int?] [[[1] [1 2 3] []] [["1"]]]]
[[:repeat int?] [[[1] [1 2 3] []] [["1"]]]]
[[:repeat {:min 0} int?] [[[1] [1 2 3] []] [["1"]]]]
[[:repeat {:min 1} int?] [[[1] [1 2 3]] [[] ["1"]]]]
[[:repeat {:min 1, :max 2} int?] [[[1] [1 2]] [[1 2 3] [] ["1"]]]]
[[:cat int? boolean?] [[[1 true]] [[1] ["1" true]]]]
[[:cat* [:int int?] [:bool boolean?]] [[[1 true]] [[1] ["1" true]]]]
[[:alt int? boolean?] [[[1] [true]] [[] ["1"] [1 "2"]]]]
[[:alt* [:int int?] [:int boolean?]] [[[1] [true]] [[] ["1"] [1 "2"]]]]
[[:cat
int?
[:alt
string?
keyword?
[:cat
string?
keyword?]]
int?] [[[1 "a" 3]
[1 :b 3]
[1 "a" :b 3]]
[[1 "a" :b "3"]]]]
[[:cat*
[:head int?]
[:body [:alt*
[:option1 string?]
[:option2 keyword?]
[:option3 [:cat*
[:s string?]
[:k keyword?]]]]]
[:tail int?]] [[[1 "a" 3]
[1 :b 3]
[1 "a" :b 3]]
[[1 "a" :b "3"]]]]
[[:cat int? [:cat int? int?]] [[[1 2 3]] [[1 [2 3]]]]]
[[:cat int? [:schema [:cat int? int?]]] [[[1 [2 3]]] [[1 2 3]]]]]]
(println)
(println (form s))
(doseq [v pass]
(let [ok (validate s v)]
(println (<< ok "+" (if ok "+" "-") v))))
(doseq [v fail]
(let [ok (not (validate s v))]
(println (<< ok "-" (if ok "-" "+") v)))))

(validate
[:cat*
[:head int?]
[:body [:alt*
[:option1 string?]
[:option2 keyword?]
[:option3 [:schema
[:cat*
[:s [:tuple string? string?]]
[:k keyword?]]]]]]
[:tail int?]]
[1 [["sika" "pakkasella"] :tosi] 12])

(validate
[:schema {:registry {"hiccup" [:or
[:cat*
[:name keyword?]
[:props [:? [:map-of keyword? any?]]]
[:children [:* [:ref "hiccup"]]]]
[:or
nil?
boolean?
number?
string?]]}}
"hiccup"]
[:div {:class [:foo :bar]}
[:p "Hello, world of data"]])


(validate
[:schema
[:alt*
[:node [:cat*
[:name keyword?]
[:props [:? [:map-of keyword? any?]]]
[:children [:* string? #_[:ref "hiccup"]]]]]]]
[:p {:a 1} "Hello, world of data"])

(validate
[:alt
[:cat*
[:name keyword?]
[:props [:? [:map-of keyword? any?]]]
[:children [:* any?]]]]
[:div {:class "warning"} 123])


(validate
[:alt*
[:node [:alt*
[:name keyword?]
[:props [:? [:map-of keyword? any?]]]
[:children [:* any? #_[:ref "hiccup"]]]]]
[:primitive [:alt*
[:nil nil?]
[:boolean boolean?]
[:number number?]
[:text string?]]]]
[:div {:class [:foo :bar]}
nil #_[:p "Hello, world of data"]]))
Loading