diff --git a/.github/workflows/TagBot.yml b/.github/workflows/TagBot.yml new file mode 100644 index 0000000..f49313b --- /dev/null +++ b/.github/workflows/TagBot.yml @@ -0,0 +1,15 @@ +name: TagBot +on: + issue_comment: + types: + - created + workflow_dispatch: +jobs: + TagBot: + if: github.event_name == 'workflow_dispatch' || github.actor == 'JuliaTagBot' + runs-on: ubuntu-latest + steps: + - uses: JuliaRegistries/TagBot@v1 + with: + token: ${{ secrets.GITHUB_TOKEN }} + ssh: ${{ secrets.DOCUMENTER_KEY }} diff --git a/.github/workflows/Test.yml b/.github/workflows/Test.yml new file mode 100644 index 0000000..9ee0b46 --- /dev/null +++ b/.github/workflows/Test.yml @@ -0,0 +1,57 @@ +name: "test suite and docs" + +on: + push: + branches: + - main + +permissions: + contents: read + id-token: write + pages: write + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: julia-actions/setup-julia@v1 + with: + version: "1.10" + - uses: julia-actions/julia-buildpkg@v1 + - uses: julia-actions/julia-runtest@v1 + + # We set up a folder that Pluto can use to cache exported notebooks. If the notebook file did not change, then Pluto can take the exported file from cache instead of running the notebook. 
+ - name: Set up notebook state cache + uses: actions/cache@v3 + with: + path: pluto_state_cache + key: ${{ runner.os }}-pluto_state_cache-v2-${{ hashFiles('**/Project.toml', '**/Manifest.toml', '.github/workflows/*' ) }}-${{ hashFiles('**/*jl') }} + restore-keys: | + ${{ runner.os }}-pluto_state_cache-v2-${{ hashFiles('**/Project.toml', '**/Manifest.toml', '.github/workflows/*' ) }} + + - name: Run & export Pluto notebooks + run: | + julia -e 'using Pkg + Pkg.activate(mktempdir()) + Pkg.add([ + Pkg.PackageSpec(name="PlutoSliderServer", version="0.3.2-0.3"), + ]) + + import PlutoSliderServer + + PlutoSliderServer.github_action("./docs"; + Export_cache_dir="pluto_state_cache", + Export_baked_notebookfile=false, + Export_baked_state=false, + # more parameters can go here + )' + + - name: Upload GitHub Pages artifact + uses: actions/upload-pages-artifact@v3.0.1 + with: + path: . + + - name: Deploy GitHub Pages site + uses: actions/deploy-pages@v4.0.5 + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ba39cc5 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +Manifest.toml diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..6aa077c --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 Paul Berg + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Project.toml b/Project.toml new file mode 100644 index 0000000..b025567 --- /dev/null +++ b/Project.toml @@ -0,0 +1,16 @@ +name = "Pinot" +uuid = "14b02da0-8be6-40d8-a4f8-a7d73caed26e" +license = "MIT" +authors = ["Paul "] +version = "0.1.0" + +[compat] +julia = "1.6" + +[extras] +Deno_jll = "04572ae6-984a-583e-9378-9577a1c2574d" +JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" + +[targets] +test = ["Test", "JSON3", "Deno_jll"] diff --git a/README.md b/README.md new file mode 100644 index 0000000..787144b --- /dev/null +++ b/README.md @@ -0,0 +1,40 @@ +# Pinot.jl + +An implementation of [Operational Transform](https://en.wikipedia.org/wiki/Operational_transformation) for plain text documents using the [Delta format](http://quilljs.com/docs/delta/). + +### Example + +```julia +using Pinot, Test + +initial_text = """ +this is a shared document. +""" + +edits_a = [ + Pinot.retain(10), + Pinot.insert("cool "), +] + +text_a = Pinot.apply(initial_text, edits_a) + +@test text_a == """ +this is a cool shared document. +""" + +edits_b = [ + Pinot.retain(10), + Pinot.delete(6), + Pinot.insert("collaborative"), +] + +@test Pinot.apply(initial_text, edits_b) == """ +this is a collaborative document. +""" + +edits_b_a = Pinot.transform(edits_a, edits_b, Pinot.Left) + +@test Pinot.apply(text_a, edits_b_a) == """ +this is a cool collaborative document. 
+""" +``` diff --git a/docs/index.jl b/docs/index.jl new file mode 100644 index 0000000..e8f3bd7 --- /dev/null +++ b/docs/index.jl @@ -0,0 +1,341 @@ +### A Pluto.jl notebook ### +# v0.19.41 + +using Markdown +using InteractiveUtils + +# ╔═╡ 00289434-0de8-11ef-2d46-090e108db876 +# ╠═╡ show_logs = false +begin + import Pkg + Pkg.develop(path="..") + Pkg.add("PlutoUI") +end + +# ╔═╡ 753f58db-33ed-4d95-b1b8-f1961a1e92b8 +using Pinot + +# ╔═╡ 2fc12db6-b3b9-4d15-adc8-98e50a328c68 +using PlutoUI + +# ╔═╡ 608e97bb-c538-4f0e-9ba0-f450abe975ea +md""" +# Pinot.jl +""" + +# ╔═╡ aedbe996-1c65-4c47-a1ff-4dbaf46d1615 +md""" +Pinot is a Julia package to perform [Operational Transformation](https://en.wikipedia.org/wiki/Operational_transformation). That is, it offers tools to describe and reconcile plain-text edits to implement collaborative text editing features. Pinot is based on the [Delta format](https://quilljs.com/docs/delta/) to describe documents and edits. + +A Delta is a series of edits, which can be one of the following three sorts: + + - retain + - insert + - delete +""" + +# ╔═╡ 96f3ee4e-6042-43ec-aae9-1f88defac9fa +md""" +## Describing edits +""" + +# ╔═╡ e7f2f5d0-7236-4613-888d-6df27cfb247c +retain(4) + +# ╔═╡ 2ee17e6c-87ea-4dce-b08f-802a1496b8bc +delete(3) + +# ╔═╡ 1a361814-537c-4360-86f9-200a38bd1536 +insert("ok") + +# ╔═╡ 73bfd292-f5e5-4e80-a473-e75ed1034304 +Docs.Binding(Pinot, :retain) + +# ╔═╡ 87add6ba-5a2c-49ee-8d9e-7bddccfe66f3 +let text = "Hello", + changes = [Pinot.retain(text)] + Pinot.apply(text, changes) +end + +# ╔═╡ 3439fb92-0784-4804-9c00-96caca2c002f +Docs.Binding(Pinot, :insert) + +# ╔═╡ f984541f-40d4-46ec-8098-260401c72dd6 +let text = "Hello", + changes = [ + Pinot.retain(5), + Pinot.insert(" World!"), + ] + Pinot.apply(text, changes) +end + +# ╔═╡ 3f5f710c-c9b5-46ad-9aa3-ffc122c4c1c5 +Docs.Binding(Pinot, :delete) + +# ╔═╡ 91546608-bb22-4a8f-8cbc-aaf6d41aadd0 +let text = "Hello", + changes = [ + Pinot.delete(5), + ] + Pinot.apply(text, changes) 
+end + +# ╔═╡ 7a7d91d3-c3b0-4713-a10b-36b2b81f2978 +md""" +!!! warning + The length of range represents the number of utf16 codepoints. + This is because the Delta format is meant to interoperate with Javascript which uses UTF16 encoding for its string encoding. +""" + +# ╔═╡ 286712bd-0d7c-46b1-b445-e7d206a5e03b +Docs.Binding(Pinot, :Unicode) + +# ╔═╡ 4471a2cb-62be-4359-8d37-9e2a81e0ea6d +md""" +Notice the difference in the following example when using the `ncodeunits(::Char)` base function which returns the number of UTF-8 codeunits. Using `Pinot.Unicode.utf16_ncodeunits(::String)` is required instead to have the right delta length. +""" + +# ╔═╡ 0c941e14-0d18-4a73-9956-69893d024dfe +let text = "🍕 is great", + changes = [ + Pinot.retain(ncodeunits('🍕')), + Pinot.insert("🍍"), + ] + Pinot.apply(text, changes) +end + +# ╔═╡ c280a351-2afa-4f4e-8672-2a779ab78f7f +let text = "🍕 is great", + changes = [ + Pinot.retain(Pinot.Unicode.utf16_ncodeunits('🍕')), + Pinot.insert("🍍"), + ] + Pinot.apply(text, changes) +end + +# ╔═╡ cb211736-7844-457b-9f1d-e9a12694dce3 +md""" +Throughout this section, we have been using the `Pinot.apply(::String, ::Vector{Range})::String` function which applies a set of edits to a string. +""" + +# ╔═╡ ba9390df-5be6-4fe8-b5be-4f32a8392d40 +Docs.Binding(Pinot, :apply) + +# ╔═╡ 0a66aad8-1c36-40d8-8f40-f816e8496774 +md""" +## Operational Transformation +""" + +# ╔═╡ 322ed9ad-e89c-44f7-bd80-e5c56bc051a9 +md""" +The main goal with Operational Transformation is to resolve conflicts between changes which have happened from the same starting documents and make all clients editing the document converge to the same final document. + +Consider a document `A`, with two clients editing it concurrently. Client 1 produces `A₁` and Client 2 produces `A₂`. 
+ +``` +A--C₁-->A₁ +└--C₂-->A₂ +``` +""" + +# ╔═╡ 8473ef3f-6262-40ca-9b41-2e32d0a48a95 +Docs.Binding(Pinot, :transform) + +# ╔═╡ a897ab14-cb68-4868-9be4-2495ba125450 +A = "this is the initial document" + +# ╔═╡ f5fcb583-0035-45e8-bf0d-e5a1f332cb1a +C₁ = [ + retain("this is the "), + delete("initial "), + retain("document"), + insert(" produced by C₁"), +] + +# ╔═╡ 30bf8640-67b6-4c47-ab23-ecee6f177d77 +A₁ = apply(A, C₁) + +# ╔═╡ 40bc82cd-bb4d-4153-abd8-3966b26a5747 +C₂ = [ + retain("this is the "), + delete("initial "), + retain("document"), + insert(" produced by C₂"), +] + +# ╔═╡ aa02313c-1357-4058-9ec9-263fe8a8e8b9 +A₂ = apply(A, C₂) + +# ╔═╡ 4b6b3609-55a7-4fa1-8d41-0191e3c25f1c +md""" +Now let's consider that `C₂` sends its changes over to `C₁`, we want to transform `C₂` so that it starts from `A₁` instead of `A`. + +``` +A--C₁-->A₁--C₂′-->A₁₂ +``` + +!!! note + `transform` takes a third argument to indicate which set of edits logically happened before, this is used to prioritize one change-set over the other in ambiguous cases (ex: 2 inserts at the same position). +""" + +# ╔═╡ 85d1f091-568c-4797-acd5-497df27880fd +C₂′ = transform(C₁, C₂, Pinot.Left) + +# ╔═╡ 7a54928d-7062-4f14-859d-f97132b75c5a +A₁₂ = apply(A₁, C₂′) + +# ╔═╡ 63a7a5a2-e88f-4115-a673-5faf8797477a +md""" +Reciprocally, `C₁` can send its changes over to `C₂` and we want to do a similar transformation to produce `A₂₁` from `C₁′`. + +``` +A--C₂-->A₂--C₁′-->A₂₁ +``` +""" + +# ╔═╡ 6663a046-0a37-456a-bbbf-aa8ac5fa46fe +C₁′ = transform(C₂, C₁, Pinot.Right) + +# ╔═╡ e36daba0-5c33-4ccc-bd84-4c4bb290f3a7 +A₂₁ = apply(A₂, C₁′) + +# ╔═╡ 49fac7e6-a0c2-447c-ba41-b73af0fe9ae9 +md""" +We can now see that the transformation made the two clients converge to the same document. +""" + +# ╔═╡ 56a28ee3-c0b7-4413-b03f-c330ccca1328 +A₁₂ == A₂₁ + +# ╔═╡ 3420f9ae-4542-4de3-bff5-85b08cceea94 +md""" +The `transform_position` function indicates how a position in a document would be moved after applying a set of edits. 
+""" + +# ╔═╡ 3d37ac51-348c-4dca-b5b2-3ec832e702c8 +transform_position([insert("hello")], 1) + +# ╔═╡ dc112672-1745-4b8c-9d69-a59c8f0523e3 +transform_position([delete(3)], 4) + +# ╔═╡ 0b4cf99a-dca9-43dd-be59-020c5a49c274 +transform_position([retain(10), insert("ok")], 5) + +# ╔═╡ 5162f0db-8246-463c-a941-a1a7d9d300f6 +Docs.Binding(Pinot, :transform_position) + +# ╔═╡ d80c1335-9e40-4e2f-997e-bfe8e16a5d18 +md""" +## Composition + +We have seen that edits describe a change in the document and edits can be used to go from one state of a document to another. Composition can be used to combine two consecutive change-sets into a single one. +""" + +# ╔═╡ a0df881d-2ed1-491b-bc18-02f6766c34c0 +apply(A, compose(C₁, C₂′)) == A₁₂ + +# ╔═╡ 2717c757-e5b1-46cd-8454-d830b19109ce +Docs.Binding(Pinot, :compose) + +# ╔═╡ f77372a7-a8ea-4bf4-bcce-bf392920149b +md""" +## Inversion + +The inverse of a change set can be produced with the `invert` function which also requires the document state before this edit (so that deletes can become inserts). +""" + +# ╔═╡ 1eb1a0bf-cc5e-4d4e-8a64-9b05f7965d60 +apply(A₁, invert(A, C₁)) + +# ╔═╡ cfc29183-c12a-41c2-80b7-8a6e8a56a38a +Docs.Binding(Pinot, :invert) + +# ╔═╡ 747c9f89-945f-4d5d-9b93-376f87f54818 +md""" +## Compactions + +Some change descriptions can contain superfluous elements which can be compacted. 
+ +Notice how the consecutive edits are merged and the trailing retain is removed as it is not required per the Delta specification: +""" + +# ╔═╡ cb25b8f1-9baf-4d07-b815-bb5ff338f07c +Pinot.compact([ + Pinot.insert("a"), + Pinot.insert("b"), + Pinot.insert("c"), + Pinot.delete(1), + Pinot.delete(2), + Pinot.retain(1), +]) + +# ╔═╡ 484f09f2-ed5c-449d-977a-48e211782703 +Docs.Binding(Pinot, :compact) + +# ╔═╡ 24037413-f4b0-4414-82a2-9120e063e705 +md""" +--- +""" + +# ╔═╡ f7c8608a-b299-4948-84ff-59ce466490aa +TableOfContents(include_definitions=true) + +# ╔═╡ dd3e5a52-32e2-4d5f-a9b4-e36eb4987adf + + +# ╔═╡ Cell order: +# ╟─608e97bb-c538-4f0e-9ba0-f450abe975ea +# ╠═753f58db-33ed-4d95-b1b8-f1961a1e92b8 +# ╟─aedbe996-1c65-4c47-a1ff-4dbaf46d1615 +# ╟─96f3ee4e-6042-43ec-aae9-1f88defac9fa +# ╠═e7f2f5d0-7236-4613-888d-6df27cfb247c +# ╠═2ee17e6c-87ea-4dce-b08f-802a1496b8bc +# ╠═1a361814-537c-4360-86f9-200a38bd1536 +# ╟─73bfd292-f5e5-4e80-a473-e75ed1034304 +# ╠═87add6ba-5a2c-49ee-8d9e-7bddccfe66f3 +# ╟─3439fb92-0784-4804-9c00-96caca2c002f +# ╠═f984541f-40d4-46ec-8098-260401c72dd6 +# ╟─3f5f710c-c9b5-46ad-9aa3-ffc122c4c1c5 +# ╠═91546608-bb22-4a8f-8cbc-aaf6d41aadd0 +# ╟─7a7d91d3-c3b0-4713-a10b-36b2b81f2978 +# ╟─286712bd-0d7c-46b1-b445-e7d206a5e03b +# ╟─4471a2cb-62be-4359-8d37-9e2a81e0ea6d +# ╠═0c941e14-0d18-4a73-9956-69893d024dfe +# ╠═c280a351-2afa-4f4e-8672-2a779ab78f7f +# ╟─cb211736-7844-457b-9f1d-e9a12694dce3 +# ╟─ba9390df-5be6-4fe8-b5be-4f32a8392d40 +# ╟─0a66aad8-1c36-40d8-8f40-f816e8496774 +# ╟─322ed9ad-e89c-44f7-bd80-e5c56bc051a9 +# ╟─8473ef3f-6262-40ca-9b41-2e32d0a48a95 +# ╠═a897ab14-cb68-4868-9be4-2495ba125450 +# ╠═f5fcb583-0035-45e8-bf0d-e5a1f332cb1a +# ╠═30bf8640-67b6-4c47-ab23-ecee6f177d77 +# ╠═40bc82cd-bb4d-4153-abd8-3966b26a5747 +# ╠═aa02313c-1357-4058-9ec9-263fe8a8e8b9 +# ╟─4b6b3609-55a7-4fa1-8d41-0191e3c25f1c +# ╠═85d1f091-568c-4797-acd5-497df27880fd +# ╠═7a54928d-7062-4f14-859d-f97132b75c5a +# ╟─63a7a5a2-e88f-4115-a673-5faf8797477a +# 
╠═6663a046-0a37-456a-bbbf-aa8ac5fa46fe +# ╠═e36daba0-5c33-4ccc-bd84-4c4bb290f3a7 +# ╟─49fac7e6-a0c2-447c-ba41-b73af0fe9ae9 +# ╠═56a28ee3-c0b7-4413-b03f-c330ccca1328 +# ╟─3420f9ae-4542-4de3-bff5-85b08cceea94 +# ╠═3d37ac51-348c-4dca-b5b2-3ec832e702c8 +# ╠═dc112672-1745-4b8c-9d69-a59c8f0523e3 +# ╠═0b4cf99a-dca9-43dd-be59-020c5a49c274 +# ╟─5162f0db-8246-463c-a941-a1a7d9d300f6 +# ╟─d80c1335-9e40-4e2f-997e-bfe8e16a5d18 +# ╠═a0df881d-2ed1-491b-bc18-02f6766c34c0 +# ╟─2717c757-e5b1-46cd-8454-d830b19109ce +# ╟─f77372a7-a8ea-4bf4-bcce-bf392920149b +# ╠═1eb1a0bf-cc5e-4d4e-8a64-9b05f7965d60 +# ╟─cfc29183-c12a-41c2-80b7-8a6e8a56a38a +# ╟─747c9f89-945f-4d5d-9b93-376f87f54818 +# ╠═cb25b8f1-9baf-4d07-b815-bb5ff338f07c +# ╟─484f09f2-ed5c-449d-977a-48e211782703 +# ╟─24037413-f4b0-4414-82a2-9120e063e705 +# ╟─00289434-0de8-11ef-2d46-090e108db876 +# ╟─2fc12db6-b3b9-4d15-adc8-98e50a328c68 +# ╟─f7c8608a-b299-4948-84ff-59ce466490aa +# ╟─dd3e5a52-32e2-4d5f-a9b4-e36eb4987adf diff --git a/src/Pinot.jl b/src/Pinot.jl new file mode 100644 index 0000000..86a5993 --- /dev/null +++ b/src/Pinot.jl @@ -0,0 +1,16 @@ +module Pinot + +include("./unicode.jl") +include("./delta.jl") +include("./myers.jl") + +using .Unicode: utf16_ncodeunits +using .Delta: retain, insert, delete, apply, compact, invert, + compose, transform, transform_position, Left, Right, Range, + to_obj, from_obj +using .Diff: diff + +export retain, insert, delete, apply, compact, invert, compose, transform, transform_position +# public Range, Left, Right, to_obj, from_obj + +end # module Pinot diff --git a/src/delta.jl b/src/delta.jl new file mode 100644 index 0000000..28a8ec7 --- /dev/null +++ b/src/delta.jl @@ -0,0 +1,404 @@ +module Delta +# References +# https://github.com/codemirror/collab/blob/main/src/collab.ts +# https://codemirror.net/examples/collab/ +# https://github.com/livebook-dev/livebook/blob/main/lib/livebook/delta.ex +# https://www.npmjs.com/package/quill-delta + +import ..Unicode + +@enum RangeType Insert Retain 
Delete + +""" + retain(n) -> Range + insert(s) -> Range + delete(n) -> Range + +The primitive type to describe a change. A vector of `Range` +represents a [Delta](https://www.npmjs.com/package/quill-delta). + +!!! warning + The length of a range is expressed in UTF-16 code units. + This is because the Delta format is meant to interoperate with Javascript which uses + UTF-16 for its string encoding. +""" +struct Range + type::RangeType + length::UInt32 + insert::Union{Nothing,String} # inserted text; `nothing` for retains and deletes +end + +""" + retain(l::Integer) -> Range + +Retains `l` UTF-16 code units in the starting document. +""" +retain(l) = Range(Retain, l, nothing) + +""" + retain(s::String) -> Range + +Creates a range that retains the same length as `s`. +""" +retain(s::Union{String,SubString{String}}) = retain(Unicode.utf16_ncodeunits(s)) + +""" + insert(s::String) -> Range + +Inserts the string `s`. +""" +insert(s) = Range(Insert, Unicode.utf16_ncodeunits(s), s) + +""" + delete(l::Integer) -> Range + +Deletes `l` UTF-16 code units from the starting document. +""" +delete(l) = Range(Delete, l, nothing) + +""" + delete(s::String) -> Range + +Creates a range that deletes the same length as `s`. +""" +delete(s::Union{String,SubString{String}}) = delete(Unicode.utf16_ncodeunits(s)) + +Base.show(io::IO, r::Delta.Range) = begin + if r.type == Delta.Insert + show(io, Delta.insert) + print(io, "(") + show(io, r.insert) + print(io, ")") + else + show(io, r.type == Delta.Retain ? Delta.retain : Delta.delete) + print(io, "(") + show(io, r.length) + print(io, ")") + end +end + +function retain!(ranges, n) # appends a retain of `n`, merging it into a trailing retain when there is one + if !isempty(ranges) && last(ranges).type == Retain + ranges[end] = retain(ranges[end].length + n) + else + push!(ranges, retain(n)) + end + ranges +end + +""" + invert(text, ops::Vector{Range}) -> Vector{Range} + +Given a set of edits on a text, produce the inverse set of edits. 
+ +```julia +Pinot.apply(Pinot.apply(text, edits), Pinot.invert(text, edits)) == text +``` +""" +function invert(text, ops::Vector{Range}) + out = similar(ops) + + offset = firstindex(text) # position in `text`, advanced by range lengths (UTF-16 code units) + for (i, op) in enumerate(ops) + if op.type == Retain + out[i] = retain(op.length) + offset += op.length + elseif op.type == Insert + out[i] = delete(op.length) + elseif op.type == Delete + r = offset:Unicode.utf16_prevind(text, offset+op.length) + text_to_insert = Unicode.utf16_slice(text, r) + out[i] = insert(text_to_insert) # re-insert the text that the delete removed + offset += op.length + end + end + + out +end + +# --- + +struct OpIterator + r::Vector{Range} + i::UInt32 # op index + ℓ::UInt32 # consumed length in r[i] +end +OpIterator(r) = OpIterator(r, firstindex(r), zero(UInt32)) + +function peek_length(it::OpIterator) + it.i > lastindex(it.r) && return typemax(UInt32) # exhausted: behaves like an unbounded retain + op = it.r[it.i] + op.length - it.ℓ +end +function peek_type(it::OpIterator) + it.i > lastindex(it.r) && return Retain # exhausted: behaves like an unbounded retain + op = it.r[it.i] + op.type +end + +function has_next(it::OpIterator) + it.i <= lastindex(it.r) +end + +function next(it::OpIterator, ℓ=nothing) + it.i > lastindex(it.r) && return Range(Retain, something(ℓ, typemax(UInt32)), nothing), it + op = it.r[it.i] + if op.type == Insert + ℓ = isnothing(ℓ) ? peek_length(it) : ℓ + new_insert = Unicode.utf16_slice(op.insert, 1+it.ℓ:it.ℓ+ℓ) + r = insert(new_insert) + + ni, nℓ = it.i, it.ℓ + ℓ + if it.ℓ + ℓ == op.length # move to next + ni += 1 + nℓ = 0 + end + return r, OpIterator(it.r, ni, nℓ) + end + ty = op.type + ℓ = isnothing(ℓ) ? 
peek_length(it) : ℓ + ni, nℓ = it.i, it.ℓ + ℓ + r = Range(ty, ℓ, nothing) + if it.ℓ + ℓ == op.length # move to next + ni += 1 + nℓ = 0 + end + r, OpIterator(it.r, ni, nℓ) +end + +# --- + +@enum Priority Left Right + +""" + transform(a::Vector{Range}, b::Vector{Range}, priority=Left) -> Vector{Range} + +Produces a version of `b` transformed over `a` such that: + +```julia +Pinot.apply(Pinot.apply(text, a), Pinot.transform(a, b, Pinot.Left)) == + Pinot.apply(Pinot.apply(text, b), Pinot.transform(b, a, Pinot.Right)) +``` + +`priority` indicates which change-set is considered to have happened first when two inserts conflict: with `Left`, inserts from `a` are placed before those of `b`; any other value places `b`'s inserts first. +""" +function transform(a, b, priority=Left) + out = Range[] + + before = priority === Left # true when a's inserts win ties; anything that is not `Left` behaves like `Right` + + itA = OpIterator(a) + itB = OpIterator(b) + + while has_next(itA) || has_next(itB) + if peek_type(itA) == Insert && (before || peek_type(itB) != Insert) + ca, itA = next(itA) + retain!(out, Unicode.utf16_ncodeunits(ca.insert)) # b must skip over the text a inserted + elseif peek_type(itB) == Insert + cb, itB = next(itB) + push!(out, cb) + else + # ca, cb are either Retain or Delete + ℓ = min(peek_length(itA), peek_length(itB)) + + if peek_type(itA) == Delete + # our delete either makes their delete redundant or removes their retain + elseif peek_type(itB) == Delete + push!(out, delete(ℓ)) + else + # ca and cb are Retain + retain!(out, ℓ) + end + + _, itA = next(itA, ℓ) + _, itB = next(itB, ℓ) + end + end + + out +end + +""" + compose(a::Vector{Range}, b::Vector{Range}) -> Vector{Range} + +Returns a set of changes equivalent to sequentially applying `a` then `b`. 
+ +```julia +Pinot.apply(Pinot.apply(text, a), b) == + Pinot.apply(text, Pinot.compose(a, b)) +``` +""" +function compose(a, b) + out = Range[] + + itA = OpIterator(a) + itB = OpIterator(b) + + while has_next(itA) || has_next(itB) + if peek_type(itB) == Insert # inserts in B are unconditional + cb, itB = next(itB) + push!(out, cb) + elseif peek_type(itA) == Delete # deletes in A are unconditional + ca, itA = next(itA) + push!(out, ca) + else + ℓ = min(peek_length(itA), peek_length(itB)) + ca, itA = next(itA, ℓ) + cb, itB = next(itB, ℓ) + if cb.type == Retain + push!(out, ca) + elseif cb.type == Delete && ca.type == Retain + push!(out, cb) + end # remaining case: an insert from A deleted by B cancels out, nothing is emitted + end + end + + out +end + +""" + apply(text::String, ranges::Vector{Range}) -> String + +Applies the changes in `ranges` to `text` and returns the resulting string. Any text left after the last range is retained. +""" +function apply(s::String, ranges::Vector{Range}) + out = SubString{String}[] + current_pos = firstindex(s) + + N = Unicode.utf16_ncodeunits(s) + + for r in ranges + if r.type == Retain + i = min(N, Unicode.utf16_prevind(s, current_pos + r.length)) + # @show s N i current_pos + v = Unicode.utf16_view(s, current_pos:i) + # @show v current_pos i r.length + push!(out, v) + current_pos += r.length + elseif r.type == Delete + current_pos += r.length + elseif r.type == Insert + push!(out, r.insert) + end + end + + # retain the end + if current_pos <= N + push!(out, Unicode.utf16_view(s, current_pos:N)) + end + + join(out) +end + +""" + transform_position(ops::Vector{Range}, pos) -> Int + +Returns a new value for position after applying the changes +described by `ops`. We move the position to the right of an +insert. + +```julia +julia> Pinot.transform_position([retain(1), insert("a")], 1) == 2 +true +``` +""" +function transform_position(ops::Vector{Range}, pos) + # offset is current position in delta. 
+ offset = 0 + for op in ops + offset > pos && return pos # the remaining ops start past the position + + if op.type == Delete + pos = max(pos - op.length, offset) + elseif op.type == Retain + # does not affect position + offset += op.length + elseif op.type == Insert + # affects both position and offset in delta + pos += op.length + offset += op.length + end + end + + pos +end + +""" + compact(ops::Vector{Range}) -> Vector{Range} + +Returns a compacted set of changes which has the same effects +as `ops`. Adjacent ranges of the same type are merged, an insert directly following a delete is moved before it, and trailing retains are dropped. +""" +function compact(ops::Vector{Range}) + isempty(ops) && return copy(ops) + + out = Vector{Range}() + + i = firstindex(ops) + while i <= lastindex(ops) + op = ops[i] + + if !isempty(out) + prev = last(out) + if op.type == Retain && prev.type == Retain + out[end] = retain(prev.length + op.length) + elseif op.type == Delete && prev.type == Delete + out[end] = delete(prev.length + op.length) + elseif op.type == Insert && prev.type == Insert + out[end] = insert(prev.insert * op.insert) + elseif op.type == Insert && prev.type == Delete + # Normalize by putting inserts before deletes + out[end] = op # NOTE(review): the moved insert is not merged with an insert that may precede `prev` + push!(out, prev) + else + push!(out, op) + end + else + push!(out, op) + end + + i = nextind(ops, i) + end + + while !isempty(out) && last(out).type == Retain + pop!(out) + end + + out +end + +# --- Js serialization --- + +function to_obj(op::Range) # NamedTuple with a single `retain`, `delete` or `insert` key + if op.type == Retain + (; retain=op.length) + elseif op.type == Delete + (; delete=op.length) + elseif op.type == Insert + (; insert=op.insert::String) + end +end + +function to_obj(ops::Vector{Range}) + ops = map(to_obj, ops) + (; ops=ops) +end + +function from_obj(obj) # inverse of `to_obj`: reads the "ops" entry of a string-keyed object + ops = obj["ops"] + map(ops) do op + if haskey(op, "retain") + retain(op["retain"]) + elseif haskey(op, "delete") + delete(op["delete"]) + elseif haskey(op, "insert") + insert(op["insert"]) + else + error("invalid op $op") + end + end +end + +export apply, invert, delete, retain, insert, compose, transform, transform_position, compact + +end # module Delta diff --git a/src/myers.jl b/src/myers.jl 
new file mode 100644 index 0000000..ea9159f --- /dev/null +++ b/src/myers.jl @@ -0,0 +1,107 @@ +module Diff + +import ..Pinot: Delta, Unicode + +""" + diff(a::String, b::String) -> Vector{Pinot.Range} + +An implementation of the Myers algorithm as proposed in [1]. + +[1]E. Myers (1986). "An O(ND) Difference Algorithm and Its Variations". + Algorithmica. 1 (2): 251–266. doi:10.1007/BF01840446. S2CID 6996809. +""" +function diff(a, b) + isempty(a) && return isempty(b) ? Delta.Range[] : Delta.Range[Delta.insert(b)] # two empty texts need no edit at all + isempty(b) && return Delta.Range[Delta.delete(Unicode.utf16_ncodeunits(a))] + + trace = ses(a, b) + moves = backtrack(trace, a, b) + Delta.compact(apply_edits(moves, a, b)) +end + +function ses(a, b) # forward pass: records one snapshot of the frontier `v` per edit distance `d` + N = length(a) + M = length(b) + max = N + M + + # Trace is a path through the graph + trace = Vector{Int}[] + v = zeros(Int, 2*max+2) # x reached on diagonal k is stored at index k+max+1 + + for d in 0:max + push!(trace, copy(v)) + for k in -d:2:d + x = if k == -d || (k != d && v[k+max] < v[k+2+max]) + v[k+2+max] + else + v[k+max]+1 + end + y = x - k + + #TODO: this nextind perf must be high since we start from the beginning + while x < N && y < M && a[nextind(a,1,x)] == b[nextind(b,1,y)] + x += 1 + y += 1 + end + + v[k+max+1] = x + + if x >= N && y >= M + return trace + end + end + end + error("length of a ses is greater than max") +end + +function backtrack(trace, a, b) # walks the trace backwards from the end of both texts, collecting (prev_x, prev_y, x, y) moves + x, y = length(a), length(b) + max = length(a) + length(b) + + moves = Tuple{Int,Int,Int,Int}[] + for (d,v) in Iterators.reverse(enumerate(trace)) + d = d-1 + k = x - y + + prev_k = if k == -d || (k != d && v[k+max] < v[k+2+max]) + k + 1 + else + k - 1 + end + prev_x = v[prev_k+1+max] + prev_y = prev_x - prev_k + + while x > prev_x && y > prev_y + push!(moves, (x-1, y-1, x, y)) + x, y = x - 1, y - 1 + end + + d > 0 && push!(moves, (prev_x, prev_y, x, y)) + + x, y = prev_x, prev_y + end + + moves +end + +function apply_edits(moves, a, b) # turns the moves into one insert/delete/retain range per character + diffs = Delta.Range[] + for (prev_x, prev_y, x, y) in reverse(moves) + a_line = get(a, nextind(a,0,prev_x+1), a[1]) + b_line = 
get(b, nextind(b,0,prev_y+1), b[1]) # NOTE(review): the fallback appears to be reached only for the side the move ignores + + if x == prev_x + push!(diffs, Delta.insert(string(b_line))) + elseif y == prev_y + push!(diffs, Delta.delete(Unicode.utf16_size(a_line))) + else + push!(diffs, Delta.retain(Unicode.utf16_size(b_line))) + end + end + + diffs +end + +export diff + +end # module Diff diff --git a/src/unicode.jl b/src/unicode.jl new file mode 100644 index 0000000..730eade --- /dev/null +++ b/src/unicode.jl @@ -0,0 +1,35 @@ +""" +Helpers to work with UTF-16 code unit indices on **valid** UTF-8 backed strings. +""" +module Unicode + +# Returns the number of UTF-16 code units needed for a Julia Char that is assumed valid UTF-8 +# the core insight is that UTF-8 and UTF-16 share the same character range for 4 and 2 code units +# respectively. That is, 1,2 and 3 bytes characters in UTF-8 will be 1 UTF-16 code unit whereas +# only 4 bytes UTF-8 characters are 2 UTF-16 code units (the range from U+010000 to U+10FFFF). +utf16_size(c) = 1 + (convert(UInt32, c) >= 0x010000) +utf16_ncodeunits(s) = sum(utf16_size, s; init=0) + +function utf8_idx(s, u16) + u16 == 0 && return u16 + i = firstindex(s) + for c in s + u16 -= utf16_size(c) + u16 <= 0 && return i + i += ncodeunits(c) + end + i +end + +function utf16_prevind(s, idx) + idx == 0 && return idx + u8 = utf8_idx(s, idx) + u8 == 0 && return 0 + u8 = prevind(s, u8) + u8 == 0 ? 
0 : idx - utf16_size(s[u8]) +end + +utf16_slice(s, r) = string(utf16_view(s, r)) +utf16_view(s, r) = view(s, max(utf8_idx(s, first(r)),firstindex(s)):utf8_idx(s, last(r))) + +end # module Unicode diff --git a/test/quill.jl b/test/quill.jl new file mode 100644 index 0000000..5b079cc --- /dev/null +++ b/test/quill.jl @@ -0,0 +1,41 @@ +using JSON3, Deno_jll + +function send_command!(p, cmd) # writes `cmd` as one JSON line and parses the one-line JSON reply + write(p, JSON3.write(cmd)) + write(p, '\n') + JSON3.read(readuntil(p, '\n')) +end + +launch() = open(`$(deno()) run $(joinpath(@__DIR__, "quill.js"))`; write=true, read=true) +function quill_transform(p, text, a, b; priority=Delta.Left) # `priority` is a `Delta.Priority`, like the third argument of `Delta.transform` + res = send_command!(p, (; + header="transform", priority=priority === Delta.Left, # quill.js passes this Bool on as the priority flag of `a.transform(b, ...)` + text, a=Delta.to_obj(a), b=Delta.to_obj(b))) + res[:newtext], Delta.from_obj(res[:ops]) +end + +@testset "Delta - JS interop" begin + p = launch() + + edits_a = [Delta.retain(5)] + edits_b = [Delta.delete(2)] + + edits_b_a = Delta.transform(edits_a, edits_b, Delta.Right) + + text = "hello" + newtext, expected = quill_transform(p, text, edits_a, edits_b; priority=Delta.Right) + @test newtext == Pinot.apply(Pinot.apply(text, edits_a), edits_b_a) + @test expected == Delta.compact(edits_b_a) + + text = "🌅 this is operational transform" + text_a = "this is optimal transport 🚚" + text_b = "there is operational transform 🌐" + + edits_a = Pinot.diff(text, text_a) + edits_b = Pinot.diff(text, text_b) + + edits_b_a = Pinot.transform(edits_a, edits_b, Delta.Left) |> Pinot.compact + newtext, expected = quill_transform(p, text, edits_a, edits_b; priority=Pinot.Delta.Left) + @test newtext == Pinot.apply(text_a, edits_b_a) + @test Delta.compact(edits_b_a) == expected +end diff --git a/test/quill.js b/test/quill.js new file mode 100644 index 0000000..6273aaa --- /dev/null +++ b/test/quill.js @@ -0,0 +1,29 @@ +import {decode} from "https://deno.land/std@0.204.0/encoding/base64.ts"; +import {TextLineStream} from "https://deno.land/std@0.204.0/streams/mod.ts"; +import Delta from "npm:quill-delta" + +const lines = 
Deno.stdin.readable.pipeThrough(new TextDecoderStream()) + .pipeThrough(new TextLineStream()); + +let exit = false; +for await (let line of lines) { + const msg = JSON.parse(line); + switch (msg.header) { + case "transform": { + let text = new Delta().insert(msg["text"]) + let a = new Delta(msg["a"]); + let b = new Delta(msg["b"]); + let res = a.transform(b, msg["priority"]) + let newtext = text.compose(a).compose(res).ops[0].insert + console.log(JSON.stringify({ops : res, newtext})) + }; break; + case "exit": + exit = true; + break; + default: + throw (`invalid header ${msg.header}`); + } + if (exit) { + break; + } +} \ No newline at end of file diff --git a/test/runtests.jl b/test/runtests.jl new file mode 100644 index 0000000..b3b360e --- /dev/null +++ b/test/runtests.jl @@ -0,0 +1,166 @@ +using Test, Pinot +using Pinot: Delta + +include("./quill.jl") + +@testset "Delta - compact" begin + @testset "should remove trailing remain" begin + @test isempty(Delta.compact([Delta.retain(10)])) + end + + @testset "should fuse inserts" begin + @test Delta.compact([ + Delta.insert("hello"), + Delta.insert(" "), + Delta.insert("world"), + ]) == [Delta.insert("hello world")] + end + + @testset "should fuse deletes" begin + @test Delta.compact([ + Delta.delete(10), + Delta.delete(4), + Delta.retain(5), + ]) == [Delta.delete(14)] + end + + @testset "should put inserts before deletes" begin + @test Delta.compact([ + Delta.delete(2), + Delta.insert("ok"), + ]) == [Delta.insert("ok"), Delta.delete(2)] + end +end + +@testset "Delta - apply" begin + for (sa, sb) in [ + ("hello", "hola"), + ("🍕", "🍍"), + ("ok🍍ok🍕", "odsql🍍, sdkdq"), + ("i like pizza", "you like mezze"), + ("", "ok"), + ("ok", ""), + ] + ops = Pinot.diff(sa, sb) + res = Pinot.apply(sa, ops) + + @test res == sb + end + + @testset "apply empty" begin + @test Pinot.apply("hello!", Delta.Range[]) == "hello!" 
+ end + + @test Pinot.apply("a", Pinot.Range[retain(1)]) == "a" + @test Pinot.apply("", Pinot.Range[]) == "" + @test Pinot.apply("", [Pinot.retain(0)]) == "" + @test Pinot.apply("", [Pinot.retain(typemax(UInt32))]) == "" + + @test Pinot.apply("🍕s", [Pinot.retain(3), insert("x")]) == "🍕sx" +end + +@testset "Delta - transform position" begin + text = "hello |how are you?" + + edits = [ + Delta.retain(3), + Delta.insert("lo"), + Delta.delete(2), + Delta.insert(" bob,"), + ] + + pos = findfirst(==('|'), text) + text_no_pos = filter(!=('|'), text) + + new_pos = Delta.transform_position(edits, pos) + + new_text = Delta.apply(text_no_pos, edits) + @test new_text == "hello bob, how are you?" + + new_text_with_pos = new_text[begin:prevind(new_text,new_pos)] * '|' * new_text[new_pos:end] + @test new_text_with_pos == "hello bob, |how are you?" + + @test Delta.transform_position(Delta.Range[], 0) == 0 + @test Delta.transform_position(Delta.Range[insert("a")], 0) == 1 + @test Delta.transform_position(Delta.Range[retain(1), delete(1), insert("a")], 2) == 2 +end + +@testset "Delta - invert" begin + text = "hello" + edits = [ + Delta.retain(4), + Delta.delete(1), + Delta.insert("a"), + ] + new_text = Delta.apply(text, edits) + @test text == Delta.apply(new_text, Delta.invert(text, edits)) +end + +@testset "Delta - internal OpIterator" begin + it = Delta.OpIterator([Delta.insert("hello")]) + op, new_it = Delta.next(it, 3) + + @test op.type == Delta.Insert + @test op.insert == "hel" + + op, new_it = Delta.next(new_it) + @test op.type == Delta.Insert + @test op.insert == "lo" + + @test Delta.peek_type(new_it) == Delta.Retain +end + +@testset "README.md example" begin + initial_text = """ + this is a shared document. + """ + + edits_a = [ + Pinot.retain(10), + Pinot.insert("cool "), + ] + + text_a = Pinot.apply(initial_text, edits_a) + + @test text_a == """ + this is a cool shared document. 
+ """ + + edits_b = [ + Pinot.retain(10), + Pinot.delete(6), + Pinot.insert("collaborative"), + ] + + @test Pinot.apply(initial_text, edits_b) == """ + this is a collaborative document. + """ + + edits_b_a = Pinot.transform(edits_a, edits_b, Delta.Left) + + final_text = """ + this is a cool collaborative document. + """ + @test Pinot.apply(text_a, edits_b_a) == final_text + + @test Pinot.apply(initial_text, Delta.compose(edits_a, edits_b_a)) == final_text +end + +@testset "Fuzz" begin + for i in 1:10 + @testset let l = rand(10:30), + l1 = rand(10:30), + l2 = rand(10:30), + s = join(rand(('a':'z') ∪ ('A':'Z') ∪ ('🍍':'😎') ∪ ('γ':-1:'α'), l)), + s1 = join(rand(('a':'z') ∪ ('A':'Z') ∪ ('🍍':'😎') ∪ ('γ':-1:'α'), l1)), + s2 = join(rand(('a':'z') ∪ ('A':'Z') ∪ ('🍍':'😎') ∪ ('γ':-1:'α'), l2)), + e1 = Pinot.diff(s,s1), + e2 = Pinot.diff(s,s2) + + @test s1 == Pinot.apply(s, e1) + @test s2 == Pinot.apply(s, e2) + @test Pinot.apply(s1, Pinot.transform(e1,e2,Pinot.Left)) == + Pinot.apply(s2, Pinot.transform(e2,e1,Pinot.Right)) + end + end +end