Skip to content

Commit

Permalink
Merge pull request #1 from linusdm/diff_symbol_first_character_of_line
Browse files Browse the repository at this point in the history
Only consider the first character as the diff symbol to avoid ambiguous interpretations
  • Loading branch information
sorentwo authored Sep 16, 2024
2 parents c0fb449 + ee1083d commit a28ed29
Show file tree
Hide file tree
Showing 3 changed files with 83 additions and 57 deletions.
52 changes: 11 additions & 41 deletions lib/makeup/lexers/diff_lexer.ex
Original file line number Diff line number Diff line change
Expand Up @@ -6,56 +6,26 @@ defmodule Makeup.Lexers.DiffLexer do
@behaviour Makeup.Lexer

import NimbleParsec
import Makeup.Lexer.Combinators
import Makeup.Lexers.DiffLexer.Helper

whitespace =
[?\r, ?\s, ?\n, ?\f]
|> ascii_string(min: 1)
|> token(:whitespace)
heading = line_starting_with(["diff", "index"], :generic_heading)
inserted = line_starting_with(["+", ">"], :generic_inserted)
deleted = line_starting_with(["-", "<"], :generic_deleted)
strong = line_starting_with("!", :generic_strong)

line = utf8_string([{:not, ?\n}, {:not, ?\r}], min: 1)
line =
choice([heading, inserted, deleted, strong, text_line()])
|> map(:add_meta_diff_language)

inserted =
[string("+"), string(">")]
|> choice()
|> concat(line)
|> token(:generic_inserted)

deleted =
[string("-"), string("<")]
|> choice()
|> concat(line)
|> token(:generic_deleted)

strong =
"!"
|> string()
|> concat(line)
|> token(:generic_strong)

heading =
[string("diff"), string("index")]
|> choice()
|> concat(line)
|> token(:generic_heading)

text = token(line, :text)

root_element_combinator = choice([whitespace, heading, inserted, deleted, strong, text])

@doc false
def __as_diff_language__({type, meta, value}) do
defp add_meta_diff_language({type, meta, value}) do
{type, Map.put(meta, :language, :diff), value}
end

@impl Makeup.Lexer
defparsec(
:root_element,
root_element_combinator |> map({__MODULE__, :__as_diff_language__, []})
)
defparsec(:root_element, line |> optional(newline()))

@impl Makeup.Lexer
defparsec(:root, repeat(parsec(:root_element)))
defparsec(:root, repeat(line |> newline()) |> choice([eos(), line]))

# No post-processing step is applied: the token list is returned unchanged
# and the options argument is ignored.
@impl Makeup.Lexer
def postprocess(tokens, _opts \\ []), do: tokens
Expand Down
32 changes: 32 additions & 0 deletions lib/makeup/lexers/diff_lexer/helper.ex
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
defmodule Makeup.Lexers.DiffLexer.Helper do
  @moduledoc false

  # Combinator builders for the diff lexer. The line combinators stop in front
  # of the line terminator; `newline/1` consumes the terminator separately.

  import NimbleParsec
  import Makeup.Lexer.Combinators

  @doc """
  Builds a combinator for a line that begins with `start`, wrapped in a
  `token_type` token.

  `start` is either a single prefix string or a non-empty list of alternative
  prefix strings. The prefix is matched at the position where the combinator
  is applied, and the token covers everything up to (but not including) the
  next line feed or carriage return.
  """
  def line_starting_with(start, token_type) when is_binary(start) do
    start
    |> string()
    |> rest_of_line()
    |> token(token_type)
  end

  # `choice/2` needs at least two alternatives, so a one-element list is
  # treated exactly like a bare prefix string instead of failing to match
  # any clause.
  def line_starting_with([start], token_type) when is_binary(start) do
    line_starting_with(start, token_type)
  end

  def line_starting_with([_, _ | _] = start, token_type) do
    prefixes = Enum.map(start, &string/1)

    prefixes
    |> choice()
    |> rest_of_line()
    |> token(token_type)
  end

  @doc """
  Builds a combinator that takes everything up to the next line feed or
  carriage return and wraps it in a `:text` token.

  The match may be empty, because the underlying string combinator accepts
  zero characters.
  """
  def text_line do
    rest_of_line()
    |> token(:text)
  end

  @doc """
  Appends to `combinator` a `:whitespace` token made of one or more
  consecutive line feed / carriage return characters.
  """
  def newline(combinator \\ empty()) do
    line_break =
      [?\n, ?\r]
      |> ascii_string(min: 1)
      |> token(:whitespace)

    concat(combinator, line_break)
  end

  # Extends `combinator` with zero or more UTF-8 characters that are neither a
  # line feed nor a carriage return, i.e. the remainder of the current line.
  defp rest_of_line(combinator \\ empty()) do
    utf8_string(combinator, [not: ?\n, not: ?\r], min: 0)
  end
end
56 changes: 40 additions & 16 deletions test/makeup/lexers/diff_lexer_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -29,13 +29,13 @@ defmodule Makeup.Lexers.DiffLexerTest do
end
end

property "lexting a string with an insertion" do
property "lexing a string with an insertion" do
check all text <- inserted() do
assert [{:generic_inserted, %{}, ^text} | _] = lex(text)
end
end

property "lexting a string with a deletion" do
property "lexing a string with a deletion" do
check all text <- deleted() do
assert [{:generic_deleted, %{}, ^text}] = lex(text)
end
Expand Down Expand Up @@ -72,37 +72,61 @@ defmodule Makeup.Lexers.DiffLexerTest do
+++ b/setup
@@ -11,16 +11,22 @@ context line
unchanged
+ inserted
- deleted
> inserted
< deleted
+inserted
-deleted
>inserted
<deleted
"""

lexed =
text
|> lex()
|> Enum.reject(fn {type, _, _} -> type == :whitespace end)
lexed = lex(text, omit_whitespaces: true)

assert [
{:generic_heading, %{}, "diff --git a/setup"},
{:generic_heading, %{}, "index aaf4004c0f..0287685d2d 100755"},
{:generic_deleted, %{}, "--- a/setup"},
{:generic_inserted, %{}, "+++ b/setup"},
{:text, %{}, "@@ -11,16 +11,22 @@ context line"},
{:text, %{}, "unchanged"},
{:generic_inserted, %{}, "+ inserted"},
{:generic_deleted, %{}, "- deleted"},
{:generic_inserted, %{}, "> inserted"},
{:generic_deleted, %{}, "< deleted"}
{:text, %{}, " unchanged"},
{:generic_inserted, %{}, "+inserted"},
{:generic_deleted, %{}, "-deleted"},
{:generic_inserted, %{}, ">inserted"},
{:generic_deleted, %{}, "<deleted"}
] = lexed
end

test "marker expected in first position of each line" do
text = """
+text
-text
+-inserted
<>deleted
<text />
"""

lexed = lex(text, omit_whitespaces: true)

assert [
{:text, %{}, " +text"},
{:text, %{}, " -text"},
{:generic_inserted, %{}, "+-inserted"},
{:generic_deleted, %{}, "<>deleted"},
{:text, %{}, " <text />"}
] = lexed
end
end

defp lex(text) do
defp lex(text, opts \\ []) do
text
|> DiffLexer.lex(group_prefix: "group")
|> Postprocess.token_values_to_binaries()
|> Enum.map(fn {type, meta, value} -> {type, Map.delete(meta, :language), value} end)
|> then(fn tokens ->
if Keyword.get(opts, :omit_whitespaces, false) do
Enum.reject(tokens, fn {type, _, _} -> type == :whitespace end)
else
tokens
end
end)
end

# Properties
Expand Down

0 comments on commit a28ed29

Please sign in to comment.