From 8eba863f8fcd5876b13e66ef8d61ea29143426b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Valim?= Date: Mon, 25 Nov 2024 14:46:14 +0100 Subject: [PATCH] Properly track stab inside container_cursor_to_quoted, closes #13826 --- lib/elixir/lib/code/fragment.ex | 117 ++++++++++++++---- lib/elixir/src/elixir.erl | 4 +- lib/elixir/src/elixir_interpolation.erl | 13 +- lib/elixir/src/elixir_tokenizer.erl | 19 +-- lib/elixir/test/elixir/code_fragment_test.exs | 55 ++++++-- 5 files changed, 156 insertions(+), 52 deletions(-) diff --git a/lib/elixir/lib/code/fragment.ex b/lib/elixir/lib/code/fragment.ex index c78b437dfd4..75a9cf325a3 100644 --- a/lib/elixir/lib/code/fragment.ex +++ b/lib/elixir/lib/code/fragment.ex @@ -1099,6 +1099,13 @@ defmodule Code.Fragment do iex> Code.Fragment.container_cursor_to_quoted("foo +") {:ok, {:+, [line: 1], [{:foo, [line: 1], nil}, {:__cursor__, [line: 1], []}]}} + In order to parse the left-side of `->` properly, which appears both + in anonymous functions and do-end blocks, the trailing fragment option + must be given with the rest of the contents: + + iex> Code.Fragment.container_cursor_to_quoted("fn x", trailing_fragment: " -> :ok end") + {:ok, {:fn, [line: 1], [{:->, [line: 1], [[{:__cursor__, [line: 1], []}], :ok]}]}} + ## Options * `:file` - the filename to be reported in case of parsing errors. @@ -1121,46 +1128,108 @@ defmodule Code.Fragment do * `:literal_encoder` - a function to encode literals in the AST. See the documentation for `Code.string_to_quoted/2` for more information. + * `:trailing_fragment` (since v1.18.0) - the rest of the contents after + the cursor. This is necessary to correctly complete anonymous functions + and the left-hand side of `->` + """ @doc since: "1.13.0" @spec container_cursor_to_quoted(List.Chars.t(), keyword()) :: {:ok, Macro.t()} | {:error, {location :: keyword, binary | {binary, binary}, binary}} def container_cursor_to_quoted(fragment, opts \\ []) do + {trailing_fragment, opts} = Keyword.pop(opts, :trailing_fragment) opts = Keyword.take(opts, [:columns, :token_metadata, :literal_encoder]) - opts = [cursor_completion: true, emit_warnings: false] ++ opts + opts = [check_terminators: {:cursor, []}, emit_warnings: false] ++ opts file = Keyword.get(opts, :file, "nofile") line = Keyword.get(opts, :line, 1) column = Keyword.get(opts, :column, 1) case :elixir_tokenizer.tokenize(to_charlist(fragment), line, column, opts) do + {:ok, line, column, _warnings, rev_tokens, rev_terminators} + when trailing_fragment == nil -> + {rev_tokens, rev_terminators} = + with [close, open, {_, _, :__cursor__} = cursor | rev_tokens] <- rev_tokens, + {_, [_ | after_fn]} <- Enum.split_while(rev_terminators, &(elem(&1, 0) != :fn)), + true <- maybe_missing_stab?(rev_tokens), + [_ | rev_tokens] <- Enum.drop_while(rev_tokens, &(elem(&1, 0) != :fn)) do + {[close, open, cursor | rev_tokens], after_fn} + else + _ -> {rev_tokens, rev_terminators} + end + + tokens = reverse_tokens(line, column, rev_tokens, rev_terminators) + :elixir.tokens_to_quoted(tokens, file, opts) + {:ok, line, column, _warnings, rev_tokens, rev_terminators} -> - tokens = :lists.reverse(rev_tokens, rev_terminators) - - case :elixir.tokens_to_quoted(tokens, file, opts) do - {:ok, ast} -> - {:ok, ast} - - {:error, error} -> - # In case parsing fails, we give it another shot but handling fn/do/else/catch/rescue/after. - tokens = - :lists.reverse( - rev_tokens, - [{:stab_op, {line, column, nil}, :->}, {nil, {line, column + 2, nil}}] ++ - Enum.map(rev_terminators, fn tuple -> - {line, column, info} = elem(tuple, 1) - put_elem(tuple, 1, {line, column + 5, info}) - end) - ) - - case :elixir.tokens_to_quoted(tokens, file, opts) do - {:ok, ast} -> {:ok, ast} - {:error, _} -> {:error, error} - end - end + tokens = + with {before_start, [_ | _] = after_start} <- + Enum.split_while(rev_terminators, &(elem(&1, 0) not in [:do, :fn])), + true <- maybe_missing_stab?(rev_tokens), + opts = + Keyword.put(opts, :check_terminators, {:cursor, before_start}), + {:error, {meta, _, ~c"end"}, _rest, _warnings, trailing_rev_tokens} <- + :elixir_tokenizer.tokenize(to_charlist(trailing_fragment), line, column, opts) do + trailing_tokens = + reverse_tokens(meta[:line], meta[:column], trailing_rev_tokens, after_start) + + Enum.reverse(rev_tokens, drop_tokens(trailing_tokens, 0)) + else + _ -> reverse_tokens(line, column, rev_tokens, rev_terminators) + end + + :elixir.tokens_to_quoted(tokens, file, opts) {:error, info, _rest, _warnings, _so_far} -> {:error, :elixir.format_token_error(info)} end end + + defp reverse_tokens(line, column, tokens, terminators) do + {terminators, _} = + Enum.map_reduce(terminators, column, fn {start, _, _}, column -> + atom = :elixir_tokenizer.terminator(start) + + {{atom, {line, column, nil}}, column + length(Atom.to_charlist(atom))} + end) + + Enum.reverse(tokens, terminators) + end + + defp drop_tokens([{:"}", _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:"]", _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:")", _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:">>", _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:end, _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:",", _} | _] = tokens, 0), do: tokens + defp drop_tokens([{:stab_op, _, :->} | _] = tokens, 0), do: tokens + + defp drop_tokens([{:"}", _} | tokens], counter), do: drop_tokens(tokens, counter - 1) + defp drop_tokens([{:"]", _} | tokens], counter), do: drop_tokens(tokens, counter - 1) + defp drop_tokens([{:")", _} | tokens], counter), do: drop_tokens(tokens, counter - 1) + defp drop_tokens([{:">>", _} | tokens], counter), do: drop_tokens(tokens, counter - 1) + defp drop_tokens([{:end, _} | tokens], counter), do: drop_tokens(tokens, counter - 1) + + defp drop_tokens([{:"{", _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + defp drop_tokens([{:"[", _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + defp drop_tokens([{:"(", _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + defp drop_tokens([{:"<<", _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + defp drop_tokens([{:fn, _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + defp drop_tokens([{:do, _} | tokens], counter), do: drop_tokens(tokens, counter + 1) + + defp drop_tokens([_ | tokens], counter), do: drop_tokens(tokens, counter) + defp drop_tokens([], 0), do: [] + + defp maybe_missing_stab?([{:after, _} | _]), do: true + defp maybe_missing_stab?([{:do, _} | _]), do: true + defp maybe_missing_stab?([{:fn, _} | _]), do: true + defp maybe_missing_stab?([{:else, _} | _]), do: true + defp maybe_missing_stab?([{:catch, _} | _]), do: true + defp maybe_missing_stab?([{:rescue, _} | _]), do: true + + defp maybe_missing_stab?([{:stab_op, _, :->} | _]), do: false + defp maybe_missing_stab?([{:eol, _}, next | _]) when elem(next, 0) != :",", do: false + + defp maybe_missing_stab?([_ | tail]), do: maybe_missing_stab?(tail) + defp maybe_missing_stab?([]), do: false end diff --git a/lib/elixir/src/elixir.erl b/lib/elixir/src/elixir.erl index b53f67017af..b0bc9b2bc6f 100644 --- a/lib/elixir/src/elixir.erl +++ b/lib/elixir/src/elixir.erl @@ -460,8 +460,8 @@ quoted_to_erl(Quoted, ErlS, ExS, Env) -> string_to_tokens(String, StartLine, StartColumn, File, Opts) when is_integer(StartLine), is_binary(File) -> case elixir_tokenizer:tokenize(String, StartLine, StartColumn, Opts) of - {ok, _Line, _Column, [], Tokens, Terminators} -> - {ok, lists:reverse(Tokens, Terminators)}; + {ok, _Line, _Column, [], Tokens, []} -> + {ok, lists:reverse(Tokens)}; {ok, _Line, _Column, Warnings, Tokens, Terminators} -> (lists:keyfind(emit_warnings, 1, Opts) /= {emit_warnings, false}) andalso [elixir_errors:erl_warn(L, File, M) || {L, M} <- lists:reverse(Warnings)], diff --git a/lib/elixir/src/elixir_interpolation.erl b/lib/elixir/src/elixir_interpolation.erl index 934883201d4..63d918c8193 100644 --- a/lib/elixir/src/elixir_interpolation.erl +++ b/lib/elixir/src/elixir_interpolation.erl @@ -64,7 +64,8 @@ extract([$#, ${ | Rest], Buffer, Output, Line, Column, Scope, true, Last) -> {error, Reason}; {ok, EndLine, EndColumn, Warnings, Tokens, Terminators} when Scope#elixir_tokenizer.cursor_completion /= false -> NewScope = Scope#elixir_tokenizer{warnings=Warnings, cursor_completion=noprune}, - Output2 = build_interpol(Line, Column, EndLine, EndColumn, lists:reverse(Tokens, Terminators), Output1), + {CursorTerminators, _} = cursor_complete(EndLine, EndColumn, Terminators), + Output2 = build_interpol(Line, Column, EndLine, EndColumn, lists:reverse(Tokens, CursorTerminators), Output1), extract([], [], Output2, EndLine, EndColumn, NewScope, true, Last); {ok, _, _, _, _, _} -> {error, {string, Line, Column, "missing interpolation terminator: \"}\"", []}} @@ -117,6 +118,16 @@ strip_horizontal_space([H | T], Buffer, Counter) when H =:= $\s; H =:= $\t -> strip_horizontal_space(T, Buffer, Counter) -> {T, Buffer, Counter}. +cursor_complete(Line, Column, Terminators) -> + lists:mapfoldl( + fun({Start, _, _}, AccColumn) -> + End = elixir_tokenizer:terminator(Start), + {{End, {Line, AccColumn, nil}}, AccColumn + length(erlang:atom_to_list(End))} + end, + Column, + Terminators + ). + %% Unescape a series of tokens as returned by extract. unescape_tokens(Tokens) -> diff --git a/lib/elixir/src/elixir_tokenizer.erl b/lib/elixir/src/elixir_tokenizer.erl index 319e29ad702..2b3bc05d03d 100644 --- a/lib/elixir/src/elixir_tokenizer.erl +++ b/lib/elixir/src/elixir_tokenizer.erl @@ -115,9 +115,9 @@ tokenize(String, Line, Column, Opts) -> Scope = lists:foldl(fun ({check_terminators, false}, Acc) -> - Acc#elixir_tokenizer{terminators=none}; - ({cursor_completion, true}, Acc) -> - Acc#elixir_tokenizer{cursor_completion=prune_and_cursor}; + Acc#elixir_tokenizer{cursor_completion=false, terminators=none}; + ({check_terminators, {cursor, Terminators}}, Acc) -> + Acc#elixir_tokenizer{cursor_completion=prune_and_cursor, terminators=Terminators}; ({existing_atoms_only, ExistingAtomsOnly}, Acc) when is_boolean(ExistingAtomsOnly) -> Acc#elixir_tokenizer{existing_atoms_only=ExistingAtomsOnly}; ({static_atoms_encoder, StaticAtomsEncoder}, Acc) when is_function(StaticAtomsEncoder) -> @@ -138,11 +138,10 @@ tokenize(String, Line, Opts) -> tokenize([], Line, Column, #elixir_tokenizer{cursor_completion=Cursor} = Scope, Tokens) when Cursor /= false -> #elixir_tokenizer{ascii_identifiers_only=Ascii, terminators=Terminators, warnings=Warnings} = Scope, - {CursorColumn, CursorTerminators, AccTokens} = + {CursorColumn, AccTerminators, AccTokens} = add_cursor(Line, Column, Cursor, Terminators, Tokens), AllWarnings = maybe_unicode_lint_warnings(Ascii, Tokens, Warnings), - {AccTerminators, _AccColumn} = cursor_complete(Line, CursorColumn, CursorTerminators), {ok, Line, CursorColumn, AllWarnings, AccTokens, AccTerminators}; tokenize([], EndLine, EndColumn, #elixir_tokenizer{terminators=[{Start, {StartLine, StartColumn, _}, _} | _]} = Scope, Tokens) -> @@ -1747,16 +1746,6 @@ error(Reason, Rest, #elixir_tokenizer{warnings=Warnings}, Tokens) -> %% Cursor handling -cursor_complete(Line, Column, Terminators) -> - lists:mapfoldl( - fun({Start, _, _}, AccColumn) -> - End = terminator(Start), - {{End, {Line, AccColumn, nil}}, AccColumn + length(erlang:atom_to_list(End))} - end, - Column, - Terminators - ). - add_cursor(_Line, Column, noprune, Terminators, Tokens) -> {Column, Terminators, Tokens}; add_cursor(Line, Column, prune_and_cursor, Terminators, Tokens) -> diff --git a/lib/elixir/test/elixir/code_fragment_test.exs b/lib/elixir/test/elixir/code_fragment_test.exs index 3ceb4765395..a36f498e05f 100644 --- a/lib/elixir/test/elixir/code_fragment_test.exs +++ b/lib/elixir/test/elixir/code_fragment_test.exs @@ -1228,7 +1228,9 @@ defmodule CodeFragmentTest do assert cc2q!("foo(bar do baz ") == s2q!("foo(bar do baz(__cursor__()) end)") assert cc2q!("foo(bar do baz(") == s2q!("foo(bar do baz(__cursor__()) end)") assert cc2q!("foo(bar do baz bat,") == s2q!("foo(bar do baz(bat, __cursor__()) end)") - assert cc2q!("foo(bar do baz, bat") == s2q!("foo(bar do baz, __cursor__() -> nil end)") + + assert cc2q!("foo(bar do baz, bat", trailing_fragment: " -> :ok end") == + s2q!("foo(bar do baz, __cursor__() -> :ok end)") end test "keyword lists" do @@ -1283,6 +1285,48 @@ defmodule CodeFragmentTest do assert cc2q!("<>") end + test "anonymous functions" do + assert cc2q!("(fn", trailing_fragment: "-> end)") == s2q!("(fn __cursor__() -> nil end)") + + assert cc2q!("(fn", trailing_fragment: "-> 1 + 2 end)") == + s2q!("(fn __cursor__() -> 1 + 2 end)") + + assert cc2q!("(fn x", trailing_fragment: "-> :ok end)") == + s2q!("(fn __cursor__() -> :ok end)") + + assert cc2q!("(fn x", trailing_fragment: ", y -> :ok end)") == + s2q!("(fn __cursor__(), y -> :ok end)") + + assert cc2q!("(fn x,", trailing_fragment: "y -> :ok end)") == + s2q!("(fn x, __cursor__() -> :ok end)") + + assert cc2q!("(fn x,", trailing_fragment: "\ny -> :ok end)") == + s2q!("(fn x, __cursor__()\n -> :ok end)") + + assert cc2q!("(fn x, {", trailing_fragment: "y, z} -> :ok end)") == + s2q!("(fn x, {__cursor__(), z} -> :ok end)") + + assert cc2q!("(fn x, {y", trailing_fragment: ", z} -> :ok end)") == + s2q!("(fn x, {__cursor__(), z} -> :ok end)") + + assert cc2q!("(fn x, {y, ", trailing_fragment: "z} -> :ok end)") == + s2q!("(fn x, {y, __cursor__()} -> :ok end)") + + assert cc2q!("(fn x ->", trailing_fragment: ":ok end)") == + s2q!("(fn x -> __cursor__() end)") + + assert cc2q!("(fn x ->", trailing_fragment: ":ok end)") == + s2q!("(fn x -> __cursor__() end)") + + assert cc2q!("(fn") == s2q!("(__cursor__())") + assert cc2q!("(fn x") == s2q!("(__cursor__())") + assert cc2q!("(fn x,") == s2q!("(__cursor__())") + assert cc2q!("(fn x ->") == s2q!("(fn x -> __cursor__() end)") + assert cc2q!("(fn x -> x") == s2q!("(fn x -> __cursor__() end)") + assert cc2q!("(fn x, y -> x + y") == s2q!("(fn x, y -> x + __cursor__() end)") + assert cc2q!("(fn x, y -> x + y end") == s2q!("(__cursor__())") + end + test "removes tokens until opening" do assert cc2q!("(123") == s2q!("(__cursor__())") assert cc2q!("[foo") == s2q!("[__cursor__()]") @@ -1299,15 +1343,6 @@ defmodule CodeFragmentTest do assert cc2q!("foo bar, :atom") == s2q!("foo(bar, __cursor__())") end - test "removes anonymous functions" do - assert cc2q!("(fn") == s2q!("(fn __cursor__() -> nil end)") - assert cc2q!("(fn x") == s2q!("(fn __cursor__() -> nil end)") - assert cc2q!("(fn x ->") == s2q!("(fn x -> __cursor__() end)") - assert cc2q!("(fn x -> x") == s2q!("(fn x -> __cursor__() end)") - assert cc2q!("(fn x, y -> x + y") == s2q!("(fn x, y -> x + __cursor__() end)") - assert cc2q!("(fn x, y -> x + y end") == s2q!("(__cursor__())") - end - test "removes closed terminators" do assert cc2q!("foo([1, 2, 3]") == s2q!("foo(__cursor__())") assert cc2q!("foo({1, 2, 3}") == s2q!("foo(__cursor__())")