Skip to content

Commit

Permalink
Always use optimal encoding function (#512)
Browse files Browse the repository at this point in the history
  • Loading branch information
ypconstante authored Dec 28, 2023
1 parent deb5807 commit 539d76a
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 66 deletions.
54 changes: 45 additions & 9 deletions lib/floki/entities.ex
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,51 @@ defmodule Floki.Entities do
* greater-than sign - > - is replaced by "&gt;".
All other symbols are going to remain the same.
Optimized IO data implementation from Plug.HTML
"""
@spec encode(String.t()) :: String.t()
def encode(string) when is_binary(string) do
String.replace(string, ["'", "\"", "&", "<", ">"], fn
"'" -> "&#39;"
"\"" -> "&quot;"
"&" -> "&amp;"
"<" -> "&lt;"
">" -> "&gt;"
end)
@spec encode(iodata()) :: iodata()
def encode(string) when is_binary(string) do
  encode(string, 0, string, [])
end

def encode(data) do
  data
  |> IO.iodata_to_binary()
  |> encode()
end

# Bytes that must be escaped, paired with the entity emitted in their place.
escapes = [
  {?<, "&lt;"},
  {?>, "&gt;"},
  {?&, "&amp;"},
  {?", "&quot;"},
  {?', "&#39;"}
]

# encode/4 — no run of plain bytes is pending.
# `offset` is the position in `source` where the next plain run would begin,
# and `out` is the iodata built so far.
for {byte, entity} <- escapes do
  defp encode(<<unquote(byte), tail::bits>>, offset, source, out),
    do: encode(tail, offset + 1, source, [out | unquote(entity)])
end

# A plain byte opens a run of length 1; nothing is copied yet.
defp encode(<<_byte, tail::bits>>, offset, source, out),
  do: encode(tail, offset, source, out, 1)

defp encode(<<>>, _offset, _source, out), do: out

# encode/5 — a run of `run` plain bytes starting at `offset` is pending.
for {byte, entity} <- escapes do
  defp encode(<<unquote(byte), tail::bits>>, offset, source, out, run) do
    # Flush the pending run as a slice of the source, then append the entity.
    chunk = binary_part(source, offset, run)
    encode(tail, offset + run + 1, source, [out, chunk | unquote(entity)])
  end
end

defp encode(<<_byte, tail::bits>>, offset, source, out, run),
  do: encode(tail, offset, source, out, run + 1)

# An offset of 0 at the end means no escapable byte was ever seen, so the
# input binary is handed back untouched.
defp encode(<<>>, 0, source, _out, _run), do: source

defp encode(<<>>, offset, source, out, run),
  do: [out | binary_part(source, offset, run)]
end
53 changes: 1 addition & 52 deletions lib/floki/raw_html.ex
Original file line number Diff line number Diff line change
Expand Up @@ -131,11 +131,7 @@ defmodule Floki.RawHTML do
end

# Renders a single attribute as iodata.
#
# A `{attr, value}` pair becomes `attr="value"`, where the value is passed
# through `encoder` (a one-argument function). Any other term is returned
# as-is.
defp build_attrs({attr, value}, encoder) do
  # The caller-supplied encoder is always applied; there is no separate
  # branch for a default encoder, so the value is processed exactly once.
  [attr, "=\"", encoder.(value) | "\""]
end

defp build_attrs(attr, _encoder), do: attr
Expand Down Expand Up @@ -164,53 +160,6 @@ defmodule Floki.RawHTML do
end
end

# html_escape
# Optimized IO data implementation from Plug.HTML

# Entry points: binaries are scanned directly; any other iodata is flattened
# to a binary first and then scanned.
defp html_escape(data) when is_binary(data) do
  html_escape(data, 0, data, [])
end

defp html_escape(data) do
  data
  |> IO.iodata_to_binary()
  |> html_escape()
end

# Bytes that must be escaped, paired with their replacement entity.
escapes = [
  {?<, "&lt;"},
  {?>, "&gt;"},
  {?&, "&amp;"},
  {?", "&quot;"},
  {?', "&#39;"}
]

# html_escape/4 — no plain run pending; `from` marks where the next plain
# run would start inside `input`, `result` is the iodata produced so far.
for {char, replacement} <- escapes do
  defp html_escape(<<unquote(char), remaining::bits>>, from, input, result) do
    html_escape(remaining, from + 1, input, [result | unquote(replacement)])
  end
end

defp html_escape(<<_other, remaining::bits>>, from, input, result) do
  # First plain byte: start counting a run instead of copying it.
  html_escape(remaining, from, input, result, 1)
end

defp html_escape(<<>>, _from, _input, result), do: result

# html_escape/5 — `count` plain bytes starting at `from` are pending.
for {char, replacement} <- escapes do
  defp html_escape(<<unquote(char), remaining::bits>>, from, input, result, count) do
    # Emit the pending run as one slice of the input, followed by the entity.
    html_escape(
      remaining,
      from + count + 1,
      input,
      [result, binary_part(input, from, count) | unquote(replacement)]
    )
  end
end

defp html_escape(<<_other, remaining::bits>>, from, input, result, count) do
  html_escape(remaining, from, input, result, count + 1)
end

# `from` still at 0 means nothing was escaped: return the input unchanged.
defp html_escape(<<>>, 0, input, _result, _count), do: input

defp html_escape(<<>>, from, input, result, count) do
  [result | binary_part(input, from, count)]
end

# helpers

# TODO: Use Enum.map_intersperse/3 when we require Elixir v1.10+
Expand Down
10 changes: 5 additions & 5 deletions test/floki/entities_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@ defmodule Floki.EntitiesTest do

describe "encode/1" do
test "encode single-quote" do
  # encode/1 returns iodata, so flatten it before comparing with a binary.
  assert IO.iodata_to_binary(Entities.encode("'")) == "&#39;"
end

test "encode double-quote" do
  # encode/1 returns iodata, so flatten it before comparing with a binary.
  assert IO.iodata_to_binary(Entities.encode("\"")) == "&quot;"
end

test "ampersand" do
  # encode/1 returns iodata, so flatten it before comparing with a binary.
  assert IO.iodata_to_binary(Entities.encode("&")) == "&amp;"
end

test "encode less-than sign" do
  # encode/1 returns iodata, so flatten it before comparing with a binary.
  assert IO.iodata_to_binary(Entities.encode("<")) == "&lt;"
end

test "encode greater-than sign" do
  # encode/1 returns iodata, so flatten it before comparing with a binary.
  assert IO.iodata_to_binary(Entities.encode(">")) == "&gt;"
end

test "does not encode others" do
Expand Down

0 comments on commit 539d76a

Please sign in to comment.