Skip to content

Commit

Permalink
Optimize inspection of binaries
Browse files Browse the repository at this point in the history
We used the following code to profile memory allocations when inspecting a large binary:

    data = Base.encode64(:crypto.strong_rand_bytes(2_000_000))

    Mix.Tasks.Profile.Tprof.profile(
      fn -> inspect(data, printable_limit: :infinity) end,
      type: :memory
    )

The first result reported this:

    Profile results of #PID<0.106.0>
    #                                    CALLS      %      WORDS PER CALL
    Total                             10666685 100.00 9268308211   868.90
    :code.ensure_loaded/1                    1   0.00          3     3.00
    Inspect.BitString.inspect/2              1   0.00          4     4.00
    Inspect.Algebra.group/2                  1   0.00          4     4.00
    Inspect.Algebra.format/2                 1   0.00          6     6.00
    Inspect.Algebra.format/3                 3   0.00          8     2.67
    :erlang.iolist_to_binary/1               2   0.00         11     5.50
    anonymous fn/2 in Kernel.struct/2        1   0.00         17    17.00
    Inspect.Opts.new/1                       1   0.00         17    17.00
    Code.Identifier.escape/5           2666669   0.06    5333344     2.00
    String.recur_printable?/2          2666669   0.29   26666778    10.00
    Code.Identifier.escape_map/1       2666668  28.72 2662035405   998.26
    Code.Identifier.escape_char/1      2666668  70.93 6574272614  2465.35

As you can see, most of the allocations were in `escape_char/1`,
which makes sense: it was invoked a lot for raw binaries
and it allocated lists instead of relying on binary optimizations.

We reimplemented Code.Identifier.escape/5 to rely on binary
optimizations which then yielded this:

    Profile results of #PID<0.112.0>
    #                                   CALLS      %    WORDS PER CALL
    Total                             5333350 100.00 26666864     5.00
    :code.ensure_loaded/1                   1   0.00        3     3.00
    Inspect.BitString.inspect/2             1   0.00        4     4.00
    Inspect.Algebra.group/2                 1   0.00        4     4.00
    Inspect.Algebra.format/2                1   0.00        6     6.00
    Code.Identifier.escape/4                1   0.00        8     8.00
    Code.Identifier.escape/5          2666669   0.00        8     0.00
    Inspect.Algebra.format/3                3   0.00        8     2.67
    :erlang.iolist_to_binary/1              2   0.00       11     5.50
    anonymous fn/2 in Kernel.struct/2       1   0.00       17    17.00
    Inspect.Opts.new/1                      1   0.00       17    17.00
    String.recur_printable?/2         2666669 100.00 26666778    10.00

Now the allocations are all on `recur_printable?/2`. By running this
command, we can find out why it is allocating so much:

    ERL_COMPILER_OPTIONS=bin_opt_info elixir lib/elixir/lib/string.ex

It turns out that, due to a catch-all clause, new binary match contexts
had to be allocated. By addressing that, we now get:

    Profile results of #PID<0.119.0>
    #                                   CALLS      % WORDS PER CALL
    Total                             5333350 100.00    91     0.00
    :code.ensure_loaded/1                   1   3.30     3     3.00
    Inspect.BitString.inspect/2             1   4.40     4     4.00
    Inspect.Algebra.group/2                 1   4.40     4     4.00
    String.recur_printable?/2         2666669   5.49     5     0.00
    Inspect.Algebra.format/2                1   6.59     6     6.00
    Code.Identifier.escape/4                1   8.79     8     8.00
    Code.Identifier.escape/5          2666669   8.79     8     0.00
    Inspect.Algebra.format/3                3   8.79     8     2.67
    :erlang.iolist_to_binary/1              2  12.09    11     5.50
    anonymous fn/2 in Kernel.struct/2       1  18.68    17    17.00
    Inspect.Opts.new/1                      1  18.68    17    17.00

This is several orders of magnitude fewer words. We basically
allocate the new escaped binary and a few extra bytes.

Closes #14029.
  • Loading branch information
josevalim committed Dec 3, 2024
1 parent eadbd8b commit 768fa49
Show file tree
Hide file tree
Showing 2 changed files with 36 additions and 30 deletions.
64 changes: 35 additions & 29 deletions lib/elixir/lib/code/identifier.ex
Original file line number Diff line number Diff line change
Expand Up @@ -71,41 +71,45 @@ defmodule Code.Identifier do
Escapes the given identifier.
"""
@spec escape(binary(), char() | nil, :infinity | non_neg_integer, (char() -> iolist() | false)) ::
{escaped :: iolist(), remaining :: binary()}
{escaped :: binary(), remaining :: binary()}
def escape(binary, char, limit \\ :infinity, fun \\ &escape_map/1)
when ((char in 0..0x10FFFF or is_nil(char)) and limit == :infinity) or
when (is_binary(binary) and ((char in 0..0x10FFFF or is_nil(char)) and limit == :infinity)) or
(is_integer(limit) and limit >= 0) do
escape(binary, char, limit, [], fun)
escape(binary, char, limit, <<>>, fun)
end

defp escape(<<_, _::binary>> = binary, _char, 0, acc, _fun) do
{acc, binary}
end

defp escape(<<char, t::binary>>, char, count, acc, fun) do
escape(t, char, decrement(count), [acc | [?\\, char]], fun)
escape(t, char, decrement(count), <<acc::binary, ?\\, char>>, fun)
end

defp escape(<<?#, ?{, t::binary>>, char, count, acc, fun) do
escape(t, char, decrement(count), [acc | [?\\, ?#, ?{]], fun)
escape(t, char, decrement(count), <<acc::binary, ?\\, ?#, ?{>>, fun)
end

defp escape(<<h::utf8, t::binary>>, char, count, acc, fun) do
escaped = if value = fun.(h), do: value, else: escape_char(h)
escape(t, char, decrement(count), [acc | escaped], fun)
if value = fun.(h) do
value = IO.iodata_to_binary(value)
escape(t, char, decrement(count), <<acc::binary, value::binary>>, fun)
else
escape(t, char, decrement(count), escape_char(h, acc), fun)
end
end

defp escape(<<a::4, b::4, t::binary>>, char, count, acc, fun) do
escape(t, char, decrement(count), [acc | [?\\, ?x, to_hex(a), to_hex(b)]], fun)
escape(t, char, decrement(count), <<acc::binary, ?\\, ?x, to_hex(a), to_hex(b)>>, fun)
end

defp escape(<<>>, _char, _count, acc, _fun) do
{acc, <<>>}
end

defp escape_char(0), do: [?\\, ?0]
defp escape_char(0, acc), do: <<acc::binary, ?\\, ?0>>

defp escape_char(char)
defp escape_char(char, acc)
# Some characters that are confusing (zero-width / alternative spaces) are displayed
# using their unicode representation:
# https://en.wikipedia.org/wiki/Universal_Character_Set_characters#Special-purpose_characters
Expand All @@ -131,42 +135,44 @@ defmodule Code.Identifier do
when char in 0x2000..0x200A
when char == 0x205F do
<<a::4, b::4, c::4, d::4>> = <<char::16>>
[?\\, ?u, to_hex(a), to_hex(b), to_hex(c), to_hex(d)]
<<acc::binary, ?\\, ?u, to_hex(a), to_hex(b), to_hex(c), to_hex(d)>>
end

defp escape_char(char)
defp escape_char(char, acc)
when char in 0x20..0x7E
when char in 0xA0..0xD7FF
when char in 0xE000..0xFFFD
when char in 0x10000..0x10FFFF do
<<char::utf8>>
<<acc::binary, char::utf8>>
end

defp escape_char(char) when char < 0x100 do
defp escape_char(char, acc) when char < 0x100 do
<<a::4, b::4>> = <<char::8>>
[?\\, ?x, to_hex(a), to_hex(b)]
<<acc::binary, ?\\, ?x, to_hex(a), to_hex(b)>>
end

defp escape_char(char) when char < 0x10000 do
defp escape_char(char, acc) when char < 0x10000 do
<<a::4, b::4, c::4, d::4>> = <<char::16>>
[?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), ?}]
<<acc::binary, ?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), ?}>>
end

defp escape_char(char) when char < 0x1000000 do
defp escape_char(char, acc) when char < 0x1000000 do
<<a::4, b::4, c::4, d::4, e::4, f::4>> = <<char::24>>
[?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), to_hex(e), to_hex(f), ?}]

<<acc::binary, ?\\, ?x, ?{, to_hex(a), to_hex(b), to_hex(c), to_hex(d), to_hex(e), to_hex(f),
?}>>
end

defp escape_map(?\a), do: [?\\, ?a]
defp escape_map(?\b), do: [?\\, ?b]
defp escape_map(?\d), do: [?\\, ?d]
defp escape_map(?\e), do: [?\\, ?e]
defp escape_map(?\f), do: [?\\, ?f]
defp escape_map(?\n), do: [?\\, ?n]
defp escape_map(?\r), do: [?\\, ?r]
defp escape_map(?\t), do: [?\\, ?t]
defp escape_map(?\v), do: [?\\, ?v]
defp escape_map(?\\), do: [?\\, ?\\]
defp escape_map(?\a), do: "\\a"
defp escape_map(?\b), do: "\\b"
defp escape_map(?\d), do: "\\d"
defp escape_map(?\e), do: "\\e"
defp escape_map(?\f), do: "\\f"
defp escape_map(?\n), do: "\\n"
defp escape_map(?\r), do: "\\r"
defp escape_map(?\t), do: "\\t"
defp escape_map(?\v), do: "\\v"
defp escape_map(?\\), do: "\\\\"
defp escape_map(_), do: false

@compile {:inline, to_hex: 1, decrement: 1}
Expand Down
2 changes: 1 addition & 1 deletion lib/elixir/lib/string.ex
Original file line number Diff line number Diff line change
Expand Up @@ -326,7 +326,7 @@ defmodule String do
recur_printable?(string, character_limit)
end

defp recur_printable?(_string, 0), do: true
defp recur_printable?(<<_::binary>>, 0), do: true
defp recur_printable?(<<>>, _character_limit), do: true

for char <- 0x20..0x7E do
Expand Down

0 comments on commit 768fa49

Please sign in to comment.