Skip to content

Commit

Permalink
Correct error handling (#86)
Browse files Browse the repository at this point in the history
From github/tank-bohr

* Introduces custom exceptions

New API is

- SweetXml.stream_tags!/2,3
- SweetXml.stream!/2

* Fixes CR comments
  • Loading branch information
tank-bohr authored Nov 19, 2021
1 parent 66cb02c commit 86d039d
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 11 deletions.
101 changes: 100 additions & 1 deletion lib/sweet_xml.ex
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,20 @@ defmodule SweetXpath do
namespaces: []
end

defmodule SweetXml.XmerlFatal do
  @moduledoc """
  Exception built from a fatal-error tuple of the shape
  `{reason, {:file, file}, {:line, line}, {:col, col}}`.

  The individual parts are kept in the `:reason`, `:file`, `:line` and `:col`
  fields; `:message` is the inspected `reason`.
  """

  defexception [:message, :reason, :file, :line, :col]

  @impl Exception
  def exception({cause, {:file, path}, {:line, line_no}, {:col, column}}) do
    %__MODULE__{
      message: inspect(cause),
      reason: cause,
      file: path,
      line: line_no,
      col: column
    }
  end
end

defmodule SweetXml.DTDError do
  @moduledoc """
  Exception carrying only a `:message`.

  Raised by the DTD handling callbacks when they are configured with this
  module as their exception type.
  """

  defexception message: nil
end


defmodule SweetXml do
@moduledoc ~S"""
`SweetXml` is a thin wrapper around `:xmerl`. It allows you to convert a
Expand Down Expand Up @@ -143,7 +157,6 @@ defmodule SweetXml do
@type spec :: %SweetXpath{}
@type xmlElement :: record(:xmlElement)


@doc ~s"""
`sigil_x/2` simply returns a `%SweetXpath{}` struct, with modifiers converted to
boolean fields:
Expand Down Expand Up @@ -359,6 +372,40 @@ defmodule SweetXml do
end)
end

@doc """
Streams `{tag_name, xml_element}` tuples for the requested `tags` while `doc`
is being scanned, delegating the actual streaming to `stream!/2`.

## Arguments

  * `doc` - the XML input, passed to `stream!/2` unchanged.
  * `tags` - a single tag name (atom) or a list of tag names. An empty list
    emits every element.
  * `options` - a proplist. The `:discard` entry is consumed here; everything
    else is appended to the xmerl options given to `stream!/2`.

## Options

  * `:discard` - list of tag names. Elements with these names are still
    emitted when they are also listed in `tags`, but they are not added to the
    scanner's accumulator.

Errors are raised as described for `stream!/2`.
"""
def stream_tags!(doc, tags, options \\ []) do
  wanted = if is_atom(tags), do: [tags], else: tags

  # Decided once up front; the hook below runs for every scanned entity.
  emit_all? = wanted == []

  {discard_tags, xmerl_options} =
    case :proplists.lookup(:discard, options) do
      {:discard, discarded} -> {discarded, :proplists.delete(:discard, options)}
      :none -> {[], options}
    end

  stream!(doc, fn emit ->
    [
      hook_fun: fn
        entity, xstate when Record.is_record(entity, :xmlElement) ->
          name = xmlElement(entity, :name)

          if emit_all? or name in wanted do
            emit.({name, entity})
          end

          {entity, xstate}

        entity, xstate ->
          {entity, xstate}
      end,
      acc_fun: fn
        entity, acc, xstate when Record.is_record(entity, :xmlElement) ->
          # Discarded elements are left out of the accumulator.
          if xmlElement(entity, :name) in discard_tags do
            {acc, xstate}
          else
            {[entity | acc], xstate}
          end

        entity, acc, xstate ->
          {[entity | acc], xstate}
      end
    ] ++ xmerl_options
  end)
end

@doc """
Create an element stream from a XML `doc`.
Expand Down Expand Up @@ -428,6 +475,58 @@ defmodule SweetXml do
end
end

@doc """
Builds a `Stream.resource/3` that scans `doc` with `:xmerl_scan` in a
monitored process and raises when that process fails.

## Arguments

  * `doc` - a binary or a charlist (both are normalised to a one-element list
    holding a binary), or any other value, which is handed to
    `continuation_opts/2` as is.
  * `options_callback` - a one-argument function. It receives an `emit`
    function that sends an event to the process that started the stream, and
    must return the option list for `:xmerl_scan`. The `:dtd` entry (default
    `:all`) is removed from that list and translated by
    `SweetXml.Options.handle_dtd/2` with `SweetXml.DTDError`.

## Raises

  * `SweetXml.XmerlFatal` - when the scanning process exits with
    `{:fatal, error}`.
  * the original exception - when the scanning process exits with
    `{exception, stacktrace}`.

Any other abnormal exit reason is propagated with `exit/1`.
"""
def stream!(doc, options_callback) when is_binary(doc) do
  stream!([doc], options_callback)
end

def stream!([c | _] = doc, options_callback) when is_integer(c) do
  # Charlists must stay on the raising path, hence `stream!/2` and not `stream/2`.
  stream!([IO.iodata_to_binary(doc)], options_callback)
end

def stream!(doc, options_callback) do
  Stream.resource(
    fn ->
      {parent, ref} = waiter = {self(), make_ref()}
      opts = options_callback.(fn e -> send(parent, {:event, ref, e}) end)

      ets = :ets.new(nil, [:public])
      dtd_arg = :proplists.get_value(:dtd, opts, :all)
      opts = :proplists.delete(:dtd, opts)
      opts = SweetXml.Options.handle_dtd(dtd_arg, SweetXml.DTDError).(ets) ++ opts

      {pid, monref} =
        spawn_monitor(fn ->
          :xmerl_scan.string('', opts ++ continuation_opts(doc, waiter))
        end)

      {ref, pid, monref, ets}
    end,
    fn {ref, pid, monref, ets} = acc ->
      receive do
        {:DOWN, ^monref, :process, ^pid, :normal} ->
          {:halt, {:parse_ended, ets}}

        {:DOWN, ^monref, :process, ^pid, {:fatal, error}} ->
          {:halt, {:fatal, error, ets}}

        {:DOWN, ^monref, :process, ^pid, error} ->
          {:halt, {:error, error, ets}}

        {:event, ^ref, event} ->
          {[event], acc}

        {:wait, ^ref} ->
          send(pid, {:continue, ref})
          {[], acc}
      end
    end,
    fn
      {:parse_ended, ets} ->
        _ = :ets.delete(ets)
        :ok

      {:fatal, error, ets} ->
        _ = :ets.delete(ets)
        raise SweetXml.XmerlFatal, error

      {:error, {exception, stacktrace}, ets} ->
        _ = :ets.delete(ets)
        reraise(exception, stacktrace)

      # Exit reasons that are not `{exception, stacktrace}` used to hit a
      # FunctionClauseError here and leak the ETS table.
      {:error, reason, ets} ->
        _ = :ets.delete(ets)
        exit(reason)

      # The consumer stopped before the scanner finished.
      {ref, pid, monref, ets} ->
        Process.demonitor(monref)
        _ = :ets.delete(ets)
        flush_halt(pid, ref)
    end
  )
end

@doc ~S"""
`xpath` allows you to query an XML document with XPath.
Expand Down
30 changes: 20 additions & 10 deletions lib/sweet_xml/options.ex
Original file line number Diff line number Diff line change
@@ -1,23 +1,32 @@
defmodule SweetXml.Options do
@moduledoc false

def handle_dtd(:all) do
def handle_dtd(dtd_option, exception_module \\ RuntimeError)

# `:all` adds no scanner options, so the exception module is irrelevant.
def handle_dtd(:all, _exception_module), do: fn _ets -> [] end
def handle_dtd(:none) do
# `:none` is the union of the `:internal_only` options and the options for an
# empty `:only` allow-list, both built with the caller's exception module.
def handle_dtd(:none, exception_module) do
  fn ets ->
    handle_dtd(:internal_only, exception_module).(ets) ++
      handle_dtd([only: []], exception_module).(ets)
  end
end
def handle_dtd(:internal_only) do
fn _ ->
[fetch_fun: fn _, _ -> {:error, "no external entity allowed"} end]
# With `SweetXml.DTDError` the fetch callback raises; with any other exception
# module it returns an error tuple instead.
def handle_dtd(:internal_only, SweetXml.DTDError) do
  fn _ets ->
    [
      fetch_fun: fn _uri, _state ->
        raise SweetXml.DTDError, message: "no external entity allowed"
      end
    ]
  end
end

def handle_dtd(:internal_only, _exception_module) do
  fn _ets ->
    [fetch_fun: fn _uri, _state -> {:error, "no external entity allowed"} end]
  end
end
def handle_dtd(only: entity) when is_atom(entity) do
handle_dtd(only: [entity])
# A single entity name is normalised to a one-element allow-list.
def handle_dtd([only: entity], exception_module) when is_atom(entity),
  do: handle_dtd([only: [entity]], exception_module)
def handle_dtd(only: entities) when is_list(entities) do
def handle_dtd([only: entities], exception_module) when is_list(entities) do
fn ets ->
read = fn
context, name, state ->
Expand All @@ -37,7 +46,8 @@ defmodule SweetXml.Options do
[] -> :ets.insert(ets, {{context, name}, value})
_ -> :ok
end
false -> raise("DTD not allowed: #{name}")
false ->
raise exception_module, message: "DTD not allowed: #{name}"
end
state

Expand Down
44 changes: 44 additions & 0 deletions test/files/invalid.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
<?xml version="1.05" encoding="UTF-8"?>
<game>
<matchups>
<matchup winner-id="1">
<name>Match One</name>
<teams>
<team>
<id>1</id>
<name>Team One</name>
</team>
<team>
<id>2</id>
<name>Team Two</name>
</team>
</teams>
</matchup>
<matchup winner-id="2">
<name>Match Two</name>
<teams>
<team>
<id>2</id>
<name>Team Two</name>
</team>
<team>
<id>3</id>
<name>Team Three</name>
</team>
</teams>
</matchup>
<matchup winner-id="1">
<name>Match Three</name>
<teams>
<team>
<id>1</id>
<name>Team One</name>
</team>
<team>
<id>3</id>
<name>Team & Three</name>
</team>
</teams>
</matchup>
</matchups>
</game>
46 changes: 46 additions & 0 deletions test/sweet_xml_stream_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,50 @@ defmodule SweetXmlStreamTest do
assert result == ['Nested Head', 'XML Parsing']
end

describe "stream_tags!/2" do
  # Exercises the raising variant itself, so the happy path of `stream_tags!`
  # is covered as well as its error paths.
  test "streaming tags", %{simple_stream: simple_stream} do
    result =
      simple_stream
      |> stream_tags!([:li, :special_match_key], discard: [:li, :special_match_key])
      |> Stream.map(fn {_, doc} -> xpath(doc, ~x"./text()") end)
      |> Enum.to_list()

    assert result == ['\n        First', 'Second\n      ', 'Third', 'Forth', 'first star']

    result =
      simple_stream
      |> stream_tags!(:head)
      |> Stream.map(fn {_, doc} -> xpath(doc, ~x"./title/text()") end)
      |> Enum.to_list()

    assert result == ['Nested Head', 'XML Parsing']
  end

  test "invalid xml" do
    assert_raise SweetXml.XmerlFatal, ":error_scanning_entity_ref", fn ->
      "test/files/invalid.xml"
      |> File.stream!()
      |> SweetXml.stream_tags!(:matchup, quiet: true)
      |> Stream.run()
    end
  end

  test "DTD error" do
    assert_raise SweetXml.DTDError, "DTD not allowed: lol1", fn ->
      "test/files/billion_laugh.xml"
      |> File.stream!()
      |> SweetXml.stream_tags!(:banana, dtd: :none, quiet: true)
      |> Stream.run()
    end
  end

  test "internal only" do
    assert_raise SweetXml.DTDError, "no external entity allowed", fn ->
      "test/files/xxe.xml"
      |> File.stream!()
      |> SweetXml.stream_tags!(:result, dtd: :internal_only)
      |> Stream.run()
    end
  end
end
end

0 comments on commit 86d039d

Please sign in to comment.