diff --git a/lib/saxy/parser/builder.ex b/lib/saxy/parser/builder.ex index b11ad28..05ca814 100644 --- a/lib/saxy/parser/builder.ex +++ b/lib/saxy/parser/builder.ex @@ -889,12 +889,9 @@ defmodule Saxy.Parser.Builder do token in unquote(edge_ngrams("]]")) when more? -> halt!(element_cdata(token, more?, original, pos, state, len)) - char <> rest when is_ascii(char) -> + char <> rest -> element_cdata(rest, more?, original, pos, state, len + 1) - <> <> rest -> - element_cdata(rest, more?, original, pos, state, len + Utils.compute_char_len(codepoint)) - _ -> Utils.parse_error(original, pos + len, state, {:token, :"]]"}) end diff --git a/test/saxy/parser/element_test.exs b/test/saxy/parser/element_test.exs index af6835a..acac93a 100644 --- a/test/saxy/parser/element_test.exs +++ b/test/saxy/parser/element_test.exs @@ -178,6 +178,11 @@ defmodule Saxy.Parser.ElementTest do assert Exception.message(error) == "unexpected end of input, expected token: :\"]]\"" end + test "handles CDATA with UTF-8 encoded £ symbol" do + events = assert_parse("") + assert find_events(events, :characters) == [{:characters, "£26,000"}] + end + test "parses processing instruction" do events = assert_parse("") assert length(events) == 2