Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core: Update to latest quick-xml #10471

Merged
merged 1 commit into from
Mar 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions core/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ bitflags = "2.0.2"
smallvec = { version = "1.10.0", features = ["union"] }
num-traits = "0.2"
num-derive = "0.3"
quick-xml = { git = "https://github.com/ruffle-rs/quick-xml", rev = "8496365ec1412eb5ba5de350937b6bce352fa0ba" }
quick-xml = "0.28.1"
downcast-rs = "1.2.0"
url = "2.3.1"
weak-table = "0.3.2"
Expand Down Expand Up @@ -59,7 +59,7 @@ version = "0.4.34"
[features]
default = []
lzma = ["lzma-rs", "swf/lzma"]
wasm-bindgen = [ "instant/wasm-bindgen" ]
wasm-bindgen = ["instant/wasm-bindgen"]
avm_debug = []
deterministic = []
timeline_debug = []
Expand Down
69 changes: 45 additions & 24 deletions core/src/avm1/object/xml_object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@ use crate::avm1::error::Error;
use crate::avm1::{Object, ScriptObject, TObject};
use crate::impl_custom_object;
use crate::string::{AvmString, WStr, WString};
use crate::xml::{XmlNode, ELEMENT_NODE, TEXT_NODE};
use crate::xml::{custom_unescape, XmlNode, ELEMENT_NODE, TEXT_NODE};
use gc_arena::{Collect, GcCell, MutationContext};
use quick_xml::events::attributes::AttrError;
use quick_xml::{events::Event, Reader};
use std::fmt;

Expand Down Expand Up @@ -123,21 +124,23 @@ impl<'gc> XmlObject<'gc> {
) -> Result<(), quick_xml::Error> {
let data_utf8 = data.to_utf8_lossy();
let mut parser = Reader::from_str(&data_utf8);
let mut buf = Vec::new();
let mut open_tags = vec![self.as_node()];

self.0.write(activation.context.gc_context).status = XmlStatus::NoError;

loop {
let event = parser.read_event(&mut buf).map_err(|error| {
let event = parser.read_event().map_err(|error| {
self.0.write(activation.context.gc_context).status = match error {
quick_xml::Error::UnexpectedEof(_)
| quick_xml::Error::NameWithQuote(_)
| quick_xml::Error::NoEqAfterName(_)
| quick_xml::Error::DuplicatedAttribute(_, _) => XmlStatus::ElementMalformed,
| quick_xml::Error::InvalidAttr(AttrError::ExpectedEq(_))
| quick_xml::Error::InvalidAttr(AttrError::Duplicated(_, _)) => {
XmlStatus::ElementMalformed
}
quick_xml::Error::EndEventMismatch { .. } => XmlStatus::MismatchedEnd,
quick_xml::Error::XmlDeclWithoutVersion(_) => XmlStatus::DeclNotTerminated,
quick_xml::Error::UnquotedValue(_) => XmlStatus::AttributeNotTerminated,
quick_xml::Error::InvalidAttr(AttrError::UnquotedValue(_)) => {
XmlStatus::AttributeNotTerminated
}
_ => XmlStatus::OutOfMemory,
// Not accounted for:
// quick_xml::Error::UnexpectedToken(_)
Expand All @@ -150,15 +153,17 @@ impl<'gc> XmlObject<'gc> {

match event {
Event::Start(bs) => {
let child = XmlNode::from_start_event(activation, bs, self.id_map())?;
let child =
XmlNode::from_start_event(activation, bs, self.id_map(), parser.decoder())?;
open_tags
.last_mut()
.unwrap()
.append_child(activation.context.gc_context, child);
open_tags.push(child);
}
Event::Empty(bs) => {
let child = XmlNode::from_start_event(activation, bs, self.id_map())?;
let child =
XmlNode::from_start_event(activation, bs, self.id_map(), parser.decoder())?;
open_tags
.last_mut()
.unwrap()
Expand All @@ -167,19 +172,17 @@ impl<'gc> XmlObject<'gc> {
Event::End(_) => {
open_tags.pop();
}
Event::Text(bt) | Event::CData(bt) => {
let text = bt.unescaped()?;
let is_whitespace_char = |c: &u8| matches!(*c, b'\t' | b'\n' | b'\r' | b' ');
let is_whitespace_text = text.iter().all(is_whitespace_char);
if !(text.is_empty() || ignore_white && is_whitespace_text) {
let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
let child =
XmlNode::new(activation.context.gc_context, TEXT_NODE, Some(text));
open_tags
.last_mut()
.unwrap()
.append_child(activation.context.gc_context, child);
}
Event::Text(bt) => {
handle_text_cdata(
custom_unescape(&bt.into_inner(), parser.decoder())?.as_bytes(),
ignore_white,
&mut open_tags,
activation,
);
}
Event::CData(bt) => {
// This is already unescaped
handle_text_cdata(&bt.into_inner(), ignore_white, &mut open_tags, activation);
}
Event::Decl(bd) => {
let mut xml_decl = WString::from_buf(b"<?".to_vec());
Expand All @@ -193,8 +196,8 @@ impl<'gc> XmlObject<'gc> {
// but it doesn't expose the whole tag, only the inner portion of it.
// Flash is also case-insensitive for DOCTYPE declarations. However,
// the `.docTypeDecl` property preserves the original case.
let mut doctype = WString::from_buf(b"<!DOCTYPE".to_vec());
doctype.push_str(WStr::from_units(bt.escaped()));
let mut doctype = WString::from_buf(b"<!DOCTYPE ".to_vec());
doctype.push_str(WStr::from_units(&*bt.escape_ascii().collect::<Vec<_>>()));
doctype.push_byte(b'>');
self.0.write(activation.context.gc_context).doctype =
Some(AvmString::new(activation.context.gc_context, doctype));
Expand Down Expand Up @@ -246,3 +249,21 @@ impl<'gc> TObject<'gc> for XmlObject<'gc> {
Some(self.as_node())
}
}

/// Appends a `TEXT_NODE` child holding `text` to the last entry of `open_tags`.
///
/// Nothing is appended when `text` is empty, or when `ignore_white` is set and
/// every byte of `text` is a tab, line feed, carriage return, or space.
///
/// # Panics
///
/// Panics if `open_tags` is empty and a node needs to be appended.
fn handle_text_cdata<'gc>(
    text: &[u8],
    ignore_white: bool,
    open_tags: &mut [XmlNode<'gc>],
    activation: &mut Activation<'_, 'gc>,
) {
    // Guard clause: empty payloads never produce a node.
    if text.is_empty() {
        return;
    }

    // Guard clause: whitespace-only payloads are dropped on request.
    let whitespace_only = text
        .iter()
        .all(|&byte| matches!(byte, b'\t' | b'\n' | b'\r' | b' '));
    if ignore_white && whitespace_only {
        return;
    }

    let gc_context = activation.context.gc_context;
    let contents = AvmString::new_utf8_bytes(gc_context, text);
    let text_node = XmlNode::new(gc_context, TEXT_NODE, Some(contents));
    open_tags
        .last_mut()
        .unwrap()
        .append_child(gc_context, text_node);
}
99 changes: 65 additions & 34 deletions core/src/avm2/e4x.rs
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,6 @@ impl<'gc> E4XNode<'gc> {

let data_utf8 = string.to_utf8_lossy();
let mut parser = Reader::from_str(&data_utf8);
let mut buf = Vec::new();
let mut open_tags: Vec<E4XNode<'gc>> = vec![];

// FIXME - look these up from static property and settings
Expand Down Expand Up @@ -264,8 +263,38 @@ impl<'gc> E4XNode<'gc> {
Ok(())
}

/// Allocates a text or CDATA node for `text` and passes it to
/// `push_childless_node` together with `open_tags`, `top_level` and `depth`.
///
/// `is_text` selects the node kind: `E4XNodeKind::Text` when true,
/// `E4XNodeKind::CData` when false. No node is created when `text` is empty,
/// or when `ignore_white` is set and every byte of `text` is a tab, line feed,
/// carriage return, or space.
///
/// # Errors
///
/// Propagates any error returned by `push_childless_node`.
fn handle_text_cdata<'gc>(
    text: &[u8],
    ignore_white: bool,
    open_tags: &mut [E4XNode<'gc>],
    top_level: &mut Vec<E4XNode<'gc>>,
    depth: usize,
    is_text: bool,
    activation: &mut Activation<'_, 'gc>,
) -> Result<(), Error<'gc>> {
    let whitespace_only = text
        .iter()
        .all(|&byte| matches!(byte, b'\t' | b'\n' | b'\r' | b' '));
    if text.is_empty() || (ignore_white && whitespace_only) {
        return Ok(());
    }

    let gc_context = activation.context.gc_context;
    let contents = AvmString::new_utf8_bytes(gc_context, text);
    let kind = if is_text {
        E4XNodeKind::Text(contents)
    } else {
        E4XNodeKind::CData(contents)
    };
    let node = E4XNode(GcCell::allocate(
        gc_context,
        E4XNodeData {
            parent: None,
            local_name: None,
            kind,
        },
    ));

    push_childless_node(node, open_tags, top_level, depth, activation)?;
    Ok(())
}

loop {
let event = parser.read_event(&mut buf).map_err(|error| {
let event = parser.read_event().map_err(|error| {
Error::RustError(format!("XML parsing error: {error:?}").into())
})?;

Expand All @@ -290,41 +319,38 @@ impl<'gc> E4XNode<'gc> {
top_level.push(node);
}
}
Event::Text(bt) | Event::CData(bt) => {
let text = bt.unescaped()?;
let is_whitespace_char = |c: &u8| matches!(*c, b'\t' | b'\n' | b'\r' | b' ');
let is_whitespace_text = text.iter().all(is_whitespace_char);
if !(text.is_empty() || ignore_white && is_whitespace_text) {
let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
let node = E4XNode(GcCell::allocate(
activation.context.gc_context,
E4XNodeData {
parent: None,
local_name: None,
kind: match &event {
Event::Text(_) => E4XNodeKind::Text(text),
Event::CData(_) => E4XNodeKind::CData(text),
_ => unreachable!(),
},
},
));
push_childless_node(
node,
&mut open_tags,
&mut top_level,
depth,
activation,
)?;
}
Event::Text(bt) => {
handle_text_cdata(
bt.unescape()?.as_bytes(),
ignore_white,
&mut open_tags,
&mut top_level,
depth,
true,
activation,
)?;
}
Event::CData(bt) => {
// This is already unescaped
handle_text_cdata(
bt,
ignore_white,
&mut open_tags,
&mut top_level,
depth,
false,
activation,
)?;
}
Event::Comment(bt) | Event::PI(bt) => {
if (matches!(event, Event::Comment(_)) && ignore_comments)
|| (matches!(event, Event::PI(_)) && ignore_processing_instructions)
{
continue;
}
let text = bt.unescaped()?;
let text = AvmString::new_utf8_bytes(activation.context.gc_context, &text);
let text = bt.unescape()?;
let text =
AvmString::new_utf8_bytes(activation.context.gc_context, text.as_bytes());
let kind = match event {
Event::Comment(_) => E4XNodeKind::Comment(text),
Event::PI(_) => E4XNodeKind::ProcessingInstruction(text),
Expand Down Expand Up @@ -358,15 +384,20 @@ impl<'gc> E4XNode<'gc> {
bs: &BytesStart<'_>,
) -> Result<Self, quick_xml::Error> {
// FIXME - handle namespace
let name = AvmString::new_utf8_bytes(activation.context.gc_context, bs.local_name());
let name =
AvmString::new_utf8_bytes(activation.context.gc_context, bs.local_name().into_inner());

let mut attribute_nodes = Vec::new();

let attributes: Result<Vec<_>, _> = bs.attributes().collect();
for attribute in attributes? {
let key = AvmString::new_utf8_bytes(activation.context.gc_context, attribute.key);
let value_bytes = attribute.unescaped_value()?;
let value = AvmString::new_utf8_bytes(activation.context.gc_context, &value_bytes);
let key = AvmString::new_utf8_bytes(
activation.context.gc_context,
attribute.key.into_inner(),
);
let value_str = attribute.unescape_value()?;
let value =
AvmString::new_utf8_bytes(activation.context.gc_context, value_str.as_bytes());

let attribute_data = E4XNodeData {
parent: None,
Expand Down
22 changes: 11 additions & 11 deletions core/src/html/text_format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -606,13 +606,11 @@ impl FormatSpans {
let mut reader = Reader::from_reader(&raw_bytes[..]);
reader.expand_empty_elements(true);
reader.check_end_names(false);
let mut buf = Vec::new();
loop {
buf.clear();
match reader.read_event(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) => {
opened_starts.push(opened_buffer.len());
opened_buffer.extend(e.name());
opened_buffer.extend(e.name().into_inner());

let attributes: Result<Vec<_>, _> = e.attributes().with_checks(false).collect();
let attributes = match attributes {
Expand All @@ -626,12 +624,13 @@ impl FormatSpans {
attributes.iter().find_map(|attribute| {
attribute
.key
.into_inner()
.eq_ignore_ascii_case(name)
.then(|| decode_to_wstr(&attribute.value))
})
};
let mut format = format_stack.last().unwrap().clone();
match &e.name().to_ascii_lowercase()[..] {
match &e.name().into_inner().to_ascii_lowercase()[..] {
b"br" => {
if is_multiline {
text.push_byte(b'\n');
Expand Down Expand Up @@ -762,7 +761,7 @@ impl FormatSpans {
format_stack.push(format);
}
Ok(Event::Text(e)) if !e.is_empty() => {
let e = decode_to_wstr(e.escaped());
let e = decode_to_wstr(&e.into_inner());
let e = process_html_entity(&e).unwrap_or(e);
let format = format_stack.last().unwrap().clone();
text.push_str(&e);
Expand All @@ -772,7 +771,7 @@ impl FormatSpans {
// Check for a mismatch.
match opened_starts.last() {
Some(start) => {
if e.name() != &opened_buffer[*start..] {
if e.name().into_inner() != &opened_buffer[*start..] {
continue;
} else {
opened_buffer.truncate(*start);
Expand All @@ -782,7 +781,7 @@ impl FormatSpans {
None => continue,
}

match &e.name().to_ascii_lowercase()[..] {
match &e.name().into_inner().to_ascii_lowercase()[..] {
b"br" | b"sbr" => {
// Skip pop from `format_stack`.
continue;
Expand Down Expand Up @@ -1422,16 +1421,17 @@ impl<'a> FormatState<'a> {
self.close_tags();
}
let encoded = text.to_utf8_lossy();
let escaped = escape(encoded.as_bytes());
let escaped = escape(&encoded);

if let Cow::Borrowed(_) = &encoded {
// Optimization: if the utf8 conversion was a no-op, we know the text is ASCII;
// escaping special characters cannot insert new non-ASCII characters, so we can
// simply append the bytes directly without converting from UTF8.
self.result.push_str(WStr::from_units(&*escaped));
self.result.push_str(WStr::from_units(escaped.as_bytes()));
} else {
// TODO: updating our quick_xml fork to upstream will allow removing this UTF8 check.
let escaped = std::str::from_utf8(&escaped).expect("escaped text should be utf8");
let escaped =
std::str::from_utf8(escaped.as_bytes()).expect("escaped text should be utf8");
self.result.push_utf8(escaped);
}
}
Expand Down
2 changes: 1 addition & 1 deletion core/src/xml.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@
mod iterators;
mod tree;

pub use tree::{XmlNode, ELEMENT_NODE, TEXT_NODE};
pub use tree::{custom_unescape, XmlNode, ELEMENT_NODE, TEXT_NODE};
Loading