Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

async support v2 #417

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,17 @@ document-features = { version = "0.2", optional = true }
encoding_rs = { version = "0.8", optional = true }
serde = { version = "1.0", optional = true }
memchr = "2.5"
tokio = { version = "1.19", optional = true, default-features = false, features = ["io-util"] }
async-recursion = { version = "1.0", optional = true }

[dev-dependencies]
criterion = "0.3"
pretty_assertions = "1.2"
regex = "1"
serde = { version = "1.0", features = ["derive"] }
serde-value = "0.7"
tokio = { version = "1.20", default-features = false, features = ["macros", "rt-multi-thread"] }
tokio-test = "0.4"

[lib]
bench = false
Expand Down Expand Up @@ -101,6 +105,19 @@ serialize = ["serde"]
## Enables support for recognizing all [HTML 5 entities](https://dev.w3.org/html5/html-author/charref)
escape-html = []

## Enables support for asynchronous reading from `tokio`'s I/O traits.
##
## This can be used for example with `Reader::from_async_reader(read)` where `read`
## is some type implementing `tokio::io::AsyncBufRead`.
async = ["tokio", "async-recursion"]

## Enables support for asynchronous reading from files using `tokio`. This feature
## automatically enables the `async` feature as well.
##
## This can be used for example with `Reader::from_file_async(path)` where `path`
## is a file path.
async-fs = ["async", "tokio/fs"]

[package.metadata.docs.rs]
all-features = true

Expand All @@ -119,3 +136,7 @@ required-features = ["serialize"]
[[test]]
name = "serde-migrated"
required-features = ["serialize"]

[[test]]
name = "async_test"
required-features = ["async"]
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,11 @@ let xml = r#"<tag1 att1 = "test">
</tag2>
</tag1>"#;

let mut reader = Reader::from_str(xml);
let mut reader = Reader::from_reader(xml.as_bytes());
// If you want to read from a string or byte slice without buffering, use:
// let mut reader = Reader::from_str(xml);
// In that case, `Vec` is *not* needed for buffering below and you should use
// `read_event` instead of `read_event_into`.
reader.trim_text(true);

let mut count = 0;
Expand Down Expand Up @@ -75,9 +79,8 @@ let xml = r#"<this_tag k1="v1" k2="v2"><child>text</child></this_tag>"#;
let mut reader = Reader::from_str(xml);
reader.trim_text(true);
let mut writer = Writer::new(Cursor::new(Vec::new()));
let mut buf = Vec::new();
loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::Start(ref e)) if e.name() == b"this_tag" => {

// creates a new element ... alternatively we could reuse `e` by calling
Expand All @@ -101,7 +104,6 @@ loop {
Ok(e) => assert!(writer.write_event(&e).is_ok()),
Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
}
buf.clear();
}

let result = writer.into_inner().into_inner();
Expand Down
2 changes: 1 addition & 1 deletion benches/macrobenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static PLAYERS: &[u8] = include_bytes!("../tests/documents/players.xml");
// TODO: read the namespaces too
// TODO: use fully normalized attribute values
fn parse_document(doc: &[u8]) -> XmlResult<()> {
let mut r = Reader::from_reader(doc);
let mut r = Reader::from_bytes(doc);
loop {
match r.read_event()? {
Event::Start(e) | Event::Empty(e) => {
Expand Down
70 changes: 22 additions & 48 deletions benches/microbenches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,15 @@ fn read_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -50,19 +48,17 @@ fn read_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(_)) | Ok(Event::Empty(_)) => count += 1,
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -79,18 +75,16 @@ fn read_namespaced_event(c: &mut Criterion) {
let mut group = c.benchmark_group("read_namespaced_event");
group.bench_function("trim_text = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -101,20 +95,18 @@ fn read_namespaced_event(c: &mut Criterion) {

group.bench_function("trim_text = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(SAMPLE);
let mut r = Reader::from_bytes(SAMPLE);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
let mut ns_buf = Vec::new();
loop {
match r.read_namespaced_event(&mut buf, &mut ns_buf) {
match r.read_namespaced_event(&mut ns_buf) {
Ok((_, Event::Start(_))) | Ok((_, Event::Empty(_))) => count += 1,
Ok((_, Event::Eof)) => break,
_ => (),
}
buf.clear();
}
assert_eq!(
count, 1550,
Expand All @@ -130,78 +122,66 @@ fn one_event(c: &mut Criterion) {
let mut group = c.benchmark_group("One event");
group.bench_function("StartText", |b| {
let src = "Hello world!".repeat(512 / 12).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false).check_comments(false);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::StartText(e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 504);
})
});

group.bench_function("Start", |b| {
let src = format!(r#"<hello target="{}">"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Start(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 525);
})
});

group.bench_function("Comment", |b| {
let src = format!(r#"<!-- hello "{}" -->"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Comment(e)) => nbtxt += e.decode_and_unescape(&r).unwrap().len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 520);
})
});

group.bench_function("CData", |b| {
let src = format!(r#"<![CDATA[hello "{}"]]>"#, "world".repeat(512 / 5)).into_bytes();
let mut buf = Vec::with_capacity(1024);
b.iter(|| {
let mut r = Reader::from_reader(src.as_ref());
let mut r = Reader::from_bytes(src.as_ref());
let mut nbtxt = criterion::black_box(0);
r.check_end_names(false)
.check_comments(false)
.trim_text(true);
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::CData(ref e)) => nbtxt += e.len(),
something_else => panic!("Did not expect {:?}", something_else),
};

buf.clear();

assert_eq!(nbtxt, 518);
})
});
Expand All @@ -213,12 +193,11 @@ fn attributes(c: &mut Criterion) {
let mut group = c.benchmark_group("attributes");
group.bench_function("with_checks = true", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes() {
let _attr = attr.unwrap();
Expand All @@ -228,20 +207,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("with_checks = false", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) => {
for attr in e.attributes().with_checks(false) {
let _attr = attr.unwrap();
Expand All @@ -251,20 +228,18 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 1041);
})
});

group.bench_function("try_get_attribute", |b| {
b.iter(|| {
let mut r = Reader::from_reader(PLAYERS);
let mut r = Reader::from_bytes(PLAYERS);
r.check_end_names(false).check_comments(false);
let mut count = criterion::black_box(0);
let mut buf = Vec::new();
loop {
match r.read_event_into(&mut buf) {
match r.read_event() {
Ok(Event::Empty(e)) if e.name() == QName(b"player") => {
for name in ["num", "status", "avg"] {
if let Some(_attr) = e.try_get_attribute(name).unwrap() {
Expand All @@ -279,7 +254,6 @@ fn attributes(c: &mut Criterion) {
Ok(Event::Eof) => break,
_ => (),
}
buf.clear();
}
assert_eq!(count, 150);
})
Expand Down
3 changes: 1 addition & 2 deletions examples/custom_entities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,11 @@ fn main() -> Result<(), Box<dyn std::error::Error>> {
let mut reader = Reader::from_str(DATA);
reader.trim_text(true);

let mut buf = Vec::new();
let mut custom_entities: HashMap<String, String> = HashMap::new();
let entity_re = Regex::new(r#"<!ENTITY\s+([^ \t\r\n]+)\s+"([^"]*)"\s*>"#)?;

loop {
match reader.read_event_into(&mut buf) {
match reader.read_event() {
Ok(Event::DocType(ref e)) => {
for cap in entity_re.captures_iter(&e) {
custom_entities.insert(
Expand Down
34 changes: 34 additions & 0 deletions examples/read_buffered.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
// This example demonstrates how a reader (for example when reading from a file)
// can be buffered. In that case, data read from the file is written to a supplied
// buffer and returned XML events borrow from that buffer.
// That way, allocations can be kept to a minimum.

/// Reads `tests/documents/document.xml` through a buffered reader, prints the
/// name of every start tag, and finally prints how many start tags were seen.
///
/// # Errors
/// Returns an error if the file cannot be opened or if a tag name cannot be
/// decoded.
///
/// # Panics
/// Panics with the current buffer position if reading an event fails.
fn main() -> Result<(), quick_xml::Error> {
    use quick_xml::events::Event;
    use quick_xml::Reader;

    let mut reader = Reader::from_file("tests/documents/document.xml")?;
    reader.trim_text(true);

    // Scratch buffer handed to the reader; events returned by
    // `read_event_into` borrow from it.
    let mut buf = Vec::new();

    let mut count = 0;

    loop {
        match reader.read_event_into(&mut buf) {
            Ok(Event::Start(ref e)) => {
                let name = e.name();
                let name = reader.decoder().decode(name.as_ref())?;
                println!("read start event {:?}", name.as_ref());
                count += 1;
            }
            Ok(Event::Eof) => break, // exits the loop when reaching end of file
            Err(e) => panic!("Error at position {}: {:?}", reader.buffer_position(), e),
            _ => (), // There are several other `Event`s we do not consider here
        }
        // The event that borrowed from `buf` is out of scope here, so the buffer
        // can be reset and its allocation reused for the next event instead of
        // accumulating the data of every event read so far.
        buf.clear();
    }

    println!("read {} start events in total", count);

    Ok(())
}
Loading