Skip to content

Commit

Permalink
Move encoding integration tests to tests/encodings.rs
Browse files Browse the repository at this point in the history
  • Loading branch information
Mingun committed Jun 23, 2024
1 parent a00a996 commit 8c75ecd
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 74 deletions.
50 changes: 0 additions & 50 deletions src/reader/buffered_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,54 +460,4 @@ mod test {
identity,
&mut Vec::new()
);

#[cfg(feature = "encoding")]
mod encoding {
    use crate::events::Event;
    use crate::reader::Reader;
    use encoding_rs::{UTF_16LE, UTF_8, WINDOWS_1251};
    use pretty_assertions::assert_eq;

    /// The input begins with the bytes `FF FE` (a UTF-16LE byte order mark)
    /// and is followed by an XML declaration that names `windows-1251`.
    /// Verifies that the decoder reports UTF-8 before anything is read and
    /// `windows-1251` once the declaration has been read.
    #[test]
    fn bom_detected() {
        let input: &[u8] = b"\xFF\xFE<?xml encoding='windows-1251'?>";
        let mut xml = Reader::from_reader(input);
        let mut scratch = Vec::new();

        // Nothing has been read yet.
        assert_eq!(xml.decoder().encoding(), UTF_8);

        let declaration = xml.read_event_into(&mut scratch).unwrap();
        assert!(matches!(declaration, Event::Decl(_)));
        assert_eq!(xml.decoder().encoding(), WINDOWS_1251);

        let last = xml.read_event_into(&mut scratch).unwrap();
        assert_eq!(last, Event::Eof);
    }

    /// Feeds two consecutive XML declarations to the reader and verifies
    /// that only the first one affects the reported encoding: after the
    /// `UTF-16` declaration the decoder reports UTF-16LE, and it still does
    /// after the `windows-1251` declaration.
    #[test]
    fn xml_declaration() {
        let input: &[u8] = b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>";
        let mut xml = Reader::from_reader(input);
        let mut scratch = Vec::new();

        // Nothing has been read yet.
        assert_eq!(xml.decoder().encoding(), UTF_8);

        let first = xml.read_event_into(&mut scratch).unwrap();
        assert!(matches!(first, Event::Decl(_)));
        assert_eq!(xml.decoder().encoding(), UTF_16LE);

        // The second declaration is still reported as an event, but the
        // encoding stays as it was.
        let second = xml.read_event_into(&mut scratch).unwrap();
        assert!(matches!(second, Event::Decl(_)));
        assert_eq!(xml.decoder().encoding(), UTF_16LE);

        let last = xml.read_event_into(&mut scratch).unwrap();
        assert_eq!(last, Event::Eof);
    }
}
}
21 changes: 0 additions & 21 deletions src/reader/slice_reader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,25 +376,4 @@ mod test {
identity,
()
);

#[cfg(feature = "encoding")]
mod encoding {
    use crate::events::Event;
    use crate::reader::Reader;
    use encoding_rs::UTF_8;
    use pretty_assertions::assert_eq;

    /// A `Reader` built with `from_str` must keep reporting UTF-8 even when
    /// the XML declaration in the input names another encoding.
    #[test]
    fn str_always_has_utf8() {
        let document = "<?xml encoding='UTF-16'?>";
        let mut xml = Reader::from_str(document);

        // Nothing has been read yet.
        assert_eq!(xml.decoder().encoding(), UTF_8);

        // Read past the declaration; the event itself is not inspected.
        xml.read_event().unwrap();
        assert_eq!(xml.decoder().encoding(), UTF_8);

        let last = xml.read_event().unwrap();
        assert_eq!(last, Event::Eof);
    }
}
}
52 changes: 49 additions & 3 deletions tests/encodings.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8, WINDOWS_1251};
use pretty_assertions::assert_eq;
use quick_xml::events::{BytesEnd, BytesStart, BytesText, Event::*};
use quick_xml::Reader;
use quick_xml::reader::Reader;

mod decode {
use encoding_rs::{UTF_16BE, UTF_16LE, UTF_8};
use super::*;
use pretty_assertions::assert_eq;
use quick_xml::encoding::*;
use quick_xml::encoding::detect_encoding;

static UTF16BE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16be-bom.xml");
static UTF16LE_TEXT_WITH_BOM: &[u8] = include_bytes!("documents/encoding/utf16le-bom.xml");
Expand Down Expand Up @@ -225,3 +227,47 @@ fn bom_removed_from_initial_text() {
assert_eq!(r.read_event().unwrap(), End(BytesEnd::new("paired")));
assert_eq!(r.read_event().unwrap(), Eof);
}

/// The input begins with the bytes `FF FE` (a UTF-16LE byte order mark) and
/// is followed by an XML declaration that names `windows-1251`.
/// Verifies that the decoder reports UTF-8 before anything is read and
/// `windows-1251` once the declaration has been read.
#[test]
fn bom_overridden_by_declaration() {
    let input: &[u8] = b"\xFF\xFE<?xml encoding='windows-1251'?>";
    let mut xml = Reader::from_reader(input);
    let mut scratch = Vec::new();

    // Nothing has been read yet.
    assert_eq!(xml.decoder().encoding(), UTF_8);

    let declaration = xml.read_event_into(&mut scratch).unwrap();
    assert!(matches!(declaration, Decl(_)));
    assert_eq!(xml.decoder().encoding(), WINDOWS_1251);

    let last = xml.read_event_into(&mut scratch).unwrap();
    assert_eq!(last, Eof);
}

/// Feeds two consecutive XML declarations to the reader and verifies that
/// only the first one affects the reported encoding: after the `UTF-16`
/// declaration the decoder reports UTF-16LE, and it still does after the
/// `windows-1251` declaration.
#[test]
fn only_one_declaration_changes_encoding() {
    let input: &[u8] = b"<?xml encoding='UTF-16'?><?xml encoding='windows-1251'?>";
    let mut xml = Reader::from_reader(input);
    let mut scratch = Vec::new();

    // Nothing has been read yet.
    assert_eq!(xml.decoder().encoding(), UTF_8);

    let first = xml.read_event_into(&mut scratch).unwrap();
    assert!(matches!(first, Decl(_)));
    assert_eq!(xml.decoder().encoding(), UTF_16LE);

    // The second declaration is still reported as an event, but the
    // encoding stays as it was.
    let second = xml.read_event_into(&mut scratch).unwrap();
    assert!(matches!(second, Decl(_)));
    assert_eq!(xml.decoder().encoding(), UTF_16LE);

    let last = xml.read_event_into(&mut scratch).unwrap();
    assert_eq!(last, Eof);
}

/// A `Reader` built with `from_str` must keep reporting UTF-8 even when the
/// XML declaration in the input names another encoding.
#[test]
fn str_always_has_utf8() {
    let document = "<?xml encoding='UTF-16'?>";
    let mut xml = Reader::from_str(document);

    // Nothing has been read yet.
    assert_eq!(xml.decoder().encoding(), UTF_8);

    // Read past the declaration; the event itself is not inspected.
    xml.read_event().unwrap();
    assert_eq!(xml.decoder().encoding(), UTF_8);

    let last = xml.read_event().unwrap();
    assert_eq!(last, Eof);
}

0 comments on commit 8c75ecd

Please sign in to comment.