Skip to content

Commit

Permalink
Fix newline transposition for SAX parser
Browse files Browse the repository at this point in the history
  • Loading branch information
ohler55 committed Feb 10, 2022
1 parent d15fdc7 commit ec78820
Show file tree
Hide file tree
Showing 6 changed files with 42 additions and 15 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

All changes to the Ox gem are documented here. Releases follow semantic versioning.

## [2.14.9] - 2022-02-11

### Fixed

- Fixed the SAX parser so that a lone `\r`, as well as a `\r\n` pair, is replaced with a single `\n`, as described in https://www.w3.org/TR/2008/REC-xml-20081126/#sec-line-ends.

## [2.14.8] - 2022-02-09

### Fixed
Expand Down
2 changes: 1 addition & 1 deletion ext/ox/parse.c
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ static void fix_newlines(char *buf) {
if ('\n' == *(s + 1)) {
continue;
}
*s = '\n';
*d = '\n';
} else if (d < s) {
*d = *s;
}
Expand Down
31 changes: 22 additions & 9 deletions ext/ox/sax.c
Original file line number Diff line number Diff line change
Expand Up @@ -307,8 +307,8 @@ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions opti
dr->encoding = rb_enc_find(ox_default_options.encoding);
}
dr->utf8 = (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding);
if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
} else {
dr->get_name = dr->options.symbolize ? ox_enc_sym : ox_enc_name;
}
Expand All @@ -334,7 +334,7 @@ static char skipBOM(SaxDrive dr) {
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
dr->encoding = ox_utf8_encoding;
c = buf_get(&dr->buf);
c = buf_get(&dr->buf);
} else {
ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
c = '\0';
Expand Down Expand Up @@ -1217,7 +1217,7 @@ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml,
attr_value = dr->buf.str;
if (is_encoding) {
dr->encoding = rb_enc_find(dr->buf.str);
is_encoding = 0;
is_encoding = 0;
}
}
if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
Expand Down Expand Up @@ -1368,7 +1368,8 @@ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long co
char *b = str;

while ('\0' != *s) {
if ('&' == *s) {
switch (*s) {
case '&': {
int c = 0;
char *end;

Expand Down Expand Up @@ -1458,13 +1459,25 @@ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long co
}
*b++ = (char)c;
col++;
} else {
break;
}
case '\r':
s++;
if ('\n' == *s) {
line++;
col = 0;
}
continue;
}
line++;
col = 1;
*b++ = '\n';
break;
case '\n':
line++;
col = 0;
// fall through
default:
col++;
*b++ = *s++;
break;
}
}
*b = '\0';
Expand Down
2 changes: 1 addition & 1 deletion lib/ox/version.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@

module Ox
# Current version of the module.
VERSION = '2.14.8'
VERSION = '2.14.9'
end
8 changes: 4 additions & 4 deletions test/sax/sax_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -871,7 +871,7 @@ def test_sax_skip_none
handler = TypeSax.new(:as_s)
xml = %{<top> Pete\r\n Ohler\r</top>}
Ox.sax_parse(handler, StringIO.new(xml))
assert_equal(%{ Pete\r\n Ohler\r}, handler.item)
assert_equal(%{ Pete\n Ohler\n}, handler.item)
end

def test_sax_skip_none_nested
Expand All @@ -884,11 +884,11 @@ def test_sax_skip_none_nested
assert_equal([[:start_element, :top],
[:text, " "],
[:start_element, :child],
[:text, "Pete\r"],
[:text, "Pete\n"],
[:end_element, :child],
[:text, "\n "],
[:start_element, :child],
[:text, " Ohler\r"],
[:text, " Ohler\n"],
[:end_element, :child],
[:end_element, :top]], handler.calls)
end
Expand All @@ -898,7 +898,7 @@ def test_sax_skip_return
handler = TypeSax.new(:as_s)
xml = %{<top> Pete\r\n Ohler\r</top>}
Ox.sax_parse(handler, StringIO.new(xml), :skip => :skip_return)
assert_equal(%{ Pete\n Ohler\r}, handler.item)
assert_equal(%{ Pete\n Ohler\n}, handler.item)
end

def test_sax_skip_white
Expand Down
8 changes: 8 additions & 0 deletions test/tests.rb
Original file line number Diff line number Diff line change
Expand Up @@ -636,6 +636,14 @@ def test_skip_return
assert_equal(%{\n<top> Pete\n Ohler</top>\n}, x2)
end

# Regression test for a carriage return that is NOT followed by a line feed.
# Loads the XML in generic mode with :skip => :skip_return, dumps the
# resulting document, and expects the "\r" between "Pete" and "Ohler" to
# appear as "\n" in the dumped output.
def test_skip_return2
# Reset the global defaults so option changes made elsewhere do not leak in.
Ox::default_options = $ox_object_options
# The element text contains a lone "\r" (no "\n" after it).
xml = %{<top> Pete\rOhler</top>}
doc = Ox.load(xml, :mode => :generic, :symbolize_keys => false, :skip => :skip_return)
x2 = Ox.dump(doc)
# The expected dump is wrapped in a leading and a trailing newline.
assert_equal(%{\n<top> Pete\nOhler</top>\n}, x2)
end

def test_skip_space
Ox::default_options = $ox_object_options
xml = %{<top> Pete\r\n Ohler</top>}
Expand Down

0 comments on commit ec78820

Please sign in to comment.