Skip to content

Commit

Permalink
FIX: revisited XML codec to better handle more "XML prolog" versions …
Browse files Browse the repository at this point in the history
…(including 2 tests)
  • Loading branch information
Oldes committed Apr 24, 2020
1 parent 3770ef6 commit 04cf273
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 38 deletions.
56 changes: 18 additions & 38 deletions src/mezz/codec-xml.r
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
REBOL [
Title: "A more XML 1.0 compliant set of XML parsing tools."
File: %codec-xml.r
Date: 19-Nov-2018
Version: 0.8.0
Date: 24-Apr-2020
Version: 0.8.1
Author: ["Gavin F. McKenzie" "Oldes"]
Email: %brianwisti--yahoo--com
Purpose: {
Expand Down Expand Up @@ -137,6 +137,7 @@ REBOL [
@@TBD: say more here
}
History: [
0.8.1 { Oldes: fixed Prolog parsing in some cases}
0.8.0 { Oldes: used original script as a Rebol3 codec}
0.7.6 { Version from 1-jul-2009 downloaded from rebol.org}
0.7.4 { Fixed a defect to allow optional space around
Expand Down Expand Up @@ -374,26 +375,14 @@ register-codec [
;
block-handler: make xml-parse-handler [
xml-doc: copy []
xml-block: copy []
xml-block: none
xml-content: copy ""

start-document: func [
][
start-document: does [
;
; Seed the document
;
xml-block: reduce copy/deep [
'document [
version none
encoding none
standalone none
doctype none
pubid none
sysid none
subset none
]
none
]
xml-block: reduce ['document copy #() none]
]
xml-decl: func [
version-info [string! none!]
Expand Down Expand Up @@ -821,15 +810,12 @@ register-codec [
any xmlMisc
opt [xmlDocTypeDecl any xmlMisc]
]
xmlDocTypeDecl: [ "<!DOCTYPE"
xmlS
xmlDocTypeDecl: [ (public-id: system-id: internal-subset: none)
"<!DOCTYPE" xmlS
copy document-type xmlName
opt [xmlS xmlExternalID]
any xmlSpace
"["
copy internal-subset
to "]"
"]"
opt [#"[" copy internal-subset to #"]" 1 skip] ;@@ this can be unsafe!
any xmlSpace ">"
(handler/document-type
document-type
Expand Down Expand Up @@ -907,16 +893,14 @@ register-codec [
]
)
]
xmlExternalID: [ ["SYSTEM" xmlSpace xmlSystemLiteral] |
["PUBLIC" xmlSpace xmlPubIDLiteral
xmlSpace xmlSystemLiteral
]
xmlExternalID: [ ["SYSTEM" xmlS xmlSystemLiteral] |
["PUBLIC" xmlS xmlPubIDLiteral xmlS xmlSystemLiteral]
]
xmlSystemLiteral: [ [#"^"" copy system-id to #"^"" #"^""] |
[#"'" copy system-id to #"'" #"'"]
xmlSystemLiteral: [ [#"^"" copy system-id to #"^"" 1 skip] |
[#"'" copy system-id to #"'" 1 skip]
]
xmlPubIDLiteral: [ [#"^"" copy public-id to #"^"" #"^""] |
[#"'" copy public-id to #"'" #"'"]
xmlPubIDLiteral: [ [#"^"" copy public-id to #"^"" 1 skip] |
[#"'" copy public-id to #"'" 1 skip]
]
xmlNDataDecl: [xmlS "NDATA" xmlS xmlNameProd]
xmlCDSect: [ "<![CDATA["
Expand Down Expand Up @@ -1008,13 +992,9 @@ register-codec [
"apos" [ return #"'" ]
][
either (first entity-ref) = #"#" [
either (second entity-ref) = #"x" [
to char! to integer! to issue!
skip entity-ref 2
][
to char! to integer!
skip entity-ref 1
]
to char! to integer! either (second entity-ref) = #"x" [
to issue! skip entity-ref 2
][ skip entity-ref 1 ]
][
none
]
Expand Down
32 changes: 32 additions & 0 deletions src/tests/units/codecs-test.r3
Original file line number Diff line number Diff line change
Expand Up @@ -227,4 +227,36 @@ if find codecs 'JSON [
===end-group===
]

; XML codec test group. It is evaluated only when the word XML is found in `codecs`
; (NOTE(review): `codecs` is defined outside this block - presumably the list of
; registered codecs; confirm against the top of this test file).
if find codecs 'XML [
===start-group=== "XML codec"
; test1.xml starts with comments that precede its <?xml ...?> declaration and has a
; DOCTYPE with a SYSTEM identifier only.
--test-- "XML decode test1"
; `data` is assigned inside the assertion and reused by the assertions that follow.
--assert block? data: load %units/files/test1.xml
; For this file the declaration values are expected to stay unset.
--assert none? data/document/version
--assert none? data/document/encoding
--assert none? data/document/standalone
; The DOCTYPE has no PUBLIC identifier and no internal subset, only a name and a system id.
--assert none? data/document/pubid
--assert none? data/document/subset
--assert data/document/doctype = "document"
--assert data/document/sysid = "subjects.dtd"
; The third item of the decoded block is expected to hold exactly one entry whose
; first value is the root element name.
; NOTE(review): data/3/1/3 is presumably the root's content list (17 items) - confirm.
--assert 1 = length? data/3
--assert data/3/1/1 = "document"
--assert 17 = length? data/3/1/3

; test2.xml starts directly with the <?xml ...?> declaration (version and standalone,
; no encoding) and has a DOCTYPE with a PUBLIC identifier followed by a system id.
--test-- "XML decode test2"
--assert block? data: load %units/files/test2.xml
--assert data/document/version = "1.0"
--assert none? data/document/encoding
--assert data/document/standalone = "no"
--assert data/document/doctype = "HTML"
--assert data/document/pubid = "-//W3C//DTD HTML 4.0 Transitional//EN"
--assert data/document/sysid = "http://www.w3.org/TR/REC-html40/loose.dtd"
; The DOCTYPE in test2.xml has no [...] internal subset.
--assert none? data/document/subset
; Same structural checks as in test1, here for the HTML root element.
--assert 1 = length? data/3
--assert data/3/1/1 = "HTML"
--assert 5 = length? data/3/1/3


===end-group===
]

~~~end-file~~~
32 changes: 32 additions & 0 deletions src/tests/units/files/test1.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
<!--inform the XML processor
that an external DTD is referenced-->
<?xml version="1.0" standalone="no" ?>

<!--define the location of the
external DTD using a relative URL address-->
<!DOCTYPE document SYSTEM "subjects.dtd">

<document>
<title>Subjects available in Mechanical Engineering.</title>
<subjectID>2.303</subjectID>
<subjectname>Fluid Mechanics</subjectname>
<prerequisite>
<subjectID>1.001</subjectID>
<subjectname>Mathematics</subjectname>
</prerequisite>
<classes>4 hours per week (lectures and tutorials) for one
semester.</classes>
<assessment>tutorial assignments and one 2hr exam at end of
course.</assessment>
<syllabus>
Fluid statics. The Bernoulli equation. Energy equation. Momentum
equation. Differential Continuity equation. Differential Energy
equation. Differential Momentum equation. Dimensional Analysis.
Similitude. Laminar flow. Turbulent flow. Lift and Drag. Boundary
layer theory.
</syllabus>
<textbooks>
<author>Foobar</author>
<booktitle>The Study of Fluid Mechanics</booktitle>
</textbooks>
</document>
13 changes: 13 additions & 0 deletions src/tests/units/files/test2.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
<?xml version="1.0" standalone="no" ?>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"
"http://www.w3.org/TR/REC-html40/loose.dtd">
<HTML>
<HEAD>
<TITLE>A typical HTML file</TITLE>
</HEAD>
<BODY>
This is the typical structure of an HTML file. It follows
the notation of the HTML 4.0 specification, including tags
that have been deprecated (hence the "transitional" label).
</BODY>
</HTML>

0 comments on commit 04cf273

Please sign in to comment.