-
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add XML namespace removal and element skipping extensions
Fixes #64
- Loading branch information
Showing
9 changed files
with
436 additions
and
80 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
using System.Xml; | ||
using System.Xml.Linq; | ||
using System.Xml.XPath; | ||
|
||
namespace Tests; | ||
|
||
/// <summary>
/// Exercises the namespace-removal and element-skipping XML extensions
/// against the <c>package.opf</c> sample file.
/// </summary>
public record XmlTests(ITestOutputHelper Output)
{
    /// <summary>
    /// The same value must be reachable with a namespace-qualified XPath on the
    /// original document and with an unqualified XPath on the stripped clone.
    /// </summary>
    [Fact]
    public void RemoveNamespacesFromElement()
    {
        var original = XDocument.Load("package.opf");
        var stripped = original.Root!.RemoveNamespaces();

        var resolver = new XmlNamespaceManager(new NameTable());
        resolver.AddNamespace("opf", "http://www.idpf.org/2007/opf");
        resolver.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");

        var qualifiedDate = original
            .XPathSelectElement("/opf:package/opf:metadata/opf:meta[@property='dcterms:date']", resolver)
            ?.Value;

        // The stripped clone is a standalone element, so the query starts below
        // the root element instead of naming it.
        var unqualifiedDate = stripped
            .XPathSelectElement("/metadata/meta[@property='dcterms:date']")
            ?.Value;

        // Uncomment to inspect the first child before/after stripping:
        //Output.WriteLine(doc.Root!.Elements().First().ToString());
        //Output.WriteLine(nons.Elements().First().ToString());

        Assert.NotNull(qualifiedDate);
        Assert.NotNull(unqualifiedDate);

        Assert.Equal(qualifiedDate, unqualifiedDate);
    }

    /// <summary>
    /// Loading through a reader that skips <c>manifest</c> must yield a different
    /// number of root children than loading the file unfiltered.
    /// </summary>
    [Fact]
    public void RemoveElementsFromReader()
    {
        using var filteredReader = XmlReader.Create("package.opf").SkipElements("manifest");
        var filtered = XDocument.Load(filteredReader);

        var complete = XDocument.Load("package.opf");

        var filteredCount = filtered.Root!.Elements().Count();
        var completeCount = complete.Root!.Elements().Count();

        Assert.NotEqual(filteredCount, completeCount);
    }
}
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
using System.Xml; | ||
|
||
namespace Devlooped.Web; | ||
|
||
/// <summary>
/// Removes all XML namespaces, since for HTML content it's typically
/// irrelevant. Namespace declaration attributes are skipped during attribute
/// navigation, and every node reports an empty prefix and namespace URI
/// (except for the built-in <c>xml</c> prefix).
/// </summary>
class IgnoreXmlNsReader : XmlWrappingReader
{
    const string XmlNsNamespace = "http://www.w3.org/2000/xmlns/";

    public IgnoreXmlNsReader(XmlReader baseReader) : base(baseReader) { }

    /// <summary>
    /// Number of attributes on the current node, not counting namespace declarations.
    /// </summary>
    public override int AttributeCount
    {
        get
        {
            // Counting requires walking the attributes. Remember whether we started
            // on the owning node so reading this property does not leave the
            // reader parked on its last attribute.
            var startedOnAttribute = base.NodeType == XmlNodeType.Attribute;

            var count = 0;
            for (var go = MoveToFirstAttribute(); go; go = MoveToNextAttribute())
                count++;

            if (!startedOnAttribute)
                base.MoveToElement();

            return count;
        }
    }

    /// <summary>
    /// Moves to the first attribute that is not a namespace declaration.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> if such an attribute exists; otherwise <see langword="false"/>,
    /// with the reader moved back to the owning node.
    /// </returns>
    public override bool MoveToFirstAttribute()
    {
        var moved = base.MoveToFirstAttribute();
        while (moved && (IsXmlNs || IsLocalXmlNs))
            moved = base.MoveToNextAttribute();

        // Only namespace declarations were found: behave as if there were no attributes.
        if (!moved)
            base.MoveToElement();

        return moved;
    }

    /// <summary>
    /// Moves to the next attribute that is not a namespace declaration.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> if such an attribute exists; otherwise <see langword="false"/>.
    /// When only namespace declarations remained, the underlying reader is left on
    /// the last of them, so callers should call <c>MoveToElement</c> once done.
    /// </returns>
    public override bool MoveToNextAttribute()
    {
        var moved = base.MoveToNextAttribute();
        while (moved && (IsXmlNs || IsLocalXmlNs))
            moved = base.MoveToNextAttribute();

        return moved;
    }

    /// <summary>
    /// We only support the <c>xml</c> prefix, used for <c>xml:lang</c> and <c>xml:space</c>
    /// built-in text handling in XHTML.
    /// </summary>
    public override string Prefix => base.Prefix == "xml" ? "xml" : "";

    /// <summary>
    /// Empty for every node except those using the <c>xml</c> prefix, which keep
    /// the namespace reported by the underlying reader.
    /// </summary>
    public override string NamespaceURI => Prefix == "xml" ? base.NamespaceURI : "";

    // Attribute bound to the reserved namespace-declaration namespace.
    bool IsXmlNs => base.NamespaceURI == XmlNsNamespace;

    // Must inspect the underlying prefix: the overridden Prefix above can only
    // return "xml" or "", so comparing it against "xmlns" would never match.
    bool IsLocalXmlNs => base.Prefix == "xmlns";
}
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
using System; | ||
using System.Collections.Generic; | ||
using System.Xml; | ||
|
||
namespace Devlooped.Web; | ||
|
||
/// <summary>
/// Ignores specific elements (and their entire subtrees) from the input XML.
/// Elements are matched by local name, case-insensitively.
/// </summary>
class SkipElementsReader : XmlWrappingReader
{
    readonly HashSet<string> skipElements;

    /// <summary>
    /// Wraps <paramref name="baseReader"/>, hiding every element whose local name
    /// is listed in <paramref name="skipElements"/>.
    /// </summary>
    public SkipElementsReader(XmlReader baseReader, string[] skipElements) : base(baseReader)
    {
        this.skipElements = new HashSet<string>(skipElements, StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Advances to the next node that is not a skipped element.
    /// </summary>
    /// <returns>
    /// <see langword="true"/> if a node was read; <see langword="false"/> if the input
    /// was exhausted, including when skipping consumed the remainder of the input.
    /// </returns>
    public override bool Read()
    {
        if (!base.Read())
            return false;

        // Skip() leaves the reader on the node that follows the skipped element.
        // That node may itself be an element to skip (adjacent siblings with no
        // whitespace in between), so keep checking until we land on a keeper.
        while (base.NodeType == XmlNodeType.Element && skipElements.Contains(LocalName))
        {
            base.Skip();

            // Nothing left after the skipped element: report end of input
            // instead of claiming a node was read.
            if (base.EOF)
                return false;
        }

        return true;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
using System.ComponentModel; | ||
using Devlooped.Web; | ||
|
||
namespace System.Xml | ||
{ | ||
/// <summary> | ||
/// Extension methods for <see cref="XmlReader"/>. | ||
/// </summary> | ||
[EditorBrowsable(EditorBrowsableState.Never)] | ||
public static class XmlReaderExtensions | ||
{ | ||
/// <summary> | ||
/// Creates a wrapping reader that ignores all XML namespace declarations, | ||
/// so that all resulting elements and attributes have no namespaces. | ||
/// </summary> | ||
public static XmlReader IgnoreNamespaces(this XmlReader reader) | ||
=> new IgnoreXmlNsReader(reader); | ||
|
||
/// <summary> | ||
/// Creates a wrapping reader that skips elements (and their child nodes) with | ||
/// the given local names (without namespace, if any). | ||
/// </summary> | ||
public static XmlReader SkipElements(this XmlReader reader, params string[] localNames) | ||
=> new SkipElementsReader(reader, localNames); | ||
} | ||
} | ||
|
||
namespace System.Xml.Linq | ||
{ | ||
/// <summary> | ||
/// Extension methods for <see cref="XElement"/>. | ||
/// </summary> | ||
[EditorBrowsable(EditorBrowsableState.Never)] | ||
public static class XElementExtensions | ||
{ | ||
/// <summary> | ||
/// Returns a clone of the node, with XML namespaces removed. | ||
/// </summary> | ||
public static XElement RemoveNamespaces(this XElement element) | ||
=> XElement.Load(element.CreateReader().IgnoreNamespaces()); | ||
|
||
/// <summary> | ||
/// Returns a clone of the root node, with XML namespaces removed. | ||
/// </summary> | ||
public static XElement? RemoveNamespaces(this XDocument document) | ||
=> document.Root == null ? null : XElement.Load(document.Root.CreateReader().IgnoreNamespaces()); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
<!-- include ..\..\readme.md#content --> | ||
<!-- include ..\..\readme.md#sponsors --> |