devlooped · kzu · Sep 3, 2022 · Sep 3, 2022
diff --git a/readme.md b/readme.md
@@ -5,6 +5,7 @@
 [![Downloads](https://img.shields.io/nuget/dt/Devlooped.Web.svg?color=green)](https://www.nuget.org/packages/Devlooped.Web)
 [![License](https://img.shields.io/github/license/devlooped/web.svg?color=blue)](https://github.com/devlooped/web/blob/main/license.txt)
 
+<!-- #content -->
 Read HTML as XML and query it with CSS over XLinq. 
 
 No need to learn an entirely new object model for a page 🤘. 
@@ -32,6 +33,29 @@ receiving an `HtmlReaderSettings`.
 The underlying parsing is performed by the amazing [SgmlReader](https://www.nuget.org/packages/Microsoft.Xml.SgmlReader) 
 library by Microsoft's [Chris Lovett](http://lovettsoftware.com/).
 
+In addition, the following extension methods make it easier to work 
+with XML documents where you want to query with CSS or XPath without 
+having to deal with XML namespaces:
+
+```csharp
+using System.Xml;
+using System.Xml.Linq;
+using Devlooped.Web;
+
+var doc = XDocument.Load("doc.xml");
+// Removes all xmlns declarations so elements can be queried as if none 
+// had namespaces; returns a namespace-free clone of the root element
+XElement nons = doc.RemoveNamespaces();
+
+// Alternatively, you can also ignore at the XmlReader level
+using var reader = XmlReader.Create("doc.xml").IgnoreNamespaces();
+doc = XDocument.Load(reader);
+
+// Finally, you can also skip elements at the reader level
+using var skipping = XmlReader.Create("doc.xml").SkipElements("foo", "bar");
+doc = XDocument.Load(skipping);
+```
+
 ## CSS
 
 At the moment, supports the following CSS selector features: 
@@ -76,6 +100,8 @@ Non-CSS features:
     * `[text()*=val]`: Represents an element whose text contents contains at least one instance of the 
        substring "val". If "val" is the empty string then the selector does not represent anything.
 
+<!-- #content -->
+
 # Dogfooding
 
 [![CI Version](https://img.shields.io/endpoint?url=https://shields.kzu.io/vpre/Devlooped.Web/main&label=nuget.ci&color=brightgreen)](https://pkg.kzu.io/index.json)
@@ -91,6 +117,7 @@ The versioning scheme for packages is:
 - Branch builds: *42.42.42-*`[BRANCH]`.`[COMMITS]`
 
 
+<!-- #sponsors -->
 <!-- include docs/footer.md -->
 # Sponsors 
 

diff --git a/src/Tests/XmlTests.cs b/src/Tests/XmlTests.cs
@@ -0,0 +1,43 @@
+using System.Xml;
+using System.Xml.Linq;
+using System.Xml.XPath;
+
+namespace Tests;
+
+public record XmlTests(ITestOutputHelper Output)
+{
+    /// <summary>
+    /// Verifies that the same value can be selected from the original document 
+    /// with a namespace-qualified XPath, and from the namespace-free clone with 
+    /// an unqualified one.
+    /// </summary>
+    [Fact]
+    public void RemoveNamespacesFromElement()
+    {
+        var doc = XDocument.Load("package.opf");
+        var nons = doc.Root!.RemoveNamespaces();
+
+        var xmlns = new XmlNamespaceManager(new NameTable());
+        xmlns.AddNamespace("opf", "http://www.idpf.org/2007/opf");
+        xmlns.AddNamespace("dc", "http://purl.org/dc/elements/1.1/");
+
+        var yearns = doc.XPathSelectElement("/opf:package/opf:metadata/opf:meta[@property='dcterms:date']", xmlns)?.Value;
+
+        // NOTE: since we're at the element level now, we don't need to reference the root element
+        var year = nons.XPathSelectElement("/metadata/meta[@property='dcterms:date']")?.Value;
+
+        Assert.NotNull(yearns);
+        Assert.NotNull(year);
+
+        Assert.Equal(yearns, year);
+    }
+
+    /// <summary>
+    /// Verifies that an element skipped at the reader level does not show up 
+    /// in the loaded document.
+    /// </summary>
+    [Fact]
+    public void RemoveElementsFromReader()
+    {
+        using var reader = XmlReader.Create("package.opf").SkipElements("manifest");
+        var doc = XDocument.Load(reader);
+
+        var all = XDocument.Load("package.opf");
+
+        // Guard against a vacuous pass: the unfiltered document must contain the element...
+        Assert.Contains(all.Root!.Elements(), e => e.Name.LocalName == "manifest");
+        // ...and the filtered one must not, which a bare count comparison cannot prove.
+        Assert.DoesNotContain(doc.Root!.Elements(), e => e.Name.LocalName == "manifest");
+
+        Assert.NotEqual(doc.Root!.Elements().Count(), all.Root!.Elements().Count());
+    }
+}
diff --git a/src/Tests/package.opf b/src/Tests/package.opf
diff --git a/src/Web/HtmlDocument.cs b/src/Web/HtmlDocument.cs
@@ -197,84 +197,5 @@ static XmlReader Configure(SgmlReader reader, HtmlReaderSettings settings)
 
         return result;
     }
-
-    /// <summary>
-    /// Removes all XML namespaces, since for HTML content it's typically 
-    /// irrelevant.
-    /// </summary>
-    class IgnoreXmlNsReader : XmlWrappingReader
-    {
-        const string XmlNsNamespace = "http://www.w3.org/2000/xmlns/";
-
-        public IgnoreXmlNsReader(XmlReader baseReader) : base(baseReader) { }
-
-        public override int AttributeCount
-        {
-            get
-            {
-                var count = 0;
-                for (var go = MoveToFirstAttribute(); go; go = MoveToNextAttribute())
-                    count++;
-
-                return count;
-            }
-        }
-
-        public override bool MoveToFirstAttribute()
-        {
-            var moved = base.MoveToFirstAttribute();
-            while (moved && (IsXmlNs || IsLocalXmlNs))
-                moved = MoveToNextAttribute();
-
-            if (!moved)
-                base.MoveToElement();
-
-            return moved;
-        }
-
-        public override bool MoveToNextAttribute()
-        {
-            var moved = base.MoveToNextAttribute();
-            while (moved && (IsXmlNs || IsLocalXmlNs))
-                moved = MoveToNextAttribute();
-
-            return moved;
-        }
-
-        /// <summary>
-        /// We only support the <c>xml</c> prefix, used for <c>xml:lang</c> and <c>xml:space</c> 
-        /// built-in text handling in XHTML.
-        /// </summary>
-        public override string Prefix => base.Prefix == "xml" ? "xml" : "";
-
-        public override string NamespaceURI => Prefix == "xml" ? base.NamespaceURI : "";
-
-        bool IsXmlNs => base.NamespaceURI == XmlNsNamespace;
-
-        bool IsLocalXmlNs => Prefix == "xmlns";
-    }
-
-    /// <summary>
-    /// Removes all XML namespaces, since for HTML content it's typically 
-    /// irrelevant.
-    /// </summary>
-    class SkipElementsReader : XmlWrappingReader
-    {
-        readonly HashSet<string> skipElements;
-
-        public SkipElementsReader(XmlReader baseReader, string[] skipElements) : base(baseReader)
-        {
-            this.skipElements = new HashSet<string>(skipElements, StringComparer.OrdinalIgnoreCase);
-        }
-
-        public override bool Read()
-        {
-            var read = base.Read();
-            if (read && base.NodeType == XmlNodeType.Element && skipElements.Contains(LocalName))
-                base.Skip();
-
-            return read;
-        }
-    }
 }
 
diff --git a/src/Web/IgnoreXmlNsReader.cs b/src/Web/IgnoreXmlNsReader.cs
@@ -0,0 +1,60 @@
+using System.Xml;
+
+namespace Devlooped.Web;
+
+/// <summary>
+/// Removes all XML namespaces, since for HTML content it's typically 
+/// irrelevant.
+/// </summary>
+/// <remarks>
+/// Namespace declaration attributes are skipped while iterating attributes, 
+/// and <see cref="Prefix"/> / <see cref="NamespaceURI"/> report empty values 
+/// for everything except the built-in <c>xml</c> prefix.
+/// </remarks>
+class IgnoreXmlNsReader : XmlWrappingReader
+{
+    const string XmlNsNamespace = "http://www.w3.org/2000/xmlns/";
+
+    public IgnoreXmlNsReader(XmlReader baseReader) : base(baseReader) { }
+
+    /// <summary>
+    /// Gets the number of attributes on the current node, excluding the 
+    /// namespace declarations hidden by this reader.
+    /// </summary>
+    public override int AttributeCount
+    {
+        get
+        {
+            // Counting means walking the visible attributes, which moves the cursor. 
+            // Remember the starting point so that reading this property does not 
+            // change the reader position as a side effect.
+            var current = base.NodeType == XmlNodeType.Attribute ? base.Name : null;
+
+            var count = 0;
+            for (var go = MoveToFirstAttribute(); go; go = MoveToNextAttribute())
+                count++;
+
+            if (current != null)
+                base.MoveToAttribute(current);
+            else
+                base.MoveToElement();
+
+            return count;
+        }
+    }
+
+    /// <summary>
+    /// Moves to the first attribute that is not a namespace declaration.
+    /// </summary>
+    /// <returns>
+    /// <see langword="true"/> if such an attribute exists; otherwise 
+    /// <see langword="false"/>, with the reader moved back to the owning node.
+    /// </returns>
+    public override bool MoveToFirstAttribute()
+    {
+        var moved = base.MoveToFirstAttribute();
+        while (moved && (IsXmlNs || IsLocalXmlNs))
+            moved = MoveToNextAttribute();
+
+        // Only namespace declarations were found: don't leave the cursor on one of them.
+        if (!moved)
+            base.MoveToElement();
+
+        return moved;
+    }
+
+    /// <summary>
+    /// Moves to the next attribute that is not a namespace declaration.
+    /// </summary>
+    /// <returns><see langword="true"/> if such an attribute exists; otherwise <see langword="false"/>.</returns>
+    public override bool MoveToNextAttribute()
+    {
+        var moved = base.MoveToNextAttribute();
+        while (moved && (IsXmlNs || IsLocalXmlNs))
+            moved = MoveToNextAttribute();
+
+        return moved;
+    }
+
+    /// <summary>
+    /// We only support the <c>xml</c> prefix, used for <c>xml:lang</c> and <c>xml:space</c> 
+    /// built-in text handling in XHTML.
+    /// </summary>
+    public override string Prefix => base.Prefix == "xml" ? "xml" : "";
+
+    /// <summary>
+    /// Reports the underlying namespace only for the <c>xml</c> prefix; empty otherwise.
+    /// </summary>
+    public override string NamespaceURI => Prefix == "xml" ? base.NamespaceURI : "";
+
+    // True when the underlying reader reports the current attribute in the xmlns namespace.
+    bool IsXmlNs => base.NamespaceURI == XmlNsNamespace;
+
+    // True when the current attribute is a namespace declaration by name alone 
+    // (xmlns:foo="..." or xmlns="..."), for underlying readers that do not report 
+    // the xmlns namespace for it. This must check the underlying reader's values: 
+    // the overridden Prefix above never returns "xmlns".
+    bool IsLocalXmlNs => base.Prefix == "xmlns" || (string.IsNullOrEmpty(base.Prefix) && base.LocalName == "xmlns");
+}
+
diff --git a/src/Web/SkipElementsReader.cs b/src/Web/SkipElementsReader.cs
@@ -0,0 +1,27 @@
+using System;
+using System.Collections.Generic;
+using System.Xml;
+
+namespace Devlooped.Web;
+
+/// <summary>
+/// Ignores specific elements from the input XML.
+/// </summary>
+/// <remarks>
+/// Elements are matched by local name, case-insensitively, and are dropped 
+/// together with their entire subtree.
+/// </remarks>
+class SkipElementsReader : XmlWrappingReader
+{
+    readonly HashSet<string> skipElements;
+
+    /// <summary>
+    /// Creates the reader.
+    /// </summary>
+    /// <param name="baseReader">The reader to wrap.</param>
+    /// <param name="skipElements">Local names of the elements to skip.</param>
+    public SkipElementsReader(XmlReader baseReader, string[] skipElements) : base(baseReader)
+    {
+        this.skipElements = new HashSet<string>(skipElements, StringComparer.OrdinalIgnoreCase);
+    }
+
+    /// <summary>
+    /// Advances to the next node that is not an element to be skipped.
+    /// </summary>
+    /// <returns>
+    /// <see langword="true"/> if the reader is positioned on a node; 
+    /// <see langword="false"/> if there are no more nodes to read.
+    /// </returns>
+    public override bool Read()
+    {
+        if (!base.Read())
+            return false;
+
+        // Skip() leaves the reader on whatever follows the skipped subtree, so that 
+        // node must be checked too: it can be another element to skip (adjacent 
+        // siblings with nothing in between) or the end of the input.
+        while (base.NodeType == XmlNodeType.Element && skipElements.Contains(LocalName))
+            base.Skip();
+
+        return !EOF;
+    }
+}
diff --git a/src/Web/Web.csproj b/src/Web/Web.csproj
@@ -20,7 +20,6 @@
 
   <ItemGroup>
     <InternalsVisibleTo Include="Devlooped.Tests" />
-    <None Include="..\..\readme.md" PackagePath="readme.md" />
   </ItemGroup>
 
 </Project>
diff --git a/src/Web/XmlExtensions.cs b/src/Web/XmlExtensions.cs
@@ -0,0 +1,48 @@
+using System.ComponentModel;
+using Devlooped.Web;
+
+namespace System.Xml
+{
+    /// <summary>
+    /// Extension methods for <see cref="XmlReader"/>.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    public static class XmlReaderExtensions
+    {
+        /// <summary>
+        /// Creates a wrapping reader that ignores all XML namespace declarations, 
+        /// so that all resulting elements and attributes have no namespaces.
+        /// </summary>
+        /// <param name="reader">The reader to wrap.</param>
+        /// <returns>A reader over the same content as <paramref name="reader"/>.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="reader"/> is <see langword="null"/>.</exception>
+        public static XmlReader IgnoreNamespaces(this XmlReader reader)
+            // Fail at the call site rather than on the first read of the wrapper.
+            => new IgnoreXmlNsReader(reader ?? throw new ArgumentNullException(nameof(reader)));
+
+        /// <summary>
+        /// Creates a wrapping reader that skips elements (and their child nodes) with 
+        /// the given local names (without namespace, if any).
+        /// </summary>
+        /// <param name="reader">The reader to wrap.</param>
+        /// <param name="localNames">Local names of the elements to skip.</param>
+        /// <returns>A reader over the same content as <paramref name="reader"/>, minus the skipped elements.</returns>
+        /// <exception cref="ArgumentNullException">
+        /// <paramref name="reader"/> or <paramref name="localNames"/> is <see langword="null"/>.
+        /// </exception>
+        public static XmlReader SkipElements(this XmlReader reader, params string[] localNames)
+            => new SkipElementsReader(
+                reader ?? throw new ArgumentNullException(nameof(reader)),
+                localNames ?? throw new ArgumentNullException(nameof(localNames)));
+    }
+}
+
+namespace System.Xml.Linq
+{
+    /// <summary>
+    /// Extension methods for <see cref="XElement"/>.
+    /// </summary>
+    [EditorBrowsable(EditorBrowsableState.Never)]
+    public static class XElementExtensions
+    {
+        /// <summary>
+        /// Returns a clone of the node, with XML namespaces removed.
+        /// </summary>
+        /// <param name="element">The element to clone.</param>
+        /// <returns>A new element; <paramref name="element"/> itself is not modified.</returns>
+        /// <exception cref="ArgumentNullException"><paramref name="element"/> is <see langword="null"/>.</exception>
+        public static XElement RemoveNamespaces(this XElement element)
+        {
+            if (element is null)
+                throw new ArgumentNullException(nameof(element));
+
+            // The reader is only needed while the clone is being loaded.
+            using var reader = element.CreateReader().IgnoreNamespaces();
+            return XElement.Load(reader);
+        }
+
+        /// <summary>
+        /// Returns a clone of the root node, with XML namespaces removed.
+        /// </summary>
+        /// <param name="document">The document whose root element should be cloned.</param>
+        /// <returns>
+        /// A new element, or <see langword="null"/> if the document has no root element.
+        /// </returns>
+        /// <exception cref="ArgumentNullException"><paramref name="document"/> is <see langword="null"/>.</exception>
+        public static XElement? RemoveNamespaces(this XDocument document)
+        {
+            if (document is null)
+                throw new ArgumentNullException(nameof(document));
+
+            // Same operation as the element overload, applied to the root if there is one.
+            return document.Root?.RemoveNamespaces();
+        }
+    }
+}
diff --git a/src/Web/readme.md b/src/Web/readme.md
@@ -0,0 +1,2 @@
+<!-- include ..\..\readme.md#content -->
+<!-- include ..\..\readme.md#sponsors -->