From 8995cd1f96af562dc8281c80a6a73d9af9b9cc98 Mon Sep 17 00:00:00 2001 From: rwecho Date: Wed, 12 Jul 2023 10:36:49 +0800 Subject: [PATCH 1/2] fix encapsulated item with corresponding ReturnType --- .../HtmlNode.Encapsulator.cs | 52 ++++++++++++------- .../EncapsulatorTests.cs | 49 +++++++++++++++++ 2 files changed, 82 insertions(+), 19 deletions(-) diff --git a/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs b/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs index 701ebd49..cdd3fa25 100644 --- a/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs +++ b/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs @@ -1,9 +1,9 @@ -// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. +// Description: Html Agility Pack - HTML Parsers, selectors, traversors, manupulators. // Website & Documentation: http://html-agility-pack.net // Forum & Issues: https://github.com/zzzprojects/html-agility-pack // License: https://github.com/zzzprojects/html-agility-pack/blob/master/LICENSE // More projects: http://www.zzzprojects.com/ -// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. +// Copyright © ZZZ Projects Inc. 2014 - 2017. All rights reserved. #if !METRO && !NETSTANDARD1_3 @@ -29,7 +29,7 @@ public partial class HtmlNode /// Why it's thrown. /// Why it's thrown. /// Why it's thrown. - /// Why it's thrown. + /// Why it's thrown. /// Why it's thrown. public T GetEncapsulatedData() { @@ -50,7 +50,7 @@ public T GetEncapsulatedData() /// Why it's thrown. /// Why it's thrown. /// Why it's thrown. - /// Why it's thrown. + /// Why it's thrown. /// Why it's thrown. public T GetEncapsulatedData(HtmlDocument htmlDocument) { @@ -72,7 +72,7 @@ public T GetEncapsulatedData(HtmlDocument htmlDocument) /// Why it's thrown. /// Why it's thrown. /// Why it's thrown. - /// Why it's thrown. + /// Why it's thrown. /// Why it's thrown. 
public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = null) { @@ -177,7 +177,7 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n { HtmlDocument innerHtmlDocument = new HtmlDocument(); - innerHtmlDocument.LoadHtml(htmlNode.InnerHtml); + innerHtmlDocument.LoadHtml(GetEncapsulatedHtml(xPathAttribute.NodeReturnType, htmlNode)); object o = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument); @@ -192,7 +192,7 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n { string result = string.Empty; - if (xPathAttribute.AttributeName == null) // It target value of HTMLTag + if (xPathAttribute.AttributeName == null) // It target value of HTMLTag { result = Tools.GetNodeValueBasedOnXPathReturnType(htmlNode, xPathAttribute); } @@ -295,7 +295,7 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n foreach (HtmlNode node in nodeCollection) { HtmlDocument innerHtmlDocument = new HtmlDocument(); - innerHtmlDocument.LoadHtml(node.InnerHtml); + innerHtmlDocument.LoadHtml(GetEncapsulatedHtml(xPathAttribute.NodeReturnType, node)); object o = GetEncapsulatedData(T_Types[0], innerHtmlDocument); @@ -384,6 +384,20 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n + private static string GetEncapsulatedHtml(ReturnType returnType, HtmlNode node) + { + switch (returnType) + { + case ReturnType.InnerText: + return node.InnerText; + case ReturnType.InnerHtml: + return node.InnerHtml; + case ReturnType.OuterHtml: + return node.OuterHtml; + default: + throw new Exception("Unhandled ReturnType : " + returnType.ToString()); + }; + } } @@ -797,7 +811,7 @@ internal static int CountOfIEnumerable(this IEnumerable source) public enum ReturnType { /// - /// The text between the start and end tags of the object. + /// The text between the start and end tags of the object. 
/// InnerText, @@ -896,18 +910,18 @@ public sealed class SkipNodeNotFoundAttribute : Attribute public class NodeNotFoundException : Exception { /// - /// + /// /// public NodeNotFoundException() { } /// - /// + /// /// /// public NodeNotFoundException(string message) : base(message) { } /// - /// + /// /// /// /// @@ -921,18 +935,18 @@ public NodeNotFoundException(string message, Exception inner) : base(message, in public class NodeAttributeNotFoundException : Exception { /// - /// + /// /// public NodeAttributeNotFoundException() { } /// - /// + /// /// /// public NodeAttributeNotFoundException(string message) : base(message) { } /// - /// + /// /// /// /// @@ -947,18 +961,18 @@ public class MissingXPathException : Exception { /// - /// + /// /// public MissingXPathException() { } /// - /// + /// /// /// public MissingXPathException(string message) : base(message) { } /// - /// + /// /// /// /// @@ -967,7 +981,7 @@ public MissingXPathException(string message, Exception inner) : base(message, in } -#if FX20 +#if FX20 namespace System.Runtime.CompilerServices { [AttributeUsage(AttributeTargets.Method | diff --git a/src/Tests/HtmlAgilityPack.Tests.NetStandard2_0/EncapsulatorTests.cs b/src/Tests/HtmlAgilityPack.Tests.NetStandard2_0/EncapsulatorTests.cs index 4bee1f41..192d9edc 100644 --- a/src/Tests/HtmlAgilityPack.Tests.NetStandard2_0/EncapsulatorTests.cs +++ b/src/Tests/HtmlAgilityPack.Tests.NetStandard2_0/EncapsulatorTests.cs @@ -2,6 +2,7 @@ using System.Collections.Generic; using System.Diagnostics; using System.Linq; +using System.Reflection.Metadata; using Xunit; namespace HtmlAgilityPack.Tests.NetStandard2_0 @@ -29,8 +30,32 @@ public void Dictionary_Test() Assert.NotNull(wort); } + + [Fact] + public void EncapsulatedOuterHtml_Test() + { + var html = @" +
+3 +
hello +1 +2 +
+
world
+
+"; + var document = new HtmlDocument(); + document.LoadHtml(html); + var outerHtml = document.DocumentNode.GetEncapsulatedData(); + Assert.True(outerHtml.Item3.Href == "3.html"); + Assert.True(outerHtml.Item3.Name == "3"); + + Assert.True(outerHtml.Items.Count == 3); + Assert.True(outerHtml.Items.All(o => o.Href != null)); + } } + #region StackOverFlow_TestClasses [HasXPath] @@ -204,5 +229,29 @@ public class Example #endregion Dictionary_TestClasses + #region Encapsulated outer html test classes + + [HasXPath] + public class OuterHtml + { + [XPath("//a", ReturnType.OuterHtml)] + public List Items { get; set; } + + [XPath("//a[@class='single']", ReturnType.OuterHtml)] + public OuterHtmlItem Item3 { get; set; } + + [HasXPath] + public class OuterHtmlItem + { + [XPath("a", "href")] + [SkipNodeNotFound] + public string Href { get; set; } + + [XPath("a")] + [SkipNodeNotFound] + public string Name { get; set; } + } + } + #endregion } From 2930b385fc7ec131d586c14859721d082f4fa1a2 Mon Sep 17 00:00:00 2001 From: rwecho Date: Thu, 13 Jul 2023 11:01:14 +0800 Subject: [PATCH 2/2] extract GetHtmlForEncapsulation into Tools class. 
--- .../HtmlNode.Encapsulator.cs | 107 +++++------------- 1 file changed, 26 insertions(+), 81 deletions(-) diff --git a/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs b/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs index cdd3fa25..b1ed0c2e 100644 --- a/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs +++ b/src/HtmlAgilityPack.Shared/HtmlNode.Encapsulator.cs @@ -177,7 +177,7 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n { HtmlDocument innerHtmlDocument = new HtmlDocument(); - innerHtmlDocument.LoadHtml(GetEncapsulatedHtml(xPathAttribute.NodeReturnType, htmlNode)); + innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(htmlNode, xPathAttribute.NodeReturnType)); object o = GetEncapsulatedData(propertyInfo.PropertyType, innerHtmlDocument); @@ -295,7 +295,7 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n foreach (HtmlNode node in nodeCollection) { HtmlDocument innerHtmlDocument = new HtmlDocument(); - innerHtmlDocument.LoadHtml(GetEncapsulatedHtml(xPathAttribute.NodeReturnType, node)); + innerHtmlDocument.LoadHtml(Tools.GetHtmlForEncapsulation(node, xPathAttribute.NodeReturnType)); object o = GetEncapsulatedData(T_Types[0], innerHtmlDocument); @@ -381,23 +381,6 @@ public object GetEncapsulatedData(Type targetType, HtmlDocument htmlDocument = n } #endregion targetObject_NOTDefined_XPath } - - - - private static string GetEncapsulatedHtml(ReturnType returnType, HtmlNode node) - { - switch (returnType) - { - case ReturnType.InnerText: - return node.InnerText; - case ReturnType.InnerHtml: - return node.InnerHtml; - case ReturnType.OuterHtml: - return node.OuterHtml; - default: - throw new Exception("Unhandled ReturnType : " + returnType.ToString()); - }; - } } @@ -616,34 +599,7 @@ internal static T GetNodeValueBasedOnXPathReturnType(HtmlNode htmlNode, XPath throw new ArgumentNullException("parameter xpathAttribute is null"); } - object result; - Type TType = typeof(T); - - switch 
(xPathAttribute.NodeReturnType) - { - case ReturnType.InnerHtml: - { - result = Convert.ChangeType(htmlNode.InnerHtml, TType); - } - break; - - - case ReturnType.InnerText: - { - result = Convert.ChangeType(htmlNode.InnerText, TType); - } - break; - - case ReturnType.OuterHtml: - { - result = Convert.ChangeType(htmlNode.OuterHtml, TType); - } - break; - - default: throw new Exception(); - } - - return (T)result; + return (T)Convert.ChangeType(GetHtmlForEncapsulation(htmlNode, xPathAttribute.NodeReturnType), typeof(T)); } @@ -668,41 +624,10 @@ internal static IList GetNodesValuesBasedOnXPathReturnType(HtmlNodeCollection ht IList result = listGenericType.CreateIListOfType(); - - switch (xPathAttribute.NodeReturnType) + foreach (HtmlNode node in htmlNodeCollection) { - - case ReturnType.InnerHtml: - { - foreach (HtmlNode node in htmlNodeCollection) - { - result.Add(Convert.ChangeType(node.InnerHtml, listGenericType)); - } - } - break; - - - case ReturnType.InnerText: - { - foreach (HtmlNode node in htmlNodeCollection) - { - result.Add(Convert.ChangeType(node.InnerText, listGenericType)); - } - } - break; - - - case ReturnType.OuterHtml: - { - foreach (HtmlNode node in htmlNodeCollection) - { - result.Add(Convert.ChangeType(node.OuterHtml, listGenericType)); - } - } - break; - + result.Add(Convert.ChangeType(GetHtmlForEncapsulation(node, xPathAttribute.NodeReturnType), listGenericType)); } - return result; } @@ -801,7 +726,27 @@ internal static int CountOfIEnumerable(this IEnumerable source) return counter; } - + /// <summary> + /// Return html part of <paramref name="node"/> based on <paramref name="returnType"/> + /// </summary> + /// <param name="node">A htmlNode instance.</param>
+ /// <param name="returnType"></param> + /// <returns>Html part</returns> + /// <exception cref="IndexOutOfRangeException">Out of range to the <see cref="ReturnType"/></exception> + internal static string GetHtmlForEncapsulation(HtmlNode node, ReturnType returnType) + { + switch (returnType) + { + case ReturnType.InnerText: + return node.InnerText; + case ReturnType.InnerHtml: + return node.InnerHtml; + case ReturnType.OuterHtml: + return node.OuterHtml; + default: + throw new IndexOutOfRangeException("Unhandled ReturnType : " + returnType.ToString()); + }; + } }