Skip to content

Commit

Permalink
fix: xml entity escape in documentation comment (#8596)
Browse files Browse the repository at this point in the history
  • Loading branch information
yufeih authored Apr 9, 2023
1 parent 378feab commit dcc61b8
Show file tree
Hide file tree
Showing 16 changed files with 488 additions and 55 deletions.
22 changes: 22 additions & 0 deletions samples/seed/dotnet/project/Project/Class1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -74,4 +74,26 @@ public void Issue4017() { }
/// ```
/// </example>
public void Issue2623() { }

/// <remarks>
/// > [!NOTE]
/// > This is a &lt;note&gt;. &amp; &quot; &apos;
///
/// [link](https://www.github.com "title")
///
/// ```csharp
/// for (var i = 0; i > 10; i++) // &amp; &quot; &apos;
/// var range = new Range&lt;int&gt; { Min = 0, Max = 10 };
/// ```
///
/// <code>
/// var range = new Range&lt;int&gt; { Min = 0, Max = 10 };
/// </code>
/// </remarks>
public void Issue2723() { } // Sample member: its remarks mix XML entity references with a Markdown note, a link, a fenced code block and a <code> element. NOTE(review): presumably a fixture for issue 2723 - confirm.

/// <remarks>
/// <c>@"\\?\"</c> `@"\\?\"`
/// </remarks>
public void Issue4392() { } // Sample member: its remarks hold the same verbatim-string text once in a <c> element and once in a Markdown code span. NOTE(review): presumably a fixture for issue 4392 - confirm.
}
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
<PackageReference Include="ICSharpCode.Decompiler" />
<PackageReference Include="IgnoresAccessChecksToGenerator" PrivateAssets="All" />
<PackageReference Include="System.Configuration.ConfigurationManager" />
<PackageReference Include="Markdig" />
<PackageReference Include="Microsoft.Build" ExcludeAssets="runtime" />
<PackageReference Include="Microsoft.Build.Locator" />
<PackageReference Include="Microsoft.CodeAnalysis" />
Expand Down
97 changes: 68 additions & 29 deletions src/Microsoft.DocAsCode.Dotnet/Parsers/XmlComment.cs
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
// Copyright (c) Microsoft. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Globalization;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Xml;
using System.Xml.Linq;
using System.Xml.XPath;

using Markdig;
using Markdig.Helpers;
using Markdig.Renderers.Roundtrip;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;

using Microsoft.DocAsCode.Common;
using Microsoft.DocAsCode.Plugins;
using Microsoft.DocAsCode.DataContracts.ManagedReference;
Expand Down Expand Up @@ -484,7 +489,10 @@ private string GetXmlValue(XPathNavigator node)
if (node is null)
return null;

return TrimEachLine(GetInnerXml(node));
if (_context.SkipMarkup)
return TrimEachLine(node.InnerXml);

return GetInnerXmlAsMarkdown(TrimEachLine(node.InnerXml));
}

private static string TrimEachLine(string text, string indent = "")
Expand Down Expand Up @@ -535,40 +543,71 @@ private static string TrimEachLine(string text, string indent = "")
return builder.ToString().TrimEnd();
}

/// <summary>
/// Prepares the inner XML of a documentation comment so that it can be consumed as Markdown.
/// An XML-escaped greater-than sign at the start of a line is turned back into a block quote
/// marker, and XML entities inside Markdown code spans and code blocks are decoded. The
/// document is then written back with Markdig's round-trip renderer.
/// </summary>
/// <param name="xml">Inner XML of the comment element.</param>
/// <returns>
/// The adjusted text, or <paramref name="xml"/> itself when it contains no <c>&amp;</c>.
/// </returns>
private static string GetInnerXmlAsMarkdown(string xml)
{
    // Without '&' there is no entity to decode, so the Markdown round trip is skipped.
    if (!xml.Contains('&'))
        return xml;

    xml = HandleBlockQuote(xml);
    var markdown = Markdown.Parse(xml, trackTrivia: true);
    DecodeMarkdownCode(markdown);

    using var sw = new StringWriter();
    var rr = new RoundtripRenderer(sw);
    rr.Write(markdown);
    return sw.ToString();

    static string HandleBlockQuote(string xml)
    {
        // '>' is encoded as "&gt;" in XML, but Markdown needs a literal '>' to start a block quote.
        // Decode a "&gt;" that is the first non-whitespace text on a line so the block quote
        // syntax is recognized.
        return Regex.Replace(xml, @"^(\s*)&gt;", "$1>", RegexOptions.Multiline);
    }

    static void DecodeMarkdownCode(MarkdownObject node)
    {
        // CommonMark: entity and numeric character references are treated as literal text in
        // code spans and code blocks, so XML escapes there have to be undone explicitly.
        switch (node)
        {
            case CodeInline codeInline:
                codeInline.Content = XmlDecode(codeInline.Content);
                break;

            case CodeBlock codeBlock:
                // An empty block has nothing to decode. Skipping it also avoids creating a
                // zero-capacity StringLineGroup (assumed to be rejected by Markdig - TODO confirm).
                var lineCount = codeBlock.Lines.Count;
                if (lineCount == 0)
                    break;

                // Walk only the first Count entries: the Lines array is the backing store and
                // is assumed to be allowed to be longer than Count - TODO confirm.
                var lines = new StringLineGroup(lineCount);
                for (var i = 0; i < lineCount; i++)
                {
                    var newLine = codeBlock.Lines.Lines[i];
                    newLine.Slice = new StringSlice(XmlDecode(newLine.Slice.ToString()), newLine.Slice.NewLine);
                    lines.Add(newLine);
                }
                codeBlock.Lines = lines;
                break;

            case ContainerBlock containerBlock:
                foreach (var child in containerBlock)
                    DecodeMarkdownCode(child);
                break;

            case ContainerInline containerInline:
                foreach (var child in containerInline)
                    DecodeMarkdownCode(child);
                break;

            case LeafBlock leafBlock when leafBlock.Inline is not null:
                foreach (var child in leafBlock.Inline)
                    DecodeMarkdownCode(child);
                break;
        }
    }

    static string XmlDecode(string xml)
    {
        // "&amp;" is decoded last. Decoding it earlier would turn "&amp;quot;" into "&quot;"
        // and then into '"', i.e. decode the text twice.
        return xml
            .Replace("&gt;", ">")
            .Replace("&lt;", "<")
            .Replace("&quot;", "\"")
            .Replace("&apos;", "'")
            .Replace("&amp;", "&");
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ namespace Microsoft.DocAsCode.Dotnet;

internal class XmlCommentParserContext
{
public bool SkipMarkup { get; init; }

public Action<string, string> AddReferenceDelegate { get; init; }

public Func<string, string> ResolveCode { get; init; }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ internal class SymbolVisitorAdapter : SymbolVisitor<MetadataItem>
private readonly YamlModelGenerator _generator;
private readonly Dictionary<string, ReferenceItem> _references = new();
private readonly IMethodSymbol[] _extensionMethods;
private readonly string _codeSourceBasePath;
private readonly ExtractMetadataConfig _config;
private readonly SymbolFilter _filter;

/// <summary>
/// Creates a visitor over <paramref name="compilation"/>, storing the generator, configuration
/// and filter, and keeping only the extension methods accepted by the filter.
/// </summary>
/// <param name="compilation">Compilation stored for later use by the visitor.</param>
/// <param name="generator">Generator stored for later use by the visitor.</param>
/// <param name="config">Metadata extraction configuration stored for later use by the visitor.</param>
/// <param name="filter">Filter whose <c>IncludeApi</c> predicate selects the extension methods to keep.</param>
/// <param name="extensionMethods">Candidate extension methods; <c>null</c> is treated as an empty set.</param>
public SymbolVisitorAdapter(Compilation compilation, YamlModelGenerator generator, ExtractMetadataConfig config, SymbolFilter filter, IMethodSymbol[] extensionMethods)
{
    _compilation = compilation;
    _generator = generator;
    _filter = filter;
    _config = config;

    // _filter must already be assigned here: its IncludeApi predicate is used for the filtering.
    _extensionMethods = extensionMethods?.Where(_filter.IncludeApi).ToArray() ?? Array.Empty<IMethodSymbol>();
}

public override MetadataItem DefaultVisit(ISymbol symbol)
Expand Down Expand Up @@ -728,6 +728,7 @@ private XmlCommentParserContext GetXmlCommentParserContext(MetadataItem item)
{
return new XmlCommentParserContext
{
SkipMarkup = _config.ShouldSkipMarkup,
AddReferenceDelegate = AddReferenceDelegate,
Source = item.Source,
ResolveCode = ResolveCode,
Expand All @@ -747,7 +748,7 @@ void AddReferenceDelegate(string id, string commentId)

string ResolveCode(string source)
{
var basePath = _codeSourceBasePath ?? (
var basePath = _config.CodeSourceBasePath ?? (
item.Source?.Path is {} sourcePath
? Path.GetDirectoryName(Path.GetFullPath(Path.Combine(EnvironmentContext.BaseDirectory, sourcePath)))
: null);
Expand Down
26 changes: 18 additions & 8 deletions test/Microsoft.DocAsCode.Dotnet.Tests/XmlCommentUnitTest.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,7 @@
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

using System.Text.RegularExpressions;
using System.Xml.Linq;

using Microsoft.DocAsCode.DataContracts.Common;

using Xunit;

namespace Microsoft.DocAsCode.Dotnet.Tests;
Expand Down Expand Up @@ -142,9 +139,9 @@ public void ExternalCodeBlockXaml()
Assert.Equal(
"""
This is an example using source reference in a xaml file.
<pre><code class="lang-xaml">&lt;Grid>
&lt;TextBlock Text=&quot;Hello World&quot; />
&lt;/Grid></code></pre>
<pre><code class="lang-xaml">&lt;Grid&gt;
&lt;TextBlock Text="Hello World" /&gt;
&lt;/Grid&gt;</code></pre>
""",
commentModel.Examples.Single(),
ignoreLineEndingDifferences: true);
Expand All @@ -168,6 +165,13 @@ public int Main(string[] args)
{
Console.HelloWorld();
}

```js
function main()
{
return 0
}
```
</remarks>
""");

Expand All @@ -185,6 +189,13 @@ public int Main(string[] args)
{
Console.HelloWorld();
}

```js
function main()
{
return 0
}
```
""", comment.Remarks, ignoreLineEndingDifferences: true);
}

Expand Down Expand Up @@ -304,7 +315,7 @@ Classes in assemblies are by definition complete.
<pre><code class="lang-csharp">public class XmlElement
: XmlLinkedNode</code></pre>
<ol><li>
word inside list->listItem->list->listItem->para.>
word inside list-&gt;listItem-&gt;list-&gt;listItem-&gt;para.&gt;
the second line.
</li><li>item2 in numbered list</li></ol>
</li><li>item2 in bullet list</li><li>
Expand Down Expand Up @@ -362,7 +373,6 @@ Check empty code.
public void SeeAltText()
{
string input = """

<member name='T:TestClass1.Partial1'>
<summary>
Class summary <see cref='T:System.AccessViolationException'>Exception type</see>
Expand Down
Loading

0 comments on commit dcc61b8

Please sign in to comment.