Skip to content

Commit

Permalink
Unescape URLs from EPUB 2 NCX and EPUB 3 nav docs (#98)
Browse files Browse the repository at this point in the history
  • Loading branch information
vers-one authored May 27, 2023
1 parent 88d0cb0 commit 1d3ae21
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 4 deletions.
52 changes: 52 additions & 0 deletions Source/VersOne.Epub.Test/Unit/Readers/Epub2NcxReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,22 @@ public class Epub2NcxReaderTests
</ncx>
""";

// NCX fixture with a single navigation point whose 'content' element has a percent-encoded 'src' attribute.
// "%31" is the URI escape sequence for the character '1', so ReadEpub2NcxAsyncWithEscapedContentSrcTest
// expects this value to be read as "chapter1.html".
private const string NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE = """
<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/">
<head />
<docTitle />
<navMap>
<navPoint id="navpoint-1">
<navLabel>
<text>Chapter 1</text>
</navLabel>
<content src="chapter%31.html" />
</navPoint>
</navMap>
</ncx>
""";

private const string NCX_FILE_WITHOUT_CONTENT_SRC_ATTRIBUTE = """
<?xml version='1.0' encoding='utf-8'?>
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/">
Expand Down Expand Up @@ -877,6 +893,42 @@ public async void ReadEpub2NcxAsyncWithoutNavLabelTextTest()
await TestFailingReadOperation(NCX_FILE_WITHOUT_NAVLABEL_TEXT_ELEMENT);
}

/// <summary>
/// Checks that a URI-escaped <c>src</c> attribute of an NCX <c>content</c> element
/// (<c>chapter%31.html</c> in the fixture) is expected to be read in its unescaped form (<c>chapter1.html</c>).
/// </summary>
/// <returns>A task representing the asynchronous test, so that the test runner can await it.</returns>
[Fact(DisplayName = "Reading an NCX file with a URI-escaped 'src' attribute in a 'content' XML element should succeed")]
public async Task ReadEpub2NcxAsyncWithEscapedContentSrcTest()
{
    // The method returns Task rather than void: the runner can then await completion and observe
    // exceptions directly, and 'async void' test methods are no longer supported by xUnit.net v3.
    Epub2Ncx expectedEpub2Ncx = new
    (
        filePath: NCX_FILE_PATH,
        head: new Epub2NcxHead(),
        docTitle: null,
        docAuthors: null,
        navMap: new Epub2NcxNavigationMap
        (
            items: new List<Epub2NcxNavigationPoint>()
            {
                new Epub2NcxNavigationPoint
                (
                    id: "navpoint-1",
                    navigationLabels: new List<Epub2NcxNavigationLabel>()
                    {
                        new Epub2NcxNavigationLabel
                        (
                            text: "Chapter 1"
                        )
                    },
                    content: new Epub2NcxContent
                    (
                        // Unescaped counterpart of the "chapter%31.html" value stored in the fixture.
                        source: "chapter1.html"
                    )
                )
            }
        ),
        pageList: null,
        navLists: null
    );
    await TestSuccessfulReadOperation(NCX_FILE_WITH_ESCAPED_CONTENT_SRC_ATTRIBUTE, expectedEpub2Ncx);
}

[Fact(DisplayName = "ReadEpub2NcxAsync should throw Epub2NcxException if a 'content' XML element has no 'src' attribute")]
public async void ReadEpub2NcxAsyncWithoutContentSrcTest()
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,20 @@ public class Epub3NavDocumentReaderTests
</html>
""";

// EPUB 3 navigation document fixture with a single TOC entry whose anchor has a percent-encoded 'href' attribute.
// "%31" is the URI escape sequence for the character '1', so ReadEpub3NavDocumentAsyncWithEscapedAHrefTest
// expects this value to be read as "chapter1.html".
private const string NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT = """
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
<body>
<nav epub:type="toc">
<ol>
<li>
<a href="chapter%31.html">Chapter 1</a>
</li>
</ol>
</nav>
</body>
</html>
""";

private static EpubPackage MinimalEpubPackageWithNav =>
new
(
Expand Down Expand Up @@ -402,6 +416,37 @@ public async void ReadEpub3NavDocumentAsyncWithEmptyLiElement()
await TestFailingReadOperation(NAV_FILE_WITH_EMPTY_LI_ELEMENT);
}

/// <summary>
/// Checks that a URI-escaped <c>href</c> attribute of an <c>a</c> element in an EPUB 3 navigation document
/// (<c>chapter%31.html</c> in the fixture) is expected to be read in its unescaped form (<c>chapter1.html</c>).
/// </summary>
/// <returns>A task representing the asynchronous test, so that the test runner can await it.</returns>
[Fact(DisplayName = "Reading a NAV file with a URI-escaped 'href' attribute in an 'a' XML element should succeed")]
public async Task ReadEpub3NavDocumentAsyncWithEscapedAHrefTest()
{
    // The method returns Task rather than void: the runner can then await completion and observe
    // exceptions directly, and 'async void' test methods are no longer supported by xUnit.net v3.
    Epub3NavDocument expectedEpub3NavDocument = new
    (
        filePath: NAV_FILE_PATH,
        navs: new List<Epub3Nav>()
        {
            new Epub3Nav
            (
                type: Epub3StructuralSemanticsProperty.TOC,
                ol: new Epub3NavOl
                (
                    lis: new List<Epub3NavLi>()
                    {
                        new Epub3NavLi
                        (
                            anchor: new Epub3NavAnchor
                            (
                                // Unescaped counterpart of the "chapter%31.html" value stored in the fixture.
                                href: "chapter1.html",
                                text: "Chapter 1"
                            )
                        )
                    }
                )
            )
        }
    );
    await TestSuccessfulReadOperation(NAV_FILE_WITH_ESCAPED_HREF_IN_A_ELEMENT, expectedEpub3NavDocument);
}

private static async Task TestSuccessfulReadOperation(string navFileContent, Epub3NavDocument expectedEpub3NavDocument, EpubReaderOptions? epubReaderOptions = null)
{
TestZipFile testZipFile = CreateTestZipFileWithNavFile(navFileContent);
Expand Down
38 changes: 38 additions & 0 deletions Source/VersOne.Epub.Test/Unit/Readers/PackageReaderTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,17 @@ public class PackageReaderTests
</package>
""";

// OPF package fixture with a single manifest item whose 'href' attribute is percent-encoded.
// "%31" is the URI escape sequence for the character '1', so ReadPackageWithEscapedManifestItemHrefTest
// expects this value to be read as "chapter1.html".
private const string OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM = $"""
<?xml version='1.0' encoding='UTF-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
<metadata />
<manifest>
<item id="item-1" href="chapter%31.html" media-type="application/xhtml+xml" />
</manifest>
<spine />
</package>
""";

private const string OPF_FILE_WITHOUT_HREF_IN_MANIFEST_ITEM = $"""
<?xml version='1.0' encoding='UTF-8'?>
<package xmlns="http://www.idpf.org/2007/opf" version="3.0">
Expand Down Expand Up @@ -743,6 +754,33 @@ public async void ReadPackageWithoutManifestItemIdWithSkippingInvalidManifestIte
await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITHOUT_ID_IN_MANIFEST_ITEM, MinimalEpub3Package);
}

/// <summary>
/// Checks that a URI-escaped <c>href</c> attribute of an OPF manifest item
/// (<c>chapter%31.html</c> in the fixture) is expected to be read in its unescaped form (<c>chapter1.html</c>).
/// </summary>
/// <returns>A task representing the asynchronous test, so that the test runner can await it.</returns>
[Fact(DisplayName = "Read an OPF package with a URI-escaped 'href' attribute in a manifest item XML node should succeed")]
public async Task ReadPackageWithEscapedManifestItemHrefTest()
{
    // The method returns Task rather than void: the runner can then await completion and observe
    // exceptions directly, and 'async void' test methods are no longer supported by xUnit.net v3.
    EpubPackage expectedPackage = new
    (
        uniqueIdentifier: null,
        epubVersion: EpubVersion.EPUB_3,
        metadata: new EpubMetadata(),
        manifest: new EpubManifest
        (
            id: null,
            items: new List<EpubManifestItem>()
            {
                new EpubManifestItem
                (
                    id: "item-1",
                    // Unescaped counterpart of the "chapter%31.html" value stored in the fixture.
                    href: "chapter1.html",
                    mediaType: "application/xhtml+xml"
                )
            }
        ),
        spine: new EpubSpine(),
        guide: null
    );
    await TestSuccessfulReadOperationWithSkippingInvalidManifestItems(OPF_FILE_WITH_ESCAPED_HREF_IN_MANIFEST_ITEM, expectedPackage);
}

[Fact(DisplayName = "Trying to read OPF package without 'href' attribute in a manifest item XML node should fail with EpubPackageException")]
public async void ReadPackageWithoutManifestItemHrefTest()
{
Expand Down
2 changes: 1 addition & 1 deletion Source/VersOne.Epub/Readers/Epub2NcxReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ private static Epub2NcxContent ReadNavigationContent(XElement navigationContentN
id = attributeValue;
break;
case "src":
source = attributeValue;
source = Uri.UnescapeDataString(attributeValue);
break;
}
}
Expand Down
2 changes: 1 addition & 1 deletion Source/VersOne.Epub/Readers/Epub3NavDocumentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ private static Epub3NavAnchor ReadEpub3NavAnchor(XElement epub3NavAnchorNode)
switch (navAnchorNodeAttribute.GetLowerCaseLocalName())
{
case "href":
href = attributeValue;
href = Uri.UnescapeDataString(attributeValue);
break;
case "title":
title = attributeValue;
Expand Down
4 changes: 2 additions & 2 deletions Source/VersOne.Epub/Readers/NavigationReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ private static List<EpubNavigationItemRef> GetNavigationItems(EpubSchema epubSch
Epub2NcxNavigationLabel? firstNavigationLabel = navigationPoint.NavigationLabels.FirstOrDefault() ??
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: navigation point \"{navigationPoint.Id}\" should contain at least one navigation label.");
string title = firstNavigationLabel.Text;
string source = Uri.UnescapeDataString(navigationPoint.Content.Source);
string source = navigationPoint.Content.Source;
if (!ContentPathUtils.IsLocalPath(source))
{
throw new Epub2NcxException($"Incorrect EPUB 2 NCX: content source \"{source}\" cannot be a remote resource.");
Expand Down Expand Up @@ -100,7 +100,7 @@ private static List<EpubNavigationItemRef> GetNavigationItems(EpubSchema epubSch
List<EpubNavigationItemRef> nestedItems = GetNavigationItems(epubSchema, epubContentRef, epub3NavLi.ChildOl, epub3NavigationBaseDirectoryPath);
if (navAnchor.Href != null)
{
string href = Uri.UnescapeDataString(navAnchor.Href);
string href = navAnchor.Href;
if (!ContentPathUtils.IsLocalPath(href))
{
throw new Epub3NavException($"Incorrect EPUB 3 navigation document: anchor href \"{href}\" cannot be a remote resource.");
Expand Down

0 comments on commit 1d3ae21

Please sign in to comment.