diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..bbc6c1d --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,20 @@ +{ + "version": "0.2.0", + "configurations": [ + { + "name": "C#: Noted", + "type": "dotnet", + "request": "launch", + "projectPath": "${workspaceFolder}/src/Noted/Noted.csproj", + "launchConfigurationId": "TargetFramework=;Noted" + }, + { + "name": "C#: Launch (console)", + "type": "coreclr", + "request": "launch", + "preLaunchTask": "dotnet: build", + "program": "${workspaceFolder}/src/Noted/bin/Debug/net8.0/Noted.dll", + "args": ["test/assets/koreader", "/tmp"] + }, + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..b1c79f6 --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,12 @@ +{ + "version": "2.0.0", + "tasks": [ + { + "type": "dotnet", + "task": "build", + "group": "build", + "problemMatcher": [], + "label": "dotnet: build" + } + ] +} \ No newline at end of file diff --git a/renovate.json b/renovate.json deleted file mode 100644 index f45d8f1..0000000 --- a/renovate.json +++ /dev/null @@ -1,5 +0,0 @@ -{ - "extends": [ - "config:base" - ] -} diff --git a/src/Noted/Core/Extensions/IDocumentReader.cs b/src/Noted/Core/Extensions/IDocumentReader.cs index a8d197d..67f1afe 100644 --- a/src/Noted/Core/Extensions/IDocumentReader.cs +++ b/src/Noted/Core/Extensions/IDocumentReader.cs @@ -7,7 +7,6 @@ namespace Noted.Core.Extensions using System.Collections.Generic; using System.IO; using System.Threading.Tasks; - using System.Xml.Linq; using Noted.Core.Models; /// diff --git a/src/Noted/Core/Models/DocumentSection.cs b/src/Noted/Core/Models/DocumentSection.cs index ae77aad..97e719d 100644 --- a/src/Noted/Core/Models/DocumentSection.cs +++ b/src/Noted/Core/Models/DocumentSection.cs @@ -10,5 +10,6 @@ namespace Noted.Core.Models public record DocumentSection( string Title, int Level, - int Location); + int Location, + DocumentSection? Parent); } \ No newline at end of file diff --git a/src/Noted/Core/Models/EpubXPathLocation.cs b/src/Noted/Core/Models/EpubXPathLocation.cs new file mode 100644 index 0000000..920b405 --- /dev/null +++ b/src/Noted/Core/Models/EpubXPathLocation.cs @@ -0,0 +1,83 @@ +// Copyright (c) Arun Mahapatra. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace Noted.Core.Models; + +using System; +using System.Text.RegularExpressions; + +public struct EpubXPathLocation(string pos0, string pos1) : IComparable +{ + public EpubLocation Start { get; init; } = EpubLocation.FromString(pos0); + + public EpubLocation End { get; init; } = EpubLocation.FromString(pos1); + + public static EpubXPathLocation FromString(string location) + { + var range = new Uri(location).PathAndQuery.Split('-'); + return new EpubXPathLocation(range[0], range[1]); + } + + public override string ToString() => $"epubxpath://{this.Start}-{this.End}"; + + public int CompareTo(object? obj) + { + if (obj is not EpubXPathLocation other) + { + throw new ArgumentException(null, nameof(obj)); + } + + var startCompare = this.Start.CompareTo(other.Start); + return startCompare == 0 ? this.End.CompareTo(other.End) : startCompare; + } +} + +public partial record EpubLocation( + int DocumentFragmentId, + string XPath, + int CharacterLocation) : IComparable +{ + public int CompareTo(object? obj) + { + if (obj is not EpubLocation other) + { + throw new ArgumentException(null, nameof(obj)); + } + + var docFragmentCompare = this.DocumentFragmentId.CompareTo(other.DocumentFragmentId); + if (docFragmentCompare != 0) + { + return docFragmentCompare; + } + + // Comparing xpaths is impossible :( We'll take a chance to compare lexically, assuming the structure of book pages to be consistent. + // TODO: It is better to probably keep the original order of elements. + var xpathCompare = this.XPath.CompareTo(other.XPath); + if (xpathCompare != 0) + { + return xpathCompare; + } + + return this.CharacterLocation.CompareTo(other.CharacterLocation); + } + + public override string ToString() => $"/body/DocFragment[{this.DocumentFragmentId}]{this.XPath}.{this.CharacterLocation}"; + + public static EpubLocation FromString(string xpath) + { + var match = EpubXPathRegex().Match(xpath); + if (!match.Success || + !int.TryParse(match.Groups["docFragmentId"].Value, out var docFragmentId) || + string.IsNullOrEmpty(match.Groups["xpath"].Value) || + !int.TryParse(match.Groups["charIndex"].Value, out var charIndex)) + { + throw new ArgumentException( + $"Invalid xpath: {xpath}", nameof(xpath)); + } + + return new(docFragmentId, match.Groups["xpath"].Value, charIndex); + } + + [GeneratedRegex(@"/body/DocFragment\[(?\d+)\](?.*)\.(?.*)$", RegexOptions.Compiled)] + private static partial Regex EpubXPathRegex(); +} diff --git a/src/Noted/Extensions/Libraries/KOReader/KOReaderAnnotationProvider.cs b/src/Noted/Extensions/Libraries/KOReader/KOReaderAnnotationProvider.cs index 2b15d79..12667aa 100644 --- a/src/Noted/Extensions/Libraries/KOReader/KOReaderAnnotationProvider.cs +++ b/src/Noted/Extensions/Libraries/KOReader/KOReaderAnnotationProvider.cs @@ -3,18 +3,95 @@ namespace Noted.Extensions.Libraries.KOReader; +using System; using System.Collections.Generic; +using System.IO; using System.Linq; +using NLua; using Noted.Core.Extensions; using Noted.Core.Models; +using Noted.Core.Platform.IO; -public class KOReaderAnnotationProvider : IAnnotationProvider +public class KOReaderAnnotationProvider(IFileSystem fileSystem, ILogger logger) : IAnnotationProvider { - public bool IsAvailable(string sourcePath) => false; + private readonly ILogger logger = logger; + + private readonly IFileSystem fileSystem = fileSystem; + + public bool IsAvailable(string sourcePath) + { + return this.fileSystem.GetFiles(sourcePath, ".lua").Any(); + } public IEnumerable GetAnnotations(string sourcePath) { - // TODO see https://github.com/noembryo/KoHighlights/blob/0fedce43f88cdf6c8726b3cab6cb5cb1a74e815b/main.py - return Enumerable.Empty(); + if (!this.IsAvailable(sourcePath)) + { + yield break; + } + + var annotationFiles = this.fileSystem.GetFiles(sourcePath, ".lua"); + foreach (var annotation in annotationFiles) + { + using var lua = new Lua(); + var annotationTable = GetLuaTable(lua, lua.DoFile(annotation)[0]); + var bookmarksTable = GetLuaTable(lua, annotationTable["bookmarks"]); + var highlightTable = GetLuaTable(lua, annotationTable["highlight"]); + var highlights = highlightTable.Values + .SelectMany(h => GetLuaTable(lua, h).Values) + .Select(h => GetLuaTable(lua, h)["pos0"].ToString()) + .ToHashSet(); + var documentTable = GetLuaTable(lua, annotationTable["doc_props"]); + var document = new DocumentReference + { + Title = documentTable["title"].ToString() ?? Path.GetFileName(annotationTable["doc_path"].ToString()!), + Author = documentTable["authors"].ToString() ?? string.Empty + }; + + // Highlights are keyed to the page numbers on the device used for reading. + // Sort them by page numbers to preserve the reading order of annotations. + foreach (var bookmark in bookmarksTable.Values) + { + var bookmarkDict = GetLuaTable(lua, bookmark); + if (!bookmarkDict.TryGetValue("highlighted", out var highlighted) || highlighted is bool == false) + { + // Skip non-highlighted bookmarks + continue; + } + + // ["notes"] field is available for both notes and highlights. + // ["text"] field is available only for custom text attached to the note. + var notes = bookmarkDict["notes"].ToString()!; + var highlightDate = DateTime.Parse(bookmarkDict["datetime"].ToString()!); + var pos0 = bookmarkDict["pos0"].ToString(); + var pos1 = bookmarkDict["pos1"].ToString(); + bookmarkDict.TryGetValue("chapter", out var chapterTitle); + var context = new AnnotationContext() + { + SerializedLocation = new EpubXPathLocation(pos0!, pos1!).ToString(), + DocumentSection = new DocumentSection(chapterTitle?.ToString() ?? string.Empty, 0, 0, null) + }; + yield return new Annotation( + notes, + document, + AnnotationType.Highlight, + context, + highlightDate); + + // Notes are always attached to a highlight. We emit an extra annotation in this case. + bookmarkDict.TryGetValue("text", out var text); + if (highlights.Contains(pos0) && text != null && !text.ToString()!.StartsWith("Page ")) + { + yield return new Annotation( + text.ToString()!, + document, + AnnotationType.Note, + context, + highlightDate); + } + } + } } + + private static Dictionary GetLuaTable(Lua lua, object table) => table is LuaTable luaTable ? lua.GetTableDict(luaTable) : []; } \ No newline at end of file diff --git a/src/Noted/Extensions/Libraries/Kindle/ClippingExtensions.cs b/src/Noted/Extensions/Libraries/Kindle/ClippingExtensions.cs index fc37f2b..7653410 100644 --- a/src/Noted/Extensions/Libraries/Kindle/ClippingExtensions.cs +++ b/src/Noted/Extensions/Libraries/Kindle/ClippingExtensions.cs @@ -3,7 +3,6 @@ namespace Noted.Extensions.Libraries.Kindle { - using Noted.Core; using Noted.Core.Models; public static class ClippingExtensions diff --git a/src/Noted/Extensions/Readers/Common/HtmlSectionParser.cs b/src/Noted/Extensions/Readers/Common/HtmlSectionParser.cs index d096c89..cc6a7ba 100644 --- a/src/Noted/Extensions/Readers/Common/HtmlSectionParser.cs +++ b/src/Noted/Extensions/Readers/Common/HtmlSectionParser.cs @@ -5,6 +5,7 @@ namespace Noted.Extensions.Readers.Common { using System.Collections.Generic; using System.IO; + using System.Linq; using AngleSharp; using AngleSharp.Dom; using AngleSharp.Html.Dom; @@ -27,18 +28,11 @@ public static async IAsyncEnumerable Parse(Stream stream) // Note that both parent and child levels share a common root. Our // level calculation leverages this. var depth = 0; - foreach (var node in document.All) + var prevLevel = 0; + DocumentSection prevSection = null!; + foreach (var node in document.QuerySelectorAll("a")) { - // AngleSharp always inserts the provided fragment within a - // body element. We reset the depth accordingly. - // Alternatively, we could do node.GetAncestors().Count but that - // revisits the parent nodes multiple times. - depth = node.Parent == document.Body ? 0 : depth + 1; - if (node is not IHtmlAnchorElement) - { - continue; - } - + depth = node.GetAncestors().Count(); var fileOffset = node.GetAttribute("filepos"); if (!levelSet.TryGetValue(depth, out var level)) { @@ -46,10 +40,26 @@ public static async IAsyncEnumerable Parse(Stream stream) levelSet[depth] = level; } - yield return new DocumentSection( + var parent = level == 1 ? null : prevSection; // assume this node is a child + var count = prevLevel - level; + while (prevLevel >= level) + { + // if this node is a sibling instead + parent = prevSection?.Parent; + prevSection = parent ?? null!; + prevLevel--; + } + + var section = new DocumentSection( node.Text(), level, - string.IsNullOrEmpty(fileOffset) ? 0 : int.Parse(fileOffset)); + string.IsNullOrEmpty(fileOffset) ? 0 : int.Parse(fileOffset), + parent); + + yield return section; + + prevLevel = level; + prevSection = section; } } } diff --git a/src/Noted/Extensions/Readers/EpubReader.cs b/src/Noted/Extensions/Readers/EpubReader.cs index 199ce97..4f96068 100644 --- a/src/Noted/Extensions/Readers/EpubReader.cs +++ b/src/Noted/Extensions/Readers/EpubReader.cs @@ -8,12 +8,14 @@ namespace Noted.Extensions.Readers using System.IO; using System.Linq; using System.Threading.Tasks; + using AngleSharp.Dom; using AngleSharp.Html.Parser; using AngleSharp.XPath; using Noted.Core.Extensions; using Noted.Core.Models; using Noted.Core.Platform.IO; using VersOne.Epub; + using Document = Noted.Core.Models.Document; public class EpubReader(ILogger logger) : IDocumentReader { @@ -32,42 +34,116 @@ public async Task Read( Title = epub.Title, Author = epub.Author }; + var annotations = new List(); var externalAnnotations = fetchExternalAnnotations(docRef) .Select(a => ( - Location: LineLocation.FromString(a.Context.SerializedLocation), + Location: EpubXPathLocation.FromString(a.Context.SerializedLocation), Annotation: a)) - .OrderBy(p => p.Location) + .OrderBy(p => p.Location.Start.DocumentFragmentId) .ToList(); - + var sections = ParseNavigation(epub); var content = new Dictionary(); - var line = 1; - foreach (EpubLocalTextContentFile textContentFile in epub.ReadingOrder) + var parser = new HtmlParser(new HtmlParserOptions + { + IsKeepingSourceReferences = true + }); + foreach (var annotationTuple in externalAnnotations) + { + var docIndex = annotationTuple.Location.Start.DocumentFragmentId; + var document = await parser.ParseDocumentAsync(epub.ReadingOrder[docIndex - 1].Content); + + var annotation = annotationTuple.Annotation; + var allNodesInDocument = document.Body.SelectNodes("//*"); + var startNode = document.Body.SelectSingleNode($"/{annotationTuple.Location.Start.XPath}"); + var endNode = document.Body.SelectSingleNode($"/{annotationTuple.Location.End.XPath}"); + + var context = GetContext(allNodesInDocument, startNode, endNode); + annotation.Context.DocumentSection = sections[annotation.Context.DocumentSection!.Title]; + annotation.Context.Location = ((docIndex - 1) * 1000) + context.Item1; + annotation.Context.Content = context.Item2; + annotations.Add(annotation); + } + + var sortedSections = sections.Values.OrderBy(s => s.Location).ToList(); + return new Document + { + Title = docRef.Title, + Author = docRef.Author, + Annotations = annotations.OrderBy(a => a.Context.Location), + Sections = sortedSections + }; + } + + private static Tuple GetContext(List allNodes, INode start, INode end) + { + var startSelector = start.ParentElement!.GetSelector(); + var endSelector = end.ParentElement!.GetSelector(); + var nodesBetween = new List(); + var startLocation = 0; + + // Create context by selecting all nodes between the start and end node selectors. + // Assumes that allNodes is a depth-first traversal of the entire document. + var addNode = false; + var index = 0; + foreach (var node in allNodes) { - var parser = new HtmlParser(new HtmlParserOptions + var nodeElementSelector = (node as IElement)?.GetSelector() ?? string.Empty; + if (nodeElementSelector == startSelector) + { + startLocation = index; + addNode = true; + } + + if (addNode) { - IsKeepingSourceReferences = true - }); - var document = await parser.ParseDocumentAsync(textContentFile.Content); - foreach (var node in document.Body.SelectNodes("//text()")) + nodesBetween.Add(node.TextContent); + } + + index++; + + if (nodeElementSelector == endSelector) { - content.Add(line++, node.TextContent.Trim()); + break; } } - foreach (var pair in externalAnnotations) + return new(startLocation, string.Join(Environment.NewLine, nodesBetween)); + } + + private static void NavigationDfs( + EpubNavigationItem root, + Dictionary result, + DocumentSection parent, + int level, + ref int index) + { + var rootSection = new DocumentSection(root.Title, level, ++index * 1000, parent); + result.Add(root.Title, rootSection); + foreach (var nestedItem in root.NestedItems) { - Console.WriteLine(pair.Annotation.Content); - Console.WriteLine(":::"); - Console.WriteLine(content[pair.Location.Start]); - Console.WriteLine("------------------"); + NavigationDfs(nestedItem, result, rootSection, level + 1, ref index); } + } - return new Document + private static Dictionary ParseNavigation(EpubBook epub) + { + var result = new Dictionary(); + if (epub.Navigation == null) { - Title = docRef.Title, - Author = docRef.Author, - }; + return result; + } + + var index = 0; + foreach (var navItem in epub.Navigation) + { + // Level indicates the current branch level of a section. + // Index is monotonically increasing sequence number used to ensure 1, 1.1.1, 1.1.2, 1.2 ... + // follow the same order. It is stored as Location of the section. + NavigationDfs(navItem, result, null!, 1, ref index); + } + + return result; } } } \ No newline at end of file diff --git a/src/Noted/Extensions/Readers/KfxReader.cs b/src/Noted/Extensions/Readers/KfxReader.cs index d40e55c..13b9679 100644 --- a/src/Noted/Extensions/Readers/KfxReader.cs +++ b/src/Noted/Extensions/Readers/KfxReader.cs @@ -9,7 +9,6 @@ namespace Noted.Extensions.Readers using System.Linq; using System.Threading.Tasks; using Ephemerality.Unpack.KFX; - using Noted.Core; using Noted.Core.Extensions; using Noted.Core.Models; using Noted.Core.Platform.IO; diff --git a/src/Noted/Extensions/Readers/MobiReader.cs b/src/Noted/Extensions/Readers/MobiReader.cs index a9ad627..83ea11d 100644 --- a/src/Noted/Extensions/Readers/MobiReader.cs +++ b/src/Noted/Extensions/Readers/MobiReader.cs @@ -52,6 +52,7 @@ public async Task Read( var tocStream = Mobi7Parser.GetNavigationStream(mobi.GetRawMlStream()).Result; var sections = await HtmlSectionParser .Parse(tocStream) + .OrderBy(s => s.Location) .ToListAsync(); var rawMlStream = mobi.GetRawMlStream(); diff --git a/src/Noted/Extensions/Writers/MarkdownWriter.cs b/src/Noted/Extensions/Writers/MarkdownWriter.cs index d525adf..41a22a2 100644 --- a/src/Noted/Extensions/Writers/MarkdownWriter.cs +++ b/src/Noted/Extensions/Writers/MarkdownWriter.cs @@ -3,7 +3,9 @@ namespace Noted.Extensions.Writers { + using System.Collections.Generic; using System.IO; + using System.Speech.Recognition; using System.Text; using System.Threading.Tasks; using Noted.Core; @@ -28,28 +30,15 @@ public async Task Write(Configuration configuration, Document document, Stream o await writer.WriteLineAsync("---"); await writer.WriteLineAsync(); + var sectionHeaderPrinted = new HashSet(); var currentPage = 0; using var sectionIterator = document.Sections.GetEnumerator(); foreach (var annotation in document.Annotations) { // Print section header - if (configuration.ExtractDocumentSections) + if (annotation.Context.DocumentSection != null && configuration.ExtractDocumentSections) { - while (sectionIterator.MoveNext() && - sectionIterator.Current != null && - sectionIterator.Current.Location <= - annotation.Context.Location) - { - var (title, level, location) = sectionIterator.Current; - await writer.WriteLineAsync( - $"{new string('#', level)} {title}"); - if (configuration.Verbose) - { - await writer.WriteLineAsync($""); - } - - await writer.WriteLineAsync(); - } + await PrintSectionHeader(configuration, writer, annotation.Context.DocumentSection, sectionHeaderPrinted); } // Print page number @@ -83,5 +72,38 @@ await writer.WriteLineAsync( await writer.WriteLineAsync(); } } + + private static async Task PrintSectionHeader( + Configuration configuration, + StreamWriter writer, + DocumentSection section, + HashSet visited) + { + var headers = new List(); + var sectionPrint = section; + while (sectionPrint != null) + { + if (!visited.Contains(sectionPrint)) + { + headers.Insert(0, sectionPrint); + visited.Add(sectionPrint); + } + + sectionPrint = sectionPrint.Parent; + } + + foreach (var header in headers) + { + var (title, level, location, _) = header; + await writer.WriteLineAsync( + $"{new string('#', level)} {title}"); + if (configuration.Verbose) + { + await writer.WriteLineAsync($""); + } + + await writer.WriteLineAsync(); + } + } } } \ No newline at end of file diff --git a/src/Noted/Noted.csproj b/src/Noted/Noted.csproj index 4019ef2..a6b86f6 100644 --- a/src/Noted/Noted.csproj +++ b/src/Noted/Noted.csproj @@ -15,6 +15,7 @@ + diff --git a/src/Noted/Program.cs b/src/Noted/Program.cs index b6c92e6..7347062 100644 --- a/src/Noted/Program.cs +++ b/src/Noted/Program.cs @@ -14,7 +14,7 @@ namespace Noted using Noted.Extensions.Writers; using Noted.Infra; - internal static class Program + public static class Program { public static async Task Main(string[] args) { @@ -22,12 +22,12 @@ public static async Task Main(string[] args) .WithAnnotationProviders(config => new List { new ClippingAnnotationProvider(config.FileSystem, config.Logger), - new KOReaderAnnotationProvider() + new KOReaderAnnotationProvider(config.FileSystem, config.Logger) }) .WithReaders(config => new List { - // new EpubReader(config.Logger), // new KfxReader(config.Logger), + new EpubReader(config.Logger), new PdfReader(config.Logger), new MobiReader(config.Logger) }) diff --git a/test/Noted.Tests/Acceptance/AcceptanceTests.cs b/test/Noted.Tests/Acceptance/AcceptanceTests.cs new file mode 100644 index 0000000..ad6345d --- /dev/null +++ b/test/Noted.Tests/Acceptance/AcceptanceTests.cs @@ -0,0 +1,46 @@ +// Copyright (c) Arun Mahapatra. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace Noted.Tests.Acceptance; + +using System.IO; +using System.Runtime.CompilerServices; +using System.Threading.Tasks; +using Microsoft.VisualStudio.TestTools.UnitTesting; + +[TestClass] +public class AcceptanceTests +{ + [TestMethod] + public async Task KindleLibraryShouldExtractAnnotationsWithDocumentSections() + { + var outputPath = Path.Join(AssetFactory.TestDataDir, "kindle"); + AssetFactory.EnsureDirectory(outputPath); + + var result = await Program.Main([AssetFactory.GetKindleLibrary(), outputPath]); + + Assert.AreEqual(0, result); + } + + [TestMethod] + public async Task KOReaderLibraryShouldExtractAnnotationsWithDocumentSections() + { + var outputPath = Path.Join(AssetFactory.TestDataDir, "koreader"); + AssetFactory.EnsureDirectory(outputPath); + + var result = await Program.Main([AssetFactory.GetKOReaderLibrary(), outputPath]); + + Assert.AreEqual(0, result); + } + + [TestMethod] + public async Task PdfLibraryShouldExtractAnnotations() + { + var outputPath = Path.Join(AssetFactory.TestDataDir, "pdf"); + AssetFactory.EnsureDirectory(outputPath); + + var result = await Program.Main([AssetFactory.GetPdfLibrary(), outputPath]); + + Assert.AreEqual(0, result); + } +} \ No newline at end of file diff --git a/test/Noted.Tests/AssetFactory.cs b/test/Noted.Tests/AssetFactory.cs index d3b988e..03ab159 100644 --- a/test/Noted.Tests/AssetFactory.cs +++ b/test/Noted.Tests/AssetFactory.cs @@ -7,9 +7,32 @@ namespace Noted.Tests public class AssetFactory { - public static Stream GetAsset(string fileName) + public static readonly string TestDataDir = Path.Join(Path.GetTempPath(), "notedtests"); + + static AssetFactory() + { + EnsureDirectory(TestDataDir); + } + + public static Stream GetAsset(params string[] fileNameParts) { - return File.OpenRead(fileName); + return File.OpenRead(Path.Join(fileNameParts)); + } + + public static string GetKindleLibrary() => Path.Join(".", "kindle"); + + public static string GetKOReaderLibrary() => Path.Join(".", "koreader"); + + public static string GetPdfLibrary() => Path.Join(".", "pdf"); + + public static void EnsureDirectory(string path) + { + if (Directory.Exists(path)) + { + Directory.Delete(path, true); + } + + Directory.CreateDirectory(path); } } } \ No newline at end of file diff --git a/test/Noted.Tests/Core/Models/EpubXPathLocationTests.cs b/test/Noted.Tests/Core/Models/EpubXPathLocationTests.cs new file mode 100644 index 0000000..27e8b18 --- /dev/null +++ b/test/Noted.Tests/Core/Models/EpubXPathLocationTests.cs @@ -0,0 +1,25 @@ +// Copyright (c) Arun Mahapatra. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace Noted.Tests.Core.Models; + +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Noted.Core.Models; + +[TestClass] +public class EpubXPathLocationTests +{ + [TestMethod] + public void EpubXPathLocationShouldParseXPathLocation() + { + var pos0 = "/body/DocFragment[9]/body/article/p[3]/text().0"; + var pos1 = "/body/DocFragment[9]/body/article/p[3]/text().10"; + + var loc = new EpubXPathLocation(pos0, pos1); + + Assert.IsNotNull(loc); + Assert.AreEqual(9, loc.Start.DocumentFragmentId); + Assert.AreEqual("/body/article/p[3]/text()", loc.Start.XPath); + Assert.AreEqual(0, loc.Start.CharacterLocation); + } +} \ No newline at end of file diff --git a/test/Noted.Tests/Extensions/Libraries/KOReaderAnnotationProviderTests.cs b/test/Noted.Tests/Extensions/Libraries/KOReaderAnnotationProviderTests.cs index 44de950..ac932e0 100644 --- a/test/Noted.Tests/Extensions/Libraries/KOReaderAnnotationProviderTests.cs +++ b/test/Noted.Tests/Extensions/Libraries/KOReaderAnnotationProviderTests.cs @@ -6,26 +6,48 @@ namespace Noted.Tests.Extensions.Libraries using System.IO; using System.Linq; using Microsoft.VisualStudio.TestTools.UnitTesting; + using Moq; + using Noted.Core.Platform.IO; using Noted.Extensions.Libraries.KOReader; [TestClass] public class KOReaderAnnotationProviderTests { + private static readonly string DummyTempFile = Path.Join(Path.GetTempPath(), "pg42324.sdr", "metadata.epub.lua"); + private readonly Mock logger; + private readonly Mock fileSystem; + private readonly KOReaderAnnotationProvider annotationProvider; + + public KOReaderAnnotationProviderTests() + { + this.logger = new Mock(); + this.fileSystem = new Mock(); + this.fileSystem.Setup(f => f.GetFiles(It.IsAny(), It.IsAny())).Returns([DummyTempFile]); + + this.annotationProvider = new KOReaderAnnotationProvider(this.fileSystem.Object, this.logger.Object); + } + + [TestMethod] + public void KOReaderAnnotationProviderIsAvailable() + { + Assert.IsTrue(this.annotationProvider.IsAvailable(Path.GetTempPath())); + } + [TestMethod] public void KOReaderAnnotationProviderIsNotAvailable() { - var kap = new KOReaderAnnotationProvider(); + this.fileSystem.Setup(f => f.GetFiles(It.IsAny(), It.IsAny())).Returns([]); - Assert.IsFalse(kap.IsAvailable(Path.GetTempFileName())); + Assert.IsFalse(this.annotationProvider.IsAvailable(Path.GetTempPath())); } [TestMethod] public void KOReaderAnnotationProviderShouldReturnZeroAnnotations() { - var kap = new KOReaderAnnotationProvider(); + this.fileSystem.Setup(f => f.GetFiles(It.IsAny(), It.IsAny())).Returns([]); var annotations = - kap.GetAnnotations(Path.GetTempFileName()).ToList(); + this.annotationProvider.GetAnnotations(AssetFactory.GetKindleLibrary()).ToList(); Assert.AreEqual(0, annotations.Count); } diff --git a/test/Noted.Tests/Extensions/Readers/Common/HtmlContextParserTests.cs b/test/Noted.Tests/Extensions/Readers/Common/HtmlContextParserTests.cs index fd647b6..b32a0ed 100644 --- a/test/Noted.Tests/Extensions/Readers/Common/HtmlContextParserTests.cs +++ b/test/Noted.Tests/Extensions/Readers/Common/HtmlContextParserTests.cs @@ -25,8 +25,8 @@ public class HtmlContextParserTests : IDisposable private static readonly List SampleSections = [ - new("Ch1", 1, 0), - new("Ch2", 1, 141) + new("Ch1", 1, 0, null), + new("Ch2", 1, 141, null) ]; private readonly Stream sampleContentStream; diff --git a/test/Noted.Tests/Extensions/Readers/Common/HtmlSectionParserTests.cs b/test/Noted.Tests/Extensions/Readers/Common/HtmlSectionParserTests.cs index 13ccb67..5fda9ef 100644 --- a/test/Noted.Tests/Extensions/Readers/Common/HtmlSectionParserTests.cs +++ b/test/Noted.Tests/Extensions/Readers/Common/HtmlSectionParserTests.cs @@ -3,17 +3,17 @@ namespace Noted.Tests.Extensions.Readers.Common { - using System.IO; - using System.Linq; - using System.Text; - using System.Threading.Tasks; - using Microsoft.VisualStudio.TestTools.UnitTesting; - using Noted.Extensions.Readers.Common; + using System.IO; + using System.Linq; + using System.Text; + using System.Threading.Tasks; + using Microsoft.VisualStudio.TestTools.UnitTesting; + using Noted.Extensions.Readers.Common; - [TestClass] - public class HtmlSectionParserTests - { - private const string TocFragment = @" + [TestClass] + public class HtmlSectionParserTests + { + private const string TocFragment = @"

Table of Contents @@ -38,6 +38,10 @@ public class HtmlSectionParserTests

Section 1.1 +
+ Section 1.1.1 +
+ Section 1.2

@@ -55,26 +59,44 @@ public class HtmlSectionParserTests "; - private readonly HtmlSectionParser parser; + private readonly HtmlSectionParser parser; + + public HtmlSectionParserTests() + { + this.parser = new HtmlSectionParser(); + } + + [TestMethod] + public async Task ParseShouldReturnTableOfContentWithDepth() + { + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(TocFragment)); + var toc = await HtmlSectionParser.Parse(stream).ToListAsync(); - public HtmlSectionParserTests() - { - this.parser = new HtmlSectionParser(); - } + Assert.AreEqual(11, toc.Count); + Assert.AreEqual("Preface", toc[0].Title); + Assert.AreEqual(1, toc[0].Level); + Assert.AreEqual(3859, toc[0].Location); + Assert.AreEqual("Section 1.1", toc[4].Title); + Assert.AreEqual(2, toc[4].Level); + Assert.AreEqual(21076, toc[4].Location); + Assert.AreEqual("Section 1.1.1", toc[5].Title); + Assert.AreEqual(3, toc[5].Level); + Assert.AreEqual("Section 1.2", toc[6].Title); + Assert.AreEqual(2, toc[6].Level); + } - [TestMethod] - public async Task ParseShouldReturnTableOfContentWithDepth() - { - await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(TocFragment)); - var toc = await HtmlSectionParser.Parse(stream).ToListAsync(); + [TestMethod] + public async Task ParseShouldCreateDocumentSectionRelationships() + { + await using var stream = new MemoryStream(Encoding.UTF8.GetBytes(TocFragment)); + var toc = await HtmlSectionParser.Parse(stream).ToListAsync(); - Assert.AreEqual(9, toc.Count); - Assert.AreEqual("Preface", toc[0].Title); - Assert.AreEqual(1, toc[0].Level); - Assert.AreEqual(3859, toc[0].Location); - Assert.AreEqual("Section 1.1", toc[4].Title); - Assert.AreEqual(2, toc[4].Level); - Assert.AreEqual(21076, toc[4].Location); - } + Assert.AreEqual(null, toc[3].Parent); // 1 -> null + Assert.AreEqual("Section 1.1", toc[4].Title); + Assert.AreEqual(toc[3], toc[4].Parent); // 1.1 -> 1 + Assert.AreEqual(toc[4], toc[5].Parent); // 1.1.1 -> 1.1 + Assert.AreEqual(toc[3], toc[6].Parent); // 1.2 -> 1 + Assert.AreEqual(null, toc[7].Parent); // 2 -> null } + } } \ No newline at end of file diff --git a/test/Noted.Tests/Extensions/Readers/EpubReaderTests.cs b/test/Noted.Tests/Extensions/Readers/EpubReaderTests.cs new file mode 100644 index 0000000..ca1c6c7 --- /dev/null +++ b/test/Noted.Tests/Extensions/Readers/EpubReaderTests.cs @@ -0,0 +1,61 @@ +// Copyright (c) Arun Mahapatra. All rights reserved. +// Licensed under the MIT license. See LICENSE file in the project root for full license information. + +namespace Noted.Tests.Extensions.Readers; + +using System; +using System.Linq; +using System.Threading.Tasks; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using Moq; +using Noted.Core.Models; +using Noted.Core.Platform.IO; +using Noted.Extensions.Readers; + +[TestClass] +public class EpubReaderTests +{ + private readonly ILogger logger; + + public EpubReaderTests() + { + this.logger = new Mock().Object; + } + + [TestMethod] + public async Task EpubReaderShouldParseDocumentWithSections() + { + using var stream = AssetFactory.GetAsset(AssetFactory.GetKOReaderLibrary(), "the_prophet.epub"); + + var document = await new EpubReader(this.logger).Read(stream, new Noted.Core.Extensions.ReaderOptions(), (_) => []); + + Assert.AreEqual(0, document.Annotations.Count()); + Assert.AreEqual(35, document.Sections.Count()); + Assert.AreEqual("The Prophet", document.Title); + Assert.AreEqual("Khalil Gibran", document.Author); + } + + [TestMethod] + public async Task EpubReaderShouldParseDocumentWithAnnotations() + { + using var stream = AssetFactory.GetAsset(AssetFactory.GetKOReaderLibrary(), "the_prophet.epub"); + var annotation = new Annotation( + "Test highlight", + new DocumentReference { Title = "The Prophet" }, + AnnotationType.Highlight, + new AnnotationContext + { + DocumentSection = new DocumentSection("On Giving", 0, 0, null), + SerializedLocation = "epubxpath:///body/DocFragment[9]/body/article/p[3]/text().0-/body/DocFragment[9]/body/article/p[3]/text().20" + }, + new DateTime(2023, 12, 23)); + + var document = await new EpubReader(this.logger).Read(stream, new Noted.Core.Extensions.ReaderOptions(), (_) => [annotation]); + + var annotations = document.Annotations.ToList(); + Assert.AreEqual(1, annotations.Count); + Assert.AreEqual("On Giving", annotations[0].Context.DocumentSection.Title); + Assert.AreNotEqual(0, annotations[0].Context.DocumentSection.Location); + Assert.AreNotEqual(0, annotations[0].Context.Location); + } +} \ No newline at end of file diff --git a/test/Noted.Tests/Extensions/Readers/Mobi/Mobi7ParserTests.cs b/test/Noted.Tests/Extensions/Readers/Mobi/Mobi7ParserTests.cs index c1e53f5..b98f724 100644 --- a/test/Noted.Tests/Extensions/Readers/Mobi/Mobi7ParserTests.cs +++ b/test/Noted.Tests/Extensions/Readers/Mobi/Mobi7ParserTests.cs @@ -14,7 +14,7 @@ public class Mobi7ParserTests [TestMethod] public async Task Mobi7ParserShouldExtractNavigationStream() { - await using var stream = AssetFactory.GetAsset("pg42324.mobi"); + await using var stream = AssetFactory.GetAsset("kindle", "pg42324.mobi"); await using var rawMlStream = new MobiMetadata(stream).GetRawMlStream(); var parser = new Mobi7Parser(); diff --git a/test/Noted.Tests/Extensions/Readers/MobiReaderTests.cs b/test/Noted.Tests/Extensions/Readers/MobiReaderTests.cs index 0e7b0f3..1846b4b 100644 --- a/test/Noted.Tests/Extensions/Readers/MobiReaderTests.cs +++ b/test/Noted.Tests/Extensions/Readers/MobiReaderTests.cs @@ -33,7 +33,7 @@ public void MobiReaderShouldSupportMobiFileExtensions() [TestMethod] public async Task MobiReaderShouldParseAnnotationsAndTableOfContent() { - await using var stream = AssetFactory.GetAsset("pg42324.mobi"); + await using var stream = AssetFactory.GetAsset("kindle", "pg42324.mobi"); var annotations = new List { new( diff --git a/test/Noted.Tests/Extensions/Readers/PdfReaderTests.cs b/test/Noted.Tests/Extensions/Readers/PdfReaderTests.cs index 69faa46..8b66f58 100644 --- a/test/Noted.Tests/Extensions/Readers/PdfReaderTests.cs +++ b/test/Noted.Tests/Extensions/Readers/PdfReaderTests.cs @@ -81,7 +81,7 @@ public async Task ReadShouldParseDocumentMetadata() [TestMethod] public async Task ReadShouldParseSingleColumnLayoutDocument() { - await using var fs = new FileStream("single column.pdf", FileMode.Open); + await using var fs = AssetFactory.GetAsset("pdf", "single column.pdf"); var document = await this.reader.Read(fs, new ReaderOptions(), this.emptyExternalAnnotations); @@ -99,7 +99,7 @@ public async Task ReadShouldParseSingleColumnLayoutDocument() [TestMethod] public async Task ReadShouldParseTwoColumnLayoutDocument() { - await using var fs = new FileStream("two column.pdf", FileMode.Open); + await using var fs = AssetFactory.GetAsset("pdf", "two column.pdf"); var document = await this.reader.Read(fs, new ReaderOptions(), this.emptyExternalAnnotations); diff --git a/test/Noted.Tests/Noted.Tests.csproj b/test/Noted.Tests/Noted.Tests.csproj index 5f44d01..ee169e3 100644 --- a/test/Noted.Tests/Noted.Tests.csproj +++ b/test/Noted.Tests/Noted.Tests.csproj @@ -32,7 +32,7 @@ - + Always diff --git a/test/assets/kindle/My Clippings.txt b/test/assets/kindle/My Clippings.txt new file mode 100644 index 0000000..35fe477 --- /dev/null +++ b/test/assets/kindle/My Clippings.txt @@ -0,0 +1,5 @@ +Frankenstein (Mary Wollstonecraft Shelley) +- Your Highlight on page 380 | Location 925-994 | Added on Saturday, March 13, 2021 12:11:21 AM + +Nothing is so painful to the human mind as a great and sudden change. +========== diff --git a/test/assets/pg42324.mobi b/test/assets/kindle/pg42324.mobi similarity index 100% rename from test/assets/pg42324.mobi rename to test/assets/kindle/pg42324.mobi diff --git a/test/assets/koreader/pg42324.epub b/test/assets/koreader/pg42324.epub new file mode 100644 index 0000000..6efcff6 Binary files /dev/null and b/test/assets/koreader/pg42324.epub differ diff --git a/test/assets/koreader/pg42324.sdr/metadata.epub.lua b/test/assets/koreader/pg42324.sdr/metadata.epub.lua new file mode 100644 index 0000000..63b8068 --- /dev/null +++ b/test/assets/koreader/pg42324.sdr/metadata.epub.lua @@ -0,0 +1,183 @@ +-- /home/arun/src/noted/test/assets/koreader/pg42324.sdr/metadata.epub.lua +return { + ["bookmarks"] = { + [1] = { + ["chapter"] = "Walton, in Continuation", + ["datetime"] = "2023-12-22 18:17:17", + ["highlighted"] = true, + ["notes"] = "You have read this strange and terrific story, Margaret;", + ["page"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos0"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos1"] = "/body/DocFragment[36]/body/p[30]/text().56", + ["text"] = "Page 281 You have read this strange and terrific story, Margaret; @ 2023-12-22 18:17:17", + }, + [2] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:24:26", + ["highlighted"] = true, + ["notes"] = "The sun might shine, or the clouds might lower: but nothing could appear to me as it had done the day before.", + ["page"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().1104", + ["text"] = "Sample note without highlight", + }, + [3] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:23:52", + ["highlighted"] = true, + ["notes"] = "Nothing is so painful to the human mind as a great and sudden change.", + ["page"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().994", + ["text"] = "Page 265 Nothing is so painful to the human mind as a great and sudden change. @ 2023-12-19 17:23:52", + }, + [4] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-23 08:47:28", + ["highlighted"] = true, + ["notes"] = "But the overflowing misery I now felt, and the excess of agitation that I endured, rendered me incapable of any exertion.", + ["page"] = "/body/DocFragment[35]/body/p[15]/text().330", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().330", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().451", + }, + [5] = { + ["chapter"] = "Titlepage", + ["datetime"] = "2023-12-22 18:17:37", + ["notes"] = "in Titlepage", + ["page"] = "/body/DocFragment[2]/body/p/img.0", + ["text"] = "Page 2 in Titlepage @ 2023-12-22 18:17:37", + }, + }, + ["bookmarks_sorted_20220106"] = true, + ["bookmarks_version"] = 20200615, + ["cache_file_path"] = "/home/arun/.config/koreader/cache/cr3cache/pg42324.epub.3a30c574.1.cr3", + ["config_panel_index"] = 1, + ["copt_b_page_margin"] = 15, + ["copt_block_rendering_mode"] = 3, + ["copt_cjk_width_scaling"] = 100, + ["copt_embedded_css"] = 1, + ["copt_embedded_fonts"] = 1, + ["copt_font_base_weight"] = 0, + ["copt_font_gamma"] = 15, + ["copt_font_hinting"] = 2, + ["copt_font_kerning"] = 3, + ["copt_font_size"] = 22, + ["copt_h_page_margins"] = { + [1] = 10, + [2] = 10, + }, + ["copt_line_spacing"] = 100, + ["copt_nightmode_images"] = 1, + ["copt_render_dpi"] = 96, + ["copt_rotation_mode"] = 0, + ["copt_smooth_scaling"] = 0, + ["copt_status_line"] = 1, + ["copt_sync_t_b_page_margins"] = 0, + ["copt_t_page_margin"] = 15, + ["copt_view_mode"] = 0, + ["copt_visible_pages"] = 1, + ["copt_word_expansion"] = 0, + ["copt_word_spacing"] = { + [1] = 95, + [2] = 75, + }, + ["cre_dom_version"] = 20210904, + ["css"] = "./data/epub.css", + ["doc_pages"] = 310, + ["doc_path"] = "/home/arun/src/noted/test/assets/koreader/pg42324.epub", + ["doc_props"] = { + ["authors"] = "Mary Shelley", + ["description"] = "A tragic scientist creates a monster in his laboratory.", + ["keywords"] = "Science fiction\ +Horror tales\ +Gothic fiction (Literary genre)\ +Scientists -- Fiction\ +Monsters -- Fiction\ +Frankenstein\ +Victor (Fictitious character) -- Fiction\ +Frankenstein's monster (Fictitious character) -- Fiction", + ["language"] = "en", + ["title"] = "Frankenstein", + }, + ["floating_punctuation"] = 0, + ["font_face"] = "Noto Serif", + ["font_family_fonts"] = {}, + ["handmade_flows_edit_enabled"] = true, + ["handmade_flows_enabled"] = false, + ["handmade_toc_edit_enabled"] = true, + ["handmade_toc_enabled"] = false, + ["header_font_face"] = "Noto Sans", + ["hide_nonlinear_flows"] = false, + ["highlight"] = { + [261] = { + [1] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:23:52", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().994", + ["text"] = "Nothing is so painful to the human mind as a great and sudden change.", + }, + [2] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:24:26", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().1104", + ["text"] = "The sun might shine, or the clouds might lower: but nothing could appear to me as it had done the day before.", + }, + }, + [265] = { + [1] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-23 08:47:28", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().330", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().451", + ["text"] = "But the overflowing misery I now felt, and the excess of agitation that I endured, rendered me incapable of any exertion.", + }, + }, + [281] = { + [1] = { + ["chapter"] = "Walton, in Continuation", + ["datetime"] = "2023-12-22 18:17:17", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos1"] = "/body/DocFragment[36]/body/p[30]/text().56", + ["text"] = "You have read this strange and terrific story, Margaret;", + }, + }, + }, + ["highlight_drawer"] = "lighten", + ["highlights_imported"] = true, + ["hyph_force_algorithmic"] = false, + ["hyph_soft_hyphens_only"] = false, + ["hyph_trust_soft_hyphens"] = false, + ["hyphenation"] = true, + ["inverse_reading_order"] = false, + ["last_xpointer"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["page_overlap_style"] = "dim", + ["partial_md5_checksum"] = "9336eb2f430a0d0e71dd0882eb74d27c", + ["partial_rerendering"] = true, + ["percent_finished"] = 0.85483870967742, + ["preferred_dictionaries"] = {}, + ["readermenu_tab_index"] = 1, + ["show_overlap_enable"] = false, + ["stats"] = { + ["authors"] = "Mary Shelley", + ["highlights"] = 3, + ["language"] = "en", + ["notes"] = 1, + ["pages"] = 310, + ["performance_in_pages"] = {}, + ["series"] = "N/A", + ["title"] = "Frankenstein", + }, + ["summary"] = { + ["modified"] = "2023-12-19", + ["status"] = "reading", + }, + ["text_lang"] = "en-US", + ["text_lang_embedded_langs"] = true, + ["toc_ticks_ignored_levels"] = {}, +} diff --git a/test/assets/koreader/pg42324.sdr/metadata.epub.lua.old b/test/assets/koreader/pg42324.sdr/metadata.epub.lua.old new file mode 100644 index 0000000..9cc4aea --- /dev/null +++ b/test/assets/koreader/pg42324.sdr/metadata.epub.lua.old @@ -0,0 +1,161 @@ +-- /home/arun/src/noted/test/assets/koreader/pg42324.sdr/metadata.epub.lua +return { + ["bookmarks"] = { + [1] = { + ["chapter"] = "Walton, in Continuation", + ["datetime"] = "2023-12-22 18:17:17", + ["highlighted"] = true, + ["notes"] = "You have read this strange and terrific story, Margaret;", + ["page"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos0"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos1"] = "/body/DocFragment[36]/body/p[30]/text().56", + }, + [2] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:24:26", + ["highlighted"] = true, + ["notes"] = "The sun might shine, or the clouds might lower: but nothing could appear to me as it had done the day before.", + ["page"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().1104", + ["text"] = "Sample note without highlight", + }, + [3] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:23:52", + ["highlighted"] = true, + ["notes"] = "Nothing is so painful to the human mind as a great and sudden change.", + ["page"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().994", + }, + [4] = { + ["chapter"] = "Titlepage", + ["datetime"] = "2023-12-22 18:17:37", + ["notes"] = "in Titlepage", + ["page"] = "/body/DocFragment[2]/body/p/img.0", + }, + }, + ["bookmarks_sorted_20220106"] = true, + ["bookmarks_version"] = 20200615, + ["cache_file_path"] = "/home/arun/.config/koreader/cache/cr3cache/pg42324.epub.3a30c574.1.cr3", + ["config_panel_index"] = 1, + ["copt_b_page_margin"] = 15, + ["copt_block_rendering_mode"] = 3, + ["copt_cjk_width_scaling"] = 100, + ["copt_embedded_css"] = 1, + ["copt_embedded_fonts"] = 1, + ["copt_font_base_weight"] = 0, + ["copt_font_gamma"] = 15, + ["copt_font_hinting"] = 2, + ["copt_font_kerning"] = 3, + ["copt_font_size"] = 22, + ["copt_h_page_margins"] = { + [1] = 10, + [2] = 10, + }, + ["copt_line_spacing"] = 100, + ["copt_nightmode_images"] = 1, + ["copt_render_dpi"] = 96, + ["copt_rotation_mode"] = 0, + ["copt_smooth_scaling"] = 0, + ["copt_status_line"] = 1, + ["copt_sync_t_b_page_margins"] = 0, + ["copt_t_page_margin"] = 15, + ["copt_view_mode"] = 0, + ["copt_visible_pages"] = 1, + ["copt_word_expansion"] = 0, + ["copt_word_spacing"] = { + [1] = 95, + [2] = 75, + }, + ["cre_dom_version"] = 20210904, + ["css"] = "./data/epub.css", + ["doc_pages"] = 310, + ["doc_path"] = "/home/arun/src/noted/test/assets/koreader/pg42324.epub", + ["doc_props"] = { + ["authors"] = "Mary Shelley", + ["description"] = "A tragic scientist creates a monster in his laboratory.", + ["keywords"] = "Science fiction\ +Horror tales\ +Gothic fiction (Literary genre)\ +Scientists -- Fiction\ +Monsters -- Fiction\ +Frankenstein\ +Victor (Fictitious character) -- Fiction\ +Frankenstein's monster (Fictitious character) -- Fiction", + ["language"] = "en", + ["title"] = "Frankenstein", + }, + ["floating_punctuation"] = 0, + ["font_face"] = "Noto Serif", + ["font_family_fonts"] = {}, + ["handmade_flows_edit_enabled"] = true, + ["handmade_flows_enabled"] = false, + ["handmade_toc_edit_enabled"] = true, + ["handmade_toc_enabled"] = false, + ["header_font_face"] = "Noto Sans", + ["hide_nonlinear_flows"] = false, + ["highlight"] = { + [261] = { + [1] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:23:52", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().925", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().994", + ["text"] = "Nothing is so painful to the human mind as a great and sudden change.", + }, + [2] = { + ["chapter"] = "Chapter XXIII", + ["datetime"] = "2023-12-19 17:24:26", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[35]/body/p[15]/text().995", + ["pos1"] = "/body/DocFragment[35]/body/p[15]/text().1104", + ["text"] = "The sun might shine, or the clouds might lower: but nothing could appear to me as it had done the day before.", + }, + }, + [281] = { + [1] = { + ["chapter"] = "Walton, in Continuation", + ["datetime"] = "2023-12-22 18:17:17", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[36]/body/p[30]/text().0", + ["pos1"] = "/body/DocFragment[36]/body/p[30]/text().56", + ["text"] = "You have read this strange and terrific story, Margaret;", + }, + }, + }, + ["highlight_drawer"] = "lighten", + ["highlights_imported"] = true, + ["hyph_force_algorithmic"] = false, + ["hyph_soft_hyphens_only"] = false, + ["hyph_trust_soft_hyphens"] = false, + ["hyphenation"] = true, + ["inverse_reading_order"] = false, + ["last_xpointer"] = "/body/DocFragment[2]/body/p/img.0", + ["page_overlap_style"] = "dim", + ["partial_md5_checksum"] = "9336eb2f430a0d0e71dd0882eb74d27c", + ["partial_rerendering"] = true, + ["percent_finished"] = 0.0064516129032258, + ["preferred_dictionaries"] = {}, + ["readermenu_tab_index"] = 1, + ["show_overlap_enable"] = false, + ["stats"] = { + ["authors"] = "Mary Shelley", + ["highlights"] = 2, + ["language"] = "en", + ["notes"] = 1, + ["pages"] = 310, + ["performance_in_pages"] = {}, + ["series"] = "N/A", + ["title"] = "Frankenstein", + }, + ["summary"] = { + ["modified"] = "2023-12-19", + ["status"] = "reading", + }, + ["text_lang"] = "en-US", + ["text_lang_embedded_langs"] = true, + ["toc_ticks_ignored_levels"] = {}, +} diff --git a/test/assets/koreader/the_prophet.epub b/test/assets/koreader/the_prophet.epub new file mode 100644 index 0000000..fc5de5c Binary files /dev/null and b/test/assets/koreader/the_prophet.epub differ diff --git a/test/assets/koreader/the_prophet.sdr/metadata.epub.lua b/test/assets/koreader/the_prophet.sdr/metadata.epub.lua new file mode 100644 index 0000000..884c558 --- /dev/null +++ b/test/assets/koreader/the_prophet.sdr/metadata.epub.lua @@ -0,0 +1,164 @@ +-- /home/arun/src/noted/test/assets/koreader/the_prophet.sdr/metadata.epub.lua +return { + ["bookmarks"] = { + [1] = { + ["chapter"] = "On Giving", + ["datetime"] = "2023-12-20 21:02:40", + ["highlighted"] = true, + ["notes"] = "You give but little when you give of your possessions.\ +It is when you give of yourself that you truly give.", + ["page"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos0"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos1"] = "/body/DocFragment[9]/body/article/p[4]/text().52", + }, + [2] = { + ["chapter"] = "On Marriage", + ["datetime"] = "2023-12-20 21:02:16", + ["notes"] = "in On Marriage", + ["page"] = "/body/DocFragment[7]/body/article/p[10]/text().0", + }, + [3] = { + ["chapter"] = "On Love", + ["datetime"] = "2023-12-20 21:01:06", + ["highlighted"] = true, + ["notes"] = "Love gives naught but itself and takes naught but from itself.\ +Love possesses not nor would it be possessed;\ +For love is sufficient unto love.", + ["page"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos0"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos1"] = "/body/DocFragment[6]/body/article/p[24]/text().33", + ["text"] = "Sample note", + }, + [4] = { + ["chapter"] = "The Coming of the Ship", + ["datetime"] = "2023-12-20 20:59:31", + ["highlighted"] = true, + ["notes"] = "And then shall I come to you, a boundless drop to a boundless ocean.", + ["page"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos0"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos1"] = "/body/DocFragment[5]/body/article/p[24]/text().68", + ["text"] = "Page 7 And then shall I come to you, a boundless drop to a boundless ocean. @ 2023-12-20 20:59:31", + }, + }, + ["bookmarks_sorted_20220106"] = true, + ["bookmarks_version"] = 20200615, + ["cache_file_path"] = "/home/arun/.config/koreader/cache/cr3cache/the_prophet.epub.31deabf0.1.cr3", + ["config_panel_index"] = 1, + ["copt_b_page_margin"] = 15, + ["copt_block_rendering_mode"] = 3, + ["copt_cjk_width_scaling"] = 100, + ["copt_embedded_css"] = 1, + ["copt_embedded_fonts"] = 1, + ["copt_font_base_weight"] = 0, + ["copt_font_gamma"] = 15, + ["copt_font_hinting"] = 2, + ["copt_font_kerning"] = 3, + ["copt_font_size"] = 22, + ["copt_h_page_margins"] = { + [1] = 10, + [2] = 10, + }, + ["copt_line_spacing"] = 100, + ["copt_nightmode_images"] = 1, + ["copt_render_dpi"] = 96, + ["copt_rotation_mode"] = 0, + ["copt_smooth_scaling"] = 0, + ["copt_status_line"] = 1, + ["copt_sync_t_b_page_margins"] = 0, + ["copt_t_page_margin"] = 15, + ["copt_view_mode"] = 0, + ["copt_visible_pages"] = 1, + ["copt_word_expansion"] = 0, + ["copt_word_spacing"] = { + [1] = 95, + [2] = 75, + }, + ["cre_dom_version"] = 20210904, + ["css"] = "./data/epub.css", + ["doc_pages"] = 103, + ["doc_path"] = "/home/arun/src/noted/test/assets/koreader/the_prophet.epub", + ["doc_props"] = { + ["authors"] = "Khalil Gibran", + ["description"] = "A collection of prose poetry on themes such as love, beauty, and religion.", + ["keywords"] = "Mysticism⁠-Poetry\ +Prose poems, American", + ["language"] = "en-US", + ["title"] = "The Prophet", + }, + ["floating_punctuation"] = 0, + ["font_face"] = "Noto Serif", + ["font_family_fonts"] = {}, + ["handmade_flows_edit_enabled"] = true, + ["handmade_flows_enabled"] = false, + ["handmade_toc_edit_enabled"] = true, + ["handmade_toc_enabled"] = false, + ["header_font_face"] = "Noto Sans", + ["hide_nonlinear_flows"] = false, + ["highlight"] = { + [7] = { + [1] = { + ["chapter"] = "The Coming of the Ship", + ["datetime"] = "2023-12-20 20:59:31", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos1"] = "/body/DocFragment[5]/body/article/p[24]/text().68", + ["text"] = "And then shall I come to you, a boundless drop to a boundless ocean.", + }, + }, + [14] = { + [1] = { + ["chapter"] = "On Love", + ["datetime"] = "2023-12-20 21:01:06", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos1"] = "/body/DocFragment[6]/body/article/p[24]/text().33", + ["text"] = "Love gives naught but itself and takes naught but from itself.\ +Love possesses not nor would it be possessed;\ +For love is sufficient unto love.", + }, + }, + [20] = { + [1] = { + ["chapter"] = "On Giving", + ["datetime"] = "2023-12-20 21:02:40", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos1"] = "/body/DocFragment[9]/body/article/p[4]/text().52", + ["text"] = "You give but little when you give of your possessions.\ +It is when you give of yourself that you truly give.", + }, + }, + }, + ["highlight_drawer"] = "lighten", + ["highlights_imported"] = true, + ["hyph_force_algorithmic"] = false, + ["hyph_soft_hyphens_only"] = false, + ["hyph_trust_soft_hyphens"] = false, + ["hyphenation"] = true, + ["inverse_reading_order"] = false, + ["last_xpointer"] = "/body/DocFragment[9]/body/article/p[1]/text().0", + ["page_overlap_style"] = "dim", + ["partial_md5_checksum"] = "2107aa7e6601b1c5459134e763c87d5b", + ["partial_rerendering"] = true, + ["percent_finished"] = 0.19417475728155, + ["preferred_dictionaries"] = {}, + ["readermenu_tab_index"] = 3, + ["show_overlap_enable"] = false, + ["stats"] = { + ["authors"] = "Khalil Gibran", + ["highlights"] = 2, + ["language"] = "en-US", + ["notes"] = 1, + ["pages"] = 103, + ["performance_in_pages"] = {}, + ["series"] = "N/A", + ["title"] = "The Prophet", + }, + ["summary"] = { + ["modified"] = "2023-12-20", + ["status"] = "reading", + }, + ["text_lang"] = "en-US", + ["text_lang_embedded_langs"] = true, + ["toc_ticks_ignored_levels"] = {}, +} diff --git a/test/assets/koreader/the_prophet.sdr/metadata.epub.lua.old b/test/assets/koreader/the_prophet.sdr/metadata.epub.lua.old new file mode 100644 index 0000000..7fda07e --- /dev/null +++ b/test/assets/koreader/the_prophet.sdr/metadata.epub.lua.old @@ -0,0 +1,164 @@ +-- /home/arun/src/noted/test/assets/koreader/the_prophet.sdr/metadata.epub.lua +return { + ["bookmarks"] = { + [1] = { + ["chapter"] = "On Giving", + ["datetime"] = "2023-12-20 21:02:40", + ["highlighted"] = true, + ["notes"] = "You give but little when you give of your possessions.\ +It is when you give of yourself that you truly give.", + ["page"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos0"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos1"] = "/body/DocFragment[9]/body/article/p[4]/text().52", + }, + [2] = { + ["chapter"] = "On Marriage", + ["datetime"] = "2023-12-20 21:02:16", + ["notes"] = "in On Marriage", + ["page"] = "/body/DocFragment[7]/body/article/p[10]/text().0", + }, + [3] = { + ["chapter"] = "On Love", + ["datetime"] = "2023-12-20 21:01:06", + ["highlighted"] = true, + ["notes"] = "Love gives naught but itself and takes naught but from itself.\ +Love possesses not nor would it be possessed;\ +For love is sufficient unto love.", + ["page"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos0"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos1"] = "/body/DocFragment[6]/body/article/p[24]/text().33", + ["text"] = "Sample note", + }, + [4] = { + ["chapter"] = "The Coming of the Ship", + ["datetime"] = "2023-12-20 20:59:31", + ["highlighted"] = true, + ["notes"] = "And then shall I come to you, a boundless drop to a boundless ocean.", + ["page"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos0"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos1"] = "/body/DocFragment[5]/body/article/p[24]/text().68", + ["text"] = "Page 7 And then shall I come to you, a boundless drop to a boundless ocean. @ 2023-12-20 20:59:31", + }, + }, + ["bookmarks_sorted_20220106"] = true, + ["bookmarks_version"] = 20200615, + ["cache_file_path"] = "/home/arun/.config/koreader/cache/cr3cache/the_prophet.epub.31deabf0.1.cr3", + ["config_panel_index"] = 1, + ["copt_b_page_margin"] = 15, + ["copt_block_rendering_mode"] = 3, + ["copt_cjk_width_scaling"] = 100, + ["copt_embedded_css"] = 1, + ["copt_embedded_fonts"] = 1, + ["copt_font_base_weight"] = 0, + ["copt_font_gamma"] = 15, + ["copt_font_hinting"] = 2, + ["copt_font_kerning"] = 3, + ["copt_font_size"] = 22, + ["copt_h_page_margins"] = { + [1] = 10, + [2] = 10, + }, + ["copt_line_spacing"] = 100, + ["copt_nightmode_images"] = 1, + ["copt_render_dpi"] = 96, + ["copt_rotation_mode"] = 0, + ["copt_smooth_scaling"] = 0, + ["copt_status_line"] = 1, + ["copt_sync_t_b_page_margins"] = 0, + ["copt_t_page_margin"] = 15, + ["copt_view_mode"] = 0, + ["copt_visible_pages"] = 1, + ["copt_word_expansion"] = 0, + ["copt_word_spacing"] = { + [1] = 95, + [2] = 75, + }, + ["cre_dom_version"] = 20210904, + ["css"] = "./data/epub.css", + ["doc_pages"] = 103, + ["doc_path"] = "/home/arun/src/noted/test/assets/koreader/the_prophet.epub", + ["doc_props"] = { + ["authors"] = "Khalil Gibran", + ["description"] = "A collection of prose poetry on themes such as love, beauty, and religion.", + ["keywords"] = "Mysticism⁠-Poetry\ +Prose poems, American", + ["language"] = "en-US", + ["title"] = "The Prophet", + }, + ["floating_punctuation"] = 0, + ["font_face"] = "Noto Serif", + ["font_family_fonts"] = {}, + ["handmade_flows_edit_enabled"] = true, + ["handmade_flows_enabled"] = false, + ["handmade_toc_edit_enabled"] = true, + ["handmade_toc_enabled"] = false, + ["header_font_face"] = "Noto Sans", + ["hide_nonlinear_flows"] = false, + ["highlight"] = { + [7] = { + [1] = { + ["chapter"] = "The Coming of the Ship", + ["datetime"] = "2023-12-20 20:59:31", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[5]/body/article/p[24]/text().0", + ["pos1"] = "/body/DocFragment[5]/body/article/p[24]/text().68", + ["text"] = "And then shall I come to you, a boundless drop to a boundless ocean.", + }, + }, + [14] = { + [1] = { + ["chapter"] = "On Love", + ["datetime"] = "2023-12-20 21:01:06", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[6]/body/article/p[22]/text().0", + ["pos1"] = "/body/DocFragment[6]/body/article/p[24]/text().33", + ["text"] = "Love gives naught but itself and takes naught but from itself.\ +Love possesses not nor would it be possessed;\ +For love is sufficient unto love.", + }, + }, + [20] = { + [1] = { + ["chapter"] = "On Giving", + ["datetime"] = "2023-12-20 21:02:40", + ["drawer"] = "lighten", + ["pos0"] = "/body/DocFragment[9]/body/article/p[3]/text().0", + ["pos1"] = "/body/DocFragment[9]/body/article/p[4]/text().52", + ["text"] = "You give but little when you give of your possessions.\ +It is when you give of yourself that you truly give.", + }, + }, + }, + ["highlight_drawer"] = "lighten", + ["highlights_imported"] = true, + ["hyph_force_algorithmic"] = false, + ["hyph_soft_hyphens_only"] = false, + ["hyph_trust_soft_hyphens"] = false, + ["hyphenation"] = true, + ["inverse_reading_order"] = false, + ["last_xpointer"] = "/body/DocFragment[9]/body/article/hr[1].0", + ["page_overlap_style"] = "dim", + ["partial_md5_checksum"] = "2107aa7e6601b1c5459134e763c87d5b", + ["partial_rerendering"] = true, + ["percent_finished"] = 0.20388349514563, + ["preferred_dictionaries"] = {}, + ["readermenu_tab_index"] = 3, + ["show_overlap_enable"] = false, + ["stats"] = { + ["authors"] = "Khalil Gibran", + ["highlights"] = 2, + ["language"] = "en-US", + ["notes"] = 1, + ["pages"] = 103, + ["performance_in_pages"] = {}, + ["series"] = "N/A", + ["title"] = "The Prophet", + }, + ["summary"] = { + ["modified"] = "2023-12-20", + ["status"] = "reading", + }, + ["text_lang"] = "en-US", + ["text_lang_embedded_langs"] = true, + ["toc_ticks_ignored_levels"] = {}, +} diff --git a/test/assets/single column.odt b/test/assets/pdf/single column.odt similarity index 100% rename from test/assets/single column.odt rename to test/assets/pdf/single column.odt diff --git a/test/assets/single column.pdf b/test/assets/pdf/single column.pdf similarity index 100% rename from test/assets/single column.pdf rename to test/assets/pdf/single column.pdf diff --git a/test/assets/two column.odt b/test/assets/pdf/two column.odt similarity index 100% rename from test/assets/two column.odt rename to test/assets/pdf/two column.odt diff --git a/test/assets/two column.pdf b/test/assets/pdf/two column.pdf similarity index 100% rename from test/assets/two column.pdf rename to test/assets/pdf/two column.pdf