Skip to content

Commit

Permalink
feat: koreader library support.
Browse files Browse the repository at this point in the history
  • Loading branch information
codito committed Dec 23, 2023
1 parent 58c36ac commit f15115e
Show file tree
Hide file tree
Showing 38 changed files with 1,277 additions and 106 deletions.
20 changes: 20 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "C#: Noted",
"type": "dotnet",
"request": "launch",
"projectPath": "${workspaceFolder}/src/Noted/Noted.csproj",
"launchConfigurationId": "TargetFramework=;Noted"
},
{
"name": "C#: Launch (console)",
"type": "coreclr",
"request": "launch",
"preLaunchTask": "dotnet: build",
"program": "${workspaceFolder}/src/Noted/bin/Debug/net8.0/Noted.dll",
"args": ["test/assets/koreader", "/tmp"]
}
]
}
12 changes: 12 additions & 0 deletions .vscode/tasks.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
{
"version": "2.0.0",
"tasks": [
{
"type": "dotnet",
"task": "build",
"group": "build",
"problemMatcher": [],
"label": "dotnet: build"
}
]
}
5 changes: 0 additions & 5 deletions renovate.json

This file was deleted.

1 change: 0 additions & 1 deletion src/Noted/Core/Extensions/IDocumentReader.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@ namespace Noted.Core.Extensions
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
using System.Xml.Linq;
using Noted.Core.Models;

/// <summary>
Expand Down
3 changes: 2 additions & 1 deletion src/Noted/Core/Models/DocumentSection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ namespace Noted.Core.Models
public record DocumentSection(
string Title,
int Level,
int Location);
int Location,
DocumentSection? Parent);
}
83 changes: 83 additions & 0 deletions src/Noted/Core/Models/EpubXPathLocation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright (c) Arun Mahapatra. All rights reserved.
// Licensed under the MIT license. See LICENSE file in the project root for full license information.

namespace Noted.Core.Models;

using System;
using System.Text.RegularExpressions;

/// <summary>
/// A text range inside an EPUB document, described by a start and an end xpath position.
/// The serialized form is <c>epubxpath://{start}-{end}</c>.
/// </summary>
/// <param name="pos0">Xpath position where the range starts.</param>
/// <param name="pos1">Xpath position where the range ends.</param>
public struct EpubXPathLocation(string pos0, string pos1) : IComparable, IComparable<EpubXPathLocation>
{
    // Every serialized position begins with "/body/DocFragment[" (see EpubLocation.ToString),
    // so a '-' immediately followed by that prefix can only be the start/end separator.
    // Splitting on a bare '-' would break xpaths that contain a hyphen themselves.
    private const string EndPositionMarker = "-/body/DocFragment[";

    /// <summary>Gets the position where the range starts.</summary>
    public EpubLocation Start { get; init; } = EpubLocation.FromString(pos0);

    /// <summary>Gets the position where the range ends.</summary>
    public EpubLocation End { get; init; } = EpubLocation.FromString(pos1);

    /// <summary>
    /// Parses a location previously produced by <see cref="ToString"/>.
    /// </summary>
    /// <param name="location">Serialized location, e.g. <c>epubxpath://{start}-{end}</c>.</param>
    /// <returns>The parsed range.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="location"/> is null.</exception>
    /// <exception cref="UriFormatException"><paramref name="location"/> is not an absolute URI.</exception>
    /// <exception cref="ArgumentException">The start/end separator or one of the positions is invalid.</exception>
    public static EpubXPathLocation FromString(string location)
    {
        ArgumentNullException.ThrowIfNull(location);

        var path = new Uri(location).PathAndQuery;
        var separator = path.IndexOf(EndPositionMarker, StringComparison.Ordinal);
        if (separator < 0)
        {
            throw new ArgumentException($"Invalid location: {location}", nameof(location));
        }

        // Skip only the '-' itself; the end position keeps its leading "/body/...".
        return new EpubXPathLocation(path[..separator], path[(separator + 1)..]);
    }

    /// <inheritdoc/>
    public override string ToString() => $"epubxpath://{this.Start}-{this.End}";

    /// <summary>
    /// Orders ranges by their start position, then by their end position.
    /// </summary>
    /// <param name="other">Range to compare with.</param>
    /// <returns>Negative, zero or positive when this range sorts before, with or after <paramref name="other"/>.</returns>
    public int CompareTo(EpubXPathLocation other)
    {
        var startCompare = this.Start.CompareTo(other.Start);
        return startCompare == 0 ? this.End.CompareTo(other.End) : startCompare;
    }

    /// <summary>
    /// Non-generic comparison. Per the <see cref="IComparable"/> contract any instance
    /// sorts after <c>null</c>.
    /// </summary>
    /// <param name="obj">A boxed <see cref="EpubXPathLocation"/> or null.</param>
    /// <returns>Negative, zero or positive when this range sorts before, with or after <paramref name="obj"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="obj"/> is not an <see cref="EpubXPathLocation"/>.</exception>
    public int CompareTo(object? obj) => obj switch
    {
        null => 1,
        EpubXPathLocation other => this.CompareTo(other),
        _ => throw new ArgumentException($"Object must be of type {nameof(EpubXPathLocation)}.", nameof(obj)),
    };
}

/// <summary>
/// A single position inside an EPUB document: the document fragment, the xpath of the
/// node within that fragment and a character offset. The textual form is
/// <c>/body/DocFragment[{id}]{xpath}.{offset}</c>.
/// </summary>
/// <param name="DocumentFragmentId">Index of the document fragment.</param>
/// <param name="XPath">Xpath of the node, relative to the document fragment.</param>
/// <param name="CharacterLocation">Character offset that follows the last '.' of the position.</param>
public partial record EpubLocation(
    int DocumentFragmentId,
    string XPath,
    int CharacterLocation) : IComparable, IComparable<EpubLocation>
{
    /// <summary>
    /// Parses a position of the form <c>/body/DocFragment[{id}]{xpath}.{offset}</c>.
    /// </summary>
    /// <param name="xpath">Textual position.</param>
    /// <returns>The parsed position.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="xpath"/> is null.</exception>
    /// <exception cref="ArgumentException">The text does not match the expected format.</exception>
    public static EpubLocation FromString(string xpath)
    {
        ArgumentNullException.ThrowIfNull(xpath);

        var match = EpubXPathRegex().Match(xpath);
        if (!match.Success ||
            !int.TryParse(match.Groups["docFragmentId"].Value, out var docFragmentId) ||
            string.IsNullOrEmpty(match.Groups["xpath"].Value) ||
            !int.TryParse(match.Groups["charIndex"].Value, out var charIndex))
        {
            throw new ArgumentException(
                $"Invalid xpath: {xpath}", nameof(xpath));
        }

        return new(docFragmentId, match.Groups["xpath"].Value, charIndex);
    }

    /// <summary>
    /// Non-generic comparison. Per the <see cref="IComparable"/> contract any instance
    /// sorts after <c>null</c>.
    /// </summary>
    /// <param name="obj">An <see cref="EpubLocation"/> or null.</param>
    /// <returns>Negative, zero or positive when this position sorts before, with or after <paramref name="obj"/>.</returns>
    /// <exception cref="ArgumentException"><paramref name="obj"/> is not an <see cref="EpubLocation"/>.</exception>
    public int CompareTo(object? obj) => obj switch
    {
        null => 1,
        EpubLocation other => this.CompareTo(other),
        _ => throw new ArgumentException($"Object must be of type {nameof(EpubLocation)}.", nameof(obj)),
    };

    /// <summary>
    /// Orders positions by document fragment, then xpath, then character offset.
    /// </summary>
    /// <param name="other">Position to compare with; null sorts first.</param>
    /// <returns>Negative, zero or positive when this position sorts before, with or after <paramref name="other"/>.</returns>
    public int CompareTo(EpubLocation? other)
    {
        if (other is null)
        {
            return 1;
        }

        var docFragmentCompare = this.DocumentFragmentId.CompareTo(other.DocumentFragmentId);
        if (docFragmentCompare != 0)
        {
            return docFragmentCompare;
        }

        // Xpaths cannot be ordered without the document itself. We compare them lexically,
        // assuming the structure of book pages to be consistent. The comparison is ordinal
        // so that the resulting order does not depend on the culture of the machine.
        // TODO: It is probably better to keep the original order of elements.
        var xpathCompare = string.CompareOrdinal(this.XPath, other.XPath);
        if (xpathCompare != 0)
        {
            return xpathCompare;
        }

        return this.CharacterLocation.CompareTo(other.CharacterLocation);
    }

    /// <inheritdoc/>
    public override string ToString() => $"/body/DocFragment[{this.DocumentFragmentId}]{this.XPath}.{this.CharacterLocation}";

    // The xpath group is greedy, so the character index is whatever follows the LAST '.'.
    // No RegexOptions.Compiled: the regex source generator already emits compiled code.
    [GeneratedRegex(@"/body/DocFragment\[(?<docFragmentId>\d+)\](?<xpath>.*)\.(?<charIndex>.*)$")]
    private static partial Regex EpubXPathRegex();
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,95 @@

namespace Noted.Extensions.Libraries.KOReader;

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using NLua;
using Noted.Core.Extensions;
using Noted.Core.Models;
using Noted.Core.Platform.IO;

/// <summary>
/// Annotation provider that reads highlights and notes from the <c>.lua</c> files found
/// under a source path. Each file is evaluated with NLua and its <c>bookmarks</c>,
/// <c>highlight</c> and <c>doc_props</c> tables are read.
/// </summary>
public class KOReaderAnnotationProvider(IFileSystem fileSystem, ILogger logger) : IAnnotationProvider
{
    private readonly ILogger logger = logger;

    private readonly IFileSystem fileSystem = fileSystem;

    /// <summary>
    /// Checks whether the source path contains at least one <c>.lua</c> file.
    /// </summary>
    /// <param name="sourcePath">Path to search for annotation files.</param>
    /// <returns>True when at least one <c>.lua</c> file is found.</returns>
    public bool IsAvailable(string sourcePath)
    {
        return this.fileSystem.GetFiles(sourcePath, ".lua").Any();
    }

    /// <summary>
    /// Lazily yields one highlight annotation per highlighted bookmark, followed by a
    /// note annotation when the bookmark carries custom text.
    /// </summary>
    /// <param name="sourcePath">Path to search for annotation files.</param>
    /// <returns>Annotations of every <c>.lua</c> file under <paramref name="sourcePath"/>.</returns>
    public IEnumerable<Annotation> GetAnnotations(string sourcePath)
    {
        if (!this.IsAvailable(sourcePath))
        {
            yield break;
        }

        var annotationFiles = this.fileSystem.GetFiles(sourcePath, ".lua");
        foreach (var annotation in annotationFiles)
        {
            using var lua = new Lua();
            var annotationTable = GetLuaTable(lua, lua.DoFile(annotation)[0]);
            var bookmarksTable = GetLuaTable(lua, annotationTable["bookmarks"]);
            var highlightTable = GetLuaTable(lua, annotationTable["highlight"]);

            // Start positions of every entry of the highlight table; used below to decide
            // whether the custom text of a bookmark belongs to a highlight.
            var highlights = highlightTable.Values
                .SelectMany(h => GetLuaTable(lua, h).Values)
                .Select(h => GetLuaTable(lua, h)["pos0"].ToString())
                .ToHashSet();

            // Title and author are optional: look them up without throwing so that the
            // fallbacks (file name of the document, empty author) are actually reachable.
            var documentTable = GetLuaTable(lua, annotationTable["doc_props"]);
            var document = new DocumentReference
            {
                Title = GetString(documentTable, "title")
                    ?? Path.GetFileName(GetString(annotationTable, "doc_path"))
                    ?? string.Empty,
                Author = GetString(documentTable, "authors") ?? string.Empty
            };

            // NOTE(review): bookmarks are emitted in the order the table dictionary
            // enumerates them; no explicit sort by page number is applied here.
            foreach (var bookmark in bookmarksTable.Values)
            {
                var bookmarkDict = GetLuaTable(lua, bookmark);
                if (!bookmarkDict.TryGetValue("highlighted", out var highlighted) || highlighted is not true)
                {
                    // Skip non-highlighted bookmarks, including an explicit `highlighted = false`.
                    continue;
                }

                // ["notes"] field is available for both notes and highlights.
                // ["text"] field is available only for custom text attached to the note.
                var notes = bookmarkDict["notes"].ToString()!;

                // The timestamp is machine written; parse it independently of the current culture.
                var highlightDate = DateTime.Parse(
                    bookmarkDict["datetime"].ToString()!,
                    System.Globalization.CultureInfo.InvariantCulture);
                var pos0 = bookmarkDict["pos0"].ToString();
                var pos1 = bookmarkDict["pos1"].ToString();
                var chapterTitle = GetString(bookmarkDict, "chapter") ?? string.Empty;
                var context = new AnnotationContext()
                {
                    SerializedLocation = new EpubXPathLocation(pos0!, pos1!).ToString(),
                    DocumentSection = new DocumentSection(chapterTitle, 0, 0, null)
                };
                yield return new Annotation(
                    notes,
                    document,
                    AnnotationType.Highlight,
                    context,
                    highlightDate);

                // Notes are always attached to a highlight. We emit an extra annotation in this case.
                // Text starting with "Page " is not treated as a user note.
                var noteText = GetString(bookmarkDict, "text");
                if (noteText != null &&
                    highlights.Contains(pos0) &&
                    !noteText.StartsWith("Page ", StringComparison.Ordinal))
                {
                    yield return new Annotation(
                        noteText,
                        document,
                        AnnotationType.Note,
                        context,
                        highlightDate);
                }
            }
        }
    }

    // Converts a Lua table to a dictionary; anything that is not a table becomes an empty dictionary.
    private static Dictionary<object, object> GetLuaTable(Lua lua, object table) => table is LuaTable luaTable ? lua.GetTableDict(luaTable) : [];

    // Returns the string form of an optional table entry, or null when the key is absent.
    private static string? GetString(Dictionary<object, object> table, string key) =>
        table.TryGetValue(key, out var value) ? value?.ToString() : null;
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

namespace Noted.Extensions.Libraries.Kindle
{
using Noted.Core;
using Noted.Core.Models;

public static class ClippingExtensions
Expand Down
36 changes: 23 additions & 13 deletions src/Noted/Extensions/Readers/Common/HtmlSectionParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace Noted.Extensions.Readers.Common
{
using System.Collections.Generic;
using System.IO;
using System.Linq;
using AngleSharp;
using AngleSharp.Dom;
using AngleSharp.Html.Dom;
Expand All @@ -27,29 +28,38 @@ public static async IAsyncEnumerable<DocumentSection> Parse(Stream stream)
// Note that both parent and child levels share a common root. Our
// level calculation leverages this.
var depth = 0;
foreach (var node in document.All)
var prevLevel = 0;
DocumentSection prevSection = null!;
foreach (var node in document.QuerySelectorAll("a"))
{
// AngleSharp always inserts the provided fragment within a
// body element. We reset the depth accordingly.
// Alternatively, we could do node.GetAncestors().Count but that
// revisits the parent nodes multiple times.
depth = node.Parent == document.Body ? 0 : depth + 1;
if (node is not IHtmlAnchorElement)
{
continue;
}

depth = node.GetAncestors().Count();
var fileOffset = node.GetAttribute("filepos");
if (!levelSet.TryGetValue(depth, out var level))
{
level = levelSet.Count + 1;
levelSet[depth] = level;
}

yield return new DocumentSection(
var parent = level == 1 ? null : prevSection; // assume this node is a child
var count = prevLevel - level;
while (prevLevel >= level)
{
// if this node is a sibling instead
parent = prevSection?.Parent;
prevSection = parent ?? null!;
prevLevel--;
}

var section = new DocumentSection(
node.Text(),
level,
string.IsNullOrEmpty(fileOffset) ? 0 : int.Parse(fileOffset));
string.IsNullOrEmpty(fileOffset) ? 0 : int.Parse(fileOffset),
parent);

yield return section;

prevLevel = level;
prevSection = section;
}
}
}
Expand Down
Loading

0 comments on commit f15115e

Please sign in to comment.