From f935bc2f80935b6673a86d18cc0169ce38fb90c0 Mon Sep 17 00:00:00 2001
From: Maxim Karpov <makame@mail.ru>
Date: Thu, 5 Sep 2024 17:55:48 +0300
Subject: [PATCH] feat: add merge included optimisation

---
 src/transform/plugins/includes/collect.ts    | 134 ++++++++++---------
 src/transform/plugins/includes/types.ts      |  10 ++
 src/transform/plugins/links/index.ts         |  23 +++-
 test/mocks/include-included-3-deep.expect.md |  20 +--
 4 files changed, 108 insertions(+), 79 deletions(-)

diff --git a/src/transform/plugins/includes/collect.ts b/src/transform/plugins/includes/collect.ts
index 12badc39..3543dc91 100644
--- a/src/transform/plugins/includes/collect.ts
+++ b/src/transform/plugins/includes/collect.ts
@@ -2,37 +2,68 @@ import {relative} from 'path';
 import {bold} from 'chalk';
 
 import {getRelativePath, isFileExists, resolveRelativePath} from '../../utilsFS';
-import {MarkdownItPluginOpts} from '../typings';
+
+import {IncludeCollectOpts} from './types';
 
 const includesPaths: string[] = [];
 
-type Opts = MarkdownItPluginOpts & {
-    destPath: string;
-    copyFile(path: string, dest: string, opts: Opts): string | null | undefined;
-    singlePage: Boolean;
-    included: Boolean;
-    includedParentPath?: string;
-};
-
-const collect = (input: string, options: Opts) => {
-    const {
-        root,
-        path,
-        destPath = '',
-        log,
-        copyFile,
-        singlePage,
-        includedParentPath: includedParentPathNullable,
-        included,
-    } = options;
+/**
+ * Copies one included file and, in "included" mode, records its processed content in the appendix.
+ *
+ * Any error thrown while copying or processing the file is caught and reported through `log.error`.
+ *
+ * @param includePath - path of the included file; passed to `copyFile` as the source
+ * @param targetDestPath - path passed to `copyFile` as the destination
+ * @param options - collect options of the file that contains the include directive
+ * @param appendix - shared map of `{% included %}` blocks keyed by relative include path
+ */
+function processRecursive(
+    includePath: string,
+    targetDestPath: string,
+    options: IncludeCollectOpts,
+    appendix: Map<string, string>,
+) {
+    const {path, log, copyFile, includedParentPath: parentOverride, included} = options;
+
+    // Appendix keys are resolved against the inherited parent path, or this file if none is set
+    const includedParentPath = parentOverride || path;
+    const copyOptions = {...options, path: includePath, destPath: targetDestPath};
+
+    try {
+        const content = copyFile(includePath, targetDestPath, copyOptions);
+
+        // Inline only in "included" mode and only when copyFile returned some content
+        if (!included || !content) {
+            return;
+        }
+
+        const key = getRelativePath(includedParentPath, includePath);
+
+        // A file that already has an appendix entry is not processed a second time
+        if (appendix.has(key)) {
+            return;
+        }
+
+        // Process nested includes first; they add their own entries to the same appendix
+        const nestedOptions = {...options, path: includePath, includedParentPath};
+        const processed = collectRecursive(content, nestedOptions, appendix);
+
+        // Register this file's own block once its content has been processed
+        appendix.set(key, `{% included (${key}) %}\n${processed}\n{% endincluded %}`);
+    } catch (e) {
+        log.error(`No such file or has no access to ${bold(includePath)} in ${bold(path)}`);
+    }
+}
 
-    let match,
-        result = input;
+function collectRecursive(
+    result: string,
+    options: IncludeCollectOpts,
+    appendix: Map<string, string>,
+) {
+    const {root, path, destPath = '', log, singlePage} = options;
 
-    const appendix: Map<string, string> = new Map();
+    const INCLUDE_REGEXP = /{%\s*include\s*(notitle)?\s*\[(.+?)]\((.+?)\)\s*%}/g;
+
+    let match: RegExpExecArray | null;
 
     while ((match = INCLUDE_REGEXP.exec(result)) !== null) {
         let [, , , relativePath] = match;
@@ -63,51 +94,26 @@ const collect = (input: string, options: Opts) => {
         }
 
         includesPaths.push(includePath);
-        const includeOptions = {
-            ...options,
-            path: includePath,
-            destPath: targetDestPath,
-        };
-
-        try {
-            const content = copyFile(includePath, targetDestPath, includeOptions);
-
-            // To reduce file reading we can include the file content into the generated content
-            if (included && content) {
-                const includedRelativePath = getRelativePath(includedParentPath, includePath);
-
-                // The appendix is the map that protects from multiple include files
-                if (!appendix.has(includedRelativePath)) {
-                    // Recursive function to include the depth structure
-                    const includeContent = collect(content, {
-                        ...options,
-                        path: includePath,
-                        includedParentPath,
-                    });
-                    // Add to appendix set structure
-                    appendix.set(
-                        includedRelativePath,
-                        `{% included (${includedRelativePath}) %}\n${includeContent}\n{% endincluded %}`,
-                    );
-                }
-            }
-        } catch (e) {
-            log.error(`No such file or has no access to ${bold(includePath)} in ${bold(path)}`);
-        } finally {
-            includesPaths.pop();
-        }
+
+        processRecursive(includePath, targetDestPath, options, appendix);
+
+        includesPaths.pop();
     }
 
+    return result;
+}
+
+function collect(input: string, options: IncludeCollectOpts) {
+    const appendix: Map<string, string> = new Map(); // one map shared by all nested includes, so each file is stored once
+
+    input = collectRecursive(input, options, appendix); // processes include directives and fills `appendix` along the way
+
     // Appendix should be appended to the end of the file (it supports depth structure, so the included files will have included as well)
     if (appendix.size > 0) {
-        result += '\n' + [...appendix.values()].join('\n');
+        input += '\n' + [...appendix.values()].join('\n'); // entries are emitted in insertion order, one block per line group
     }
 
-    if (singlePage) {
-        return result;
-    }
-
-    return result;
-};
+    return input; // processed content followed by the appendix blocks, if any were collected
+}
 
 export = collect;
diff --git a/src/transform/plugins/includes/types.ts b/src/transform/plugins/includes/types.ts
index 0e65e2fd..66eff785 100644
--- a/src/transform/plugins/includes/types.ts
+++ b/src/transform/plugins/includes/types.ts
@@ -1,7 +1,17 @@
 import {MarkdownIt} from '../../typings';
+import {MarkdownItPluginOpts} from '../typings';
 
 export interface MarkdownItIncluded extends MarkdownIt {
     included?: {
         [key: string]: string;
     };
 }
+
+export type IncludeCollectOpts = MarkdownItPluginOpts & {
+    destPath: string; // destination path; replaced by the include's target path in the options handed to copyFile
+    copyFile(path: string, dest: string, opts: IncludeCollectOpts): string | null | undefined; // a truthy result is treated as the copied file's content
+    singlePage: Boolean; // NOTE(review): boxed `Boolean`; primitive `boolean` is the idiomatic type — confirm callers before narrowing
+    included: Boolean; // when truthy, included files' content is appended as `{% included %}` blocks
+    includedParentPath?: string; // base path for appendix keys; the current `path` is used when unset
+    additionalIncludedList?: string[]; // NOTE(review): not referenced by the code visible in this patch — confirm its purpose
+};
diff --git a/src/transform/plugins/links/index.ts b/src/transform/plugins/links/index.ts
index 4e976c13..1fc1ee29 100644
--- a/src/transform/plugins/links/index.ts
+++ b/src/transform/plugins/links/index.ts
@@ -16,6 +16,7 @@ import {
 import {getFileTokens, isFileExists} from '../../utilsFS';
 import {CacheContext, StateCore} from '../../typings';
 import {MarkdownItPluginCb, MarkdownItPluginOpts} from '../typings';
+import {MarkdownItIncluded} from '../includes/types';
 
 function getTitleFromTokens(tokens: Token[]) {
     let title = '';
@@ -49,10 +50,11 @@ type Options = {
     currentPath: string;
     log: Logger;
     cache?: CacheContext;
+    content?: string;
 };
 
 const getTitle = (id: string | null, options: Options) => {
-    const {file, state, opts} = options;
+    const {file, state, opts, content} = options;
 
     const fileTokens = getFileTokens(file, state, {
         ...opts,
@@ -60,6 +62,7 @@ const getTitle = (id: string | null, options: Options) => {
         disableTitleRefSubstitution: true,
         disableCircularError: true,
         inheritVars: false,
+        content, // The content forces the function to use it instead of reading from the disk
     });
     const sourceTokens = id ? findBlockTokens(fileTokens, id) : fileTokens;
     return getTitleFromTokens(sourceTokens);
@@ -111,7 +114,13 @@ function getDefaultPublicPath(
 }
 
 // eslint-disable-next-line complexity
-function processLink(state: StateCore, tokens: Token[], idx: number, opts: ProcOpts) {
+function processLink(
+    md: MarkdownItIncluded,
+    state: StateCore,
+    tokens: Token[],
+    idx: number,
+    opts: ProcOpts,
+) {
     const {
         path: startPath,
         root,
@@ -147,7 +156,7 @@ function processLink(state: StateCore, tokens: Token[], idx: number, opts: ProcO
 
     if (pathname) {
         file = resolve(path.parse(currentPath).dir, pathname);
-        fileExists = isFileExists(file);
+        fileExists = md.included?.[file] || isFileExists(file);
         isPageFile = PAGE_LINK_REGEXP.test(pathname);
 
         if (isPageFile && !fileExists) {
@@ -180,7 +189,11 @@ function processLink(state: StateCore, tokens: Token[], idx: number, opts: ProcO
         isPageFile &&
         !state.env.disableTitleRefSubstitution
     ) {
+        // Reuse content already collected in md.included, if any, so the file is not read again
+        const content = md.included?.[file];
+
         addTitle({
+            content,
             hash,
             file,
             state,
@@ -213,7 +226,7 @@ function processLink(state: StateCore, tokens: Token[], idx: number, opts: ProcO
     }
 }
 
-const index: MarkdownItPluginCb<ProcOpts & Options> = (md, opts) => {
+const index: MarkdownItPluginCb<ProcOpts & Options> = (md: MarkdownItIncluded, opts) => {
     const plugin = (state: StateCore) => {
         const tokens = state.tokens;
         let i = 0;
@@ -231,7 +244,7 @@ const index: MarkdownItPluginCb<ProcOpts & Options> = (md, opts) => {
                     const isYfmAnchor = tokenClass ? tokenClass.includes('yfm-anchor') : false;
 
                     if (isLinkOpenToken && !isYfmAnchor) {
-                        processLink(state, childrenTokens, j, opts);
+                        processLink(md, state, childrenTokens, j, opts);
                     }
 
                     j++;
diff --git a/test/mocks/include-included-3-deep.expect.md b/test/mocks/include-included-3-deep.expect.md
index 31a22a5a..1dd183d0 100644
--- a/test/mocks/include-included-3-deep.expect.md
+++ b/test/mocks/include-included-3-deep.expect.md
@@ -3,22 +3,22 @@ start main
 {% include [Text](included/file-1-deep.md) %}
 
 end main
-{% included (included/file-1-deep.md) %}
-start file 1
-
-{% include [Text](file-2-deep.md) %}
+{% included (included/file-3.md) %}
+start file 3
 
-end file 1
+end file 3
+{% endincluded %}
 {% included (included/file-2-deep.md) %}
 start file 2
 
 {% include [Text](file-3.md) %}
 
 end file 2
-{% included (included/file-3.md) %}
-start file 3
-
-end file 3
-{% endincluded %}
 {% endincluded %}
+{% included (included/file-1-deep.md) %}
+start file 1
+
+{% include [Text](file-2-deep.md) %}
+
+end file 1
 {% endincluded %}
\ No newline at end of file