esm7 · esm7 · Aug 4, 2024 · Aug 4, 2024 · Aug 4, 2024 · Aug 4, 2024
diff --git a/motions/jumpToHeading.ts b/motions/jumpToHeading.ts
@@ -1,19 +1,24 @@
-import { jumpToPattern } from "../utils/jumpToPattern";
+import { Editor as CodeMirrorEditor } from "codemirror";
+import { EditorPosition } from "obsidian";
+import { isWithinMatch, jumpToPattern } from "../utils/jumpToPattern";
 import { MotionFn } from "../utils/vimApi";
 
-const HEADING_REGEX = /^#+ /gm;
+/** Naive Regex for a Markdown heading (H1 through H6). "Naive" because it does not account for
+ * whether the match is within a codeblock (e.g. it could be a Python comment, not a heading).
+ */
+const NAIVE_HEADING_REGEX = /^#{1,6} /gm;
+
+/** Regex for a Markdown fenced codeblock, which begins with some number >=3 of backticks at the
+ * start of a line. It either ends on the nearest future line that starts with at least as many
+ * backticks (\1 back-reference), or extends to the end of the string if no such future line exists.
+ */
+const FENCED_CODEBLOCK_REGEX = /(^```+)(.*?^\1|.*)/gms;
 
 /**
  * Jumps to the repeat-th next heading.
  */
 export const jumpToNextHeading: MotionFn = (cm, cursorPosition, { repeat }) => {
-  return jumpToPattern({
-    cm,
-    cursorPosition,
-    repeat,
-    regex: HEADING_REGEX,
-    direction: "next",
-  });
+  return jumpToHeading({ cm, cursorPosition, repeat, direction: "next" });
 };
 
 /**
@@ -24,11 +29,47 @@ export const jumpToPreviousHeading: MotionFn = (
   cursorPosition,
   { repeat }
 ) => {
+  return jumpToHeading({ cm, cursorPosition, repeat, direction: "previous" });
+};
+
+/**
+ * Jumps to the repeat-th heading in the given direction.
+ *
+ * Under the hood, we use the naive heading regex to find all headings, and then filter out those
+ * that are within codeblocks. `codeblockMatches` is passed in a closure to avoid repeated
+ * computation.
+ */
+function jumpToHeading({
+  cm,
+  cursorPosition,
+  repeat,
+  direction,
+}: {
+  cm: CodeMirrorEditor;
+  cursorPosition: EditorPosition;
+  repeat: number;
+  direction: "next" | "previous";
+}): EditorPosition {
+  const codeblockMatches = findAllCodeblocks(cm);
+  const filterMatch = (match: RegExpExecArray) => !isMatchWithinCodeblock(match, codeblockMatches);
   return jumpToPattern({
     cm,
     cursorPosition,
     repeat,
-    regex: HEADING_REGEX,
-    direction: "previous",
+    regex: NAIVE_HEADING_REGEX,
+    filterMatch,
+    direction,
   });
-};
+}
+
+function findAllCodeblocks(cm: CodeMirrorEditor): RegExpExecArray[] {
+  const content = cm.getValue();
+  return [...content.matchAll(FENCED_CODEBLOCK_REGEX)];
+}
+
+function isMatchWithinCodeblock(
+  match: RegExpExecArray,
+  codeblockMatches: RegExpExecArray[]
+): boolean {
+  return codeblockMatches.some((codeblockMatch) => isWithinMatch(codeblockMatch, match.index));
+}
diff --git a/motions/jumpToLink.ts b/motions/jumpToLink.ts
@@ -1,13 +1,22 @@
 import { jumpToPattern } from "../utils/jumpToPattern";
 import { MotionFn } from "../utils/vimApi";
 
-const WIKILINK_REGEX_STRING = "\\[\\[[^\\]\\]]+?\\]\\]";
-const MARKDOWN_LINK_REGEX_STRING = "\\[[^\\]]+?\\]\\([^)]+?\\)";
-const LINK_REGEX_STRING = `${WIKILINK_REGEX_STRING}|${MARKDOWN_LINK_REGEX_STRING}`;
+const WIKILINK_REGEX_STRING = "\\[\\[.*?\\]\\]";
+const MARKDOWN_LINK_REGEX_STRING = "\\[.*?\\]\\(.*?\\)";
+const URL_REGEX_STRING = "\\w+://\\S+";
+
+/**
+ * Regex for a link (which can be a wikilink, a markdown link, or a standalone URL).
+ */
+const LINK_REGEX_STRING = `${WIKILINK_REGEX_STRING}|${MARKDOWN_LINK_REGEX_STRING}|${URL_REGEX_STRING}`;
 const LINK_REGEX = new RegExp(LINK_REGEX_STRING, "g");
 
 /**
  * Jumps to the repeat-th next link.
+ *
+ * Note that since `jumpToPattern` uses `String.matchAll`, which internally updates `lastIndex`
+ * after each match, it won't catch standalone URLs within wikilinks / markdown links
+ * (which should be a good thing in most cases).
 */
 export const jumpToNextLink: MotionFn = (cm, cursorPosition, { repeat }) => {
   return jumpToPattern({

diff --git a/utils/jumpToPattern.ts b/utils/jumpToPattern.ts
@@ -9,28 +9,34 @@ import { EditorPosition } from "obsidian";
  * Under the hood, to avoid repeated loops of the document: we get all matches at once, order them
  * according to `direction` and `cursorPosition`, and use modulo arithmetic to return the
  * appropriate match.
+ *
+ * @param cm The CodeMirror editor instance.
+ * @param cursorPosition The current cursor position.
+ * @param repeat The number of times to repeat the jump (e.g. 1 to jump to the very next match). Is
+ * modulo-ed for efficiency.
+ * @param regex The regex pattern to jump to.
+ * @param filterMatch Optional filter function to run on the regex matches. Return false to ignore
+ * a given match.
+ * @param direction The direction to jump in.
  */
 export function jumpToPattern({
   cm,
   cursorPosition,
   repeat,
   regex,
+  filterMatch = () => true,
   direction,
 }: {
   cm: CodeMirrorEditor;
   cursorPosition: EditorPosition;
   repeat: number;
   regex: RegExp;
+  filterMatch?: (match: RegExpExecArray) => boolean;
   direction: "next" | "previous";
 }): EditorPosition {
   const content = cm.getValue();
   const cursorIdx = cm.indexFromPos(cursorPosition);
-  const orderedMatches = getOrderedMatches({
-    content,
-    regex,
-    cursorIdx,
-    direction,
-  });
+  const orderedMatches = getOrderedMatches({ content, regex, cursorIdx, filterMatch, direction });
   const effectiveRepeat = (repeat % orderedMatches.length) || orderedMatches.length;
   const matchIdx = orderedMatches[effectiveRepeat - 1]?.index;
   if (matchIdx === undefined) {
@@ -48,17 +54,20 @@ function getOrderedMatches({
   content,
   regex,
   cursorIdx,
+  filterMatch,
   direction,
 }: {
   content: string;
   regex: RegExp;
   cursorIdx: number;
+  filterMatch: (match: RegExpExecArray) => boolean;
   direction: "next" | "previous";
 }): RegExpExecArray[] {
   const { previousMatches, currentMatches, nextMatches } = getAndGroupMatches(
     content,
     regex,
-    cursorIdx
+    cursorIdx,
+    filterMatch
   );
   if (direction === "next") {
     return [...nextMatches, ...previousMatches, ...currentMatches];
@@ -77,20 +86,19 @@ function getOrderedMatches({
 function getAndGroupMatches(
   content: string,
   regex: RegExp,
-  cursorIdx: number
+  cursorIdx: number,
+  filterMatch: (match: RegExpExecArray) => boolean
 ): {
   previousMatches: RegExpExecArray[];
   currentMatches: RegExpExecArray[];
   nextMatches: RegExpExecArray[];
 } {
   const globalRegex = makeGlobalRegex(regex);
-  const allMatches = [...content.matchAll(globalRegex)];
+  const allMatches = [...content.matchAll(globalRegex)].filter(filterMatch);
   const previousMatches = allMatches.filter(
-    (match) => match.index < cursorIdx && !isCursorOnMatch(match, cursorIdx)
-  );
-  const currentMatches = allMatches.filter((match) =>
-    isCursorOnMatch(match, cursorIdx)
+    (match) => match.index < cursorIdx && !isWithinMatch(match, cursorIdx)
   );
+  const currentMatches = allMatches.filter((match) => isWithinMatch(match, cursorIdx));
   const nextMatches = allMatches.filter((match) => match.index > cursorIdx);
   return { previousMatches, currentMatches, nextMatches };
 }
@@ -105,6 +113,6 @@ function getGlobalFlags(regex: RegExp): string {
   return flags.includes("g") ? flags : `${flags}g`;
 }
 
-function isCursorOnMatch(match: RegExpExecArray, cursorIdx: number): boolean {
-  return match.index <= cursorIdx && cursorIdx < match.index + match[0].length;
+export function isWithinMatch(match: RegExpExecArray, index: number): boolean {
+  return match.index <= index && index < match.index + match[0].length;
 }