From abf77777f5d8ad5a203b1b24688de354f7e375d6 Mon Sep 17 00:00:00 2001 From: Ben Brandt Date: Mon, 25 Mar 2024 23:33:12 +0100 Subject: [PATCH] Update documentation to remove extra level for text in markdown --- README.md | 15 +++++++-------- bindings/python/README.md | 15 +++++++-------- bindings/python/semantic_text_splitter.pyi | 15 +++++++-------- bindings/python/src/lib.rs | 15 +++++++-------- src/markdown.rs | 15 +++++++-------- 5 files changed, 35 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 9656531..b5803e7 100644 --- a/README.md +++ b/README.md @@ -164,14 +164,13 @@ Markdown is parsed according to the CommonMark spec, along with some optional fe 3. [Unicode Word Boundaries](https://www.unicode.org/reports/tr29/#Word_Boundaries) 4. [Unicode Sentence Boundaries](https://www.unicode.org/reports/tr29/#Sentence_Boundaries) 5. Soft line breaks (single newline) which isn't necessarily a new element in Markdown. -6. Text nodes within elements -7. Inline elements such as: emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. -8. Block elements suce as: paragraphs, code blocks, and footnote definitions. -9. Container blocks such as: table rows, block quotes, list items, and HTML blocks. -10. Meta containers such as: lists and tables. -11. Thematic breaks or horizontal rules. -12. Headings by level -13. Metadata at the beginning of the document +6. Inline elements such as: text nodes, emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. +7. Block elements such as: paragraphs, code blocks, and footnote definitions. +8. Container blocks such as: table rows, block quotes, list items, and HTML blocks. +9. Meta containers such as: lists and tables. +10. Thematic breaks or horizontal rules. +11. Headings by level +12. 
Metadata at the beginning of the document Splitting doesn't occur below the character level, otherwise you could get partial bytes of a char, which may not be a valid unicode str. diff --git a/bindings/python/README.md b/bindings/python/README.md index 1699cd2..6cd8b50 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -121,14 +121,13 @@ Markdown is parsed according to the CommonMark spec, along with some optional fe 3. [Unicode Word Boundaries](https://www.unicode.org/reports/tr29/#Word_Boundaries) 4. [Unicode Sentence Boundaries](https://www.unicode.org/reports/tr29/#Sentence_Boundaries) 5. Soft line breaks (single newline) which isn't necessarily a new element in Markdown. -6. Text nodes within elements -7. Inline elements such as: emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. -8. Block elements suce as: paragraphs, code blocks, and footnote definitions. -9. Container blocks such as: table rows, block quotes, list items, and HTML blocks. -10. Meta containers such as: lists and tables. -11. Thematic breaks or horizontal rules. -12. Headings by level -13. Metadata at the beginning of the document +6. Inline elements such as: text nodes, emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. +7. Block elements such as: paragraphs, code blocks, and footnote definitions. +8. Container blocks such as: table rows, block quotes, list items, and HTML blocks. +9. Meta containers such as: lists and tables. +10. Thematic breaks or horizontal rules. +11. Headings by level +12. Metadata at the beginning of the document Splitting doesn't occur below the character level, otherwise you could get partial bytes of a char, which may not be a valid unicode str. 
diff --git a/bindings/python/semantic_text_splitter.pyi b/bindings/python/semantic_text_splitter.pyi index 5237348..78f9fbd 100644 --- a/bindings/python/semantic_text_splitter.pyi +++ b/bindings/python/semantic_text_splitter.pyi @@ -392,14 +392,13 @@ class MarkdownSplitter: 3. [Unicode Word Boundaries](https://www.unicode.org/reports/tr29/#Word_Boundaries) 4. [Unicode Sentence Boundaries](https://www.unicode.org/reports/tr29/#Sentence_Boundaries) 5. Soft line breaks (single newline) which isn't necessarily a new element in Markdown. - 6. Text nodes within elements - 7. Inline elements such as: emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. - 8. Block elements suce as: paragraphs, code blocks, and footnote definitions. - 9. Container blocks such as: table rows, block quotes, list items, and HTML blocks. - 10. Meta containers such as: lists and tables. - 11. Thematic breaks or horizontal rules. - 12. Headings by level - 13. Metadata at the beginning of the document + 6. Inline elements such as: text nodes, emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. + 7. Block elements such as: paragraphs, code blocks, and footnote definitions. + 8. Container blocks such as: table rows, block quotes, list items, and HTML blocks. + 9. Meta containers such as: lists and tables. + 10. Thematic breaks or horizontal rules. + 11. Headings by level + 12. Metadata at the beginning of the document Markdown is parsed according to the Commonmark spec, along with some optional features such as GitHub Flavored Markdown. diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index e3ea4f2..e68789c 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -632,14 +632,13 @@ impl PyMarkdownSplitter { 3. [Unicode Word Boundaries](https://www.unicode.org/reports/tr29/#Word_Boundaries) 4. 
[Unicode Sentence Boundaries](https://www.unicode.org/reports/tr29/#Sentence_Boundaries) 5. Soft line breaks (single newline) which isn't necessarily a new element in Markdown. - 6. Text nodes within elements - 7. Inline elements such as: emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. - 8. Block elements suce as: paragraphs, code blocks, and footnote definitions. - 9. Container blocks such as: table rows, block quotes, list items, and HTML blocks. - 10. Meta containers such as: lists and tables. - 11. Thematic breaks or horizontal rules. - 12. Headings by level - 13. Metadata at the beginning of the document + 6. Inline elements such as: text nodes, emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. + 7. Block elements such as: paragraphs, code blocks, and footnote definitions. + 8. Container blocks such as: table rows, block quotes, list items, and HTML blocks. + 9. Meta containers such as: lists and tables. + 10. Thematic breaks or horizontal rules. + 11. Headings by level + 12. Metadata at the beginning of the document Markdown is parsed according to the Commonmark spec, along with some optional features such as GitHub Flavored Markdown. diff --git a/src/markdown.rs b/src/markdown.rs index dba0e00..1722eb9 100644 --- a/src/markdown.rs +++ b/src/markdown.rs @@ -93,14 +93,13 @@ where /// 3. [Unicode Word Boundaries](https://www.unicode.org/reports/tr29/#Word_Boundaries) /// 4. [Unicode Sentence Boundaries](https://www.unicode.org/reports/tr29/#Sentence_Boundaries) /// 5. Soft line breaks (single newline) which isn't necessarily a new element in Markdown. - /// 6. Text nodes within elements - /// 7. Inline elements such as: emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. - /// 8. 
Block elements suce as: paragraphs, code blocks, and footnote definitions. - /// 9. Container blocks such as: table rows, block quotes, list items, and HTML blocks. - /// 10. Meta containers such as: lists and tables. - /// 11. Thematic breaks or horizontal rules. - /// 12. Headings by level - /// 13. Metadata at the beginning of the document + /// 6. Inline elements such as: text nodes, emphasis, strong, strikethrough, link, image, table cells, inline code, footnote references, task list markers, and inline html. + /// 7. Block elements such as: paragraphs, code blocks, and footnote definitions. + /// 8. Container blocks such as: table rows, block quotes, list items, and HTML blocks. + /// 9. Meta containers such as: lists and tables. + /// 10. Thematic breaks or horizontal rules. + /// 11. Headings by level + /// 12. Metadata at the beginning of the document /// /// Splitting doesn't occur below the character level, otherwise you could get partial bytes of a char, which may not be a valid unicode str. ///