From bd3fe6046ea3c305df1dc4c6a15a8a9daa814524 Mon Sep 17 00:00:00 2001
From: FullMetalMeowchemist <117529599+FullMetalMeowchemist@users.noreply.github.com>
Date: Tue, 14 Nov 2023 00:42:52 -0600
Subject: [PATCH] Fix minor typo in readme (#53)

---
 bindings/python/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/bindings/python/README.md b/bindings/python/README.md
index 33435350..12ba4bdc 100644
--- a/bindings/python/README.md
+++ b/bindings/python/README.md
@@ -28,12 +28,12 @@ from semantic_text_splitter import HuggingFaceTextSplitter
 from tokenizers import Tokenizer
 
 # Maximum number of tokens in a chunk
-max_characters = 1000
+max_tokens = 1000
 
 # Optionally can also have the splitter not trim whitespace for you
 tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
 splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=False)
-chunks = splitter.chunks("your document text", max_characters)
+chunks = splitter.chunks("your document text", max_tokens)
 ```
 
 ### With Tiktoken Tokenizer
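
For reviewers who want to check the rename end to end, here is the post-patch snippet as a standalone script, with a loop added to show that the budget is measured in tokens rather than characters. This is a minimal sketch assuming the `semantic-text-splitter` and `tokenizers` packages are installed; the `HuggingFaceTextSplitter` and `splitter.chunks` calls mirror the README snippet patched above, while the verification loop is illustrative and not part of the README.

```python
from semantic_text_splitter import HuggingFaceTextSplitter
from tokenizers import Tokenizer

# Maximum number of tokens in a chunk (the renamed variable).
max_tokens = 1000

# trim_chunks=False keeps surrounding whitespace in each chunk.
tokenizer = Tokenizer.from_pretrained("bert-base-uncased")
splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=False)

chunks = splitter.chunks("your document text", max_tokens)

# Re-encode each chunk to inspect its token count, which is what
# max_tokens bounds (hence the rename away from max_characters).
for chunk in chunks:
    print(len(tokenizer.encode(chunk).ids), "tokens:", chunk)
```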