diff --git a/bindings/python/README.md b/bindings/python/README.md index 33435350..12ba4bdc 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -28,12 +28,12 @@ from semantic_text_splitter import HuggingFaceTextSplitter from tokenizers import Tokenizer # Maximum number of tokens in a chunk -max_characters = 1000 +max_tokens = 1000 # Optionally can also have the splitter not trim whitespace for you tokenizer = Tokenizer.from_pretrained("bert-base-uncased") splitter = HuggingFaceTextSplitter(tokenizer, trim_chunks=False) -chunks = splitter.chunks("your document text", max_characters) +chunks = splitter.chunks("your document text", max_tokens) ``` ### With Tiktoken Tokenizer