diff --git a/bindings/python/CHANGELOG.md b/bindings/python/CHANGELOG.md index 067d9926..4f40648f 100644 --- a/bindings/python/CHANGELOG.md +++ b/bindings/python/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## v0.1.3 + +Rename package to `semantic-text-splitter` so it can actually be uploaded to PyPI. + ## v0.1.2 Fix bad release diff --git a/bindings/python/Cargo.lock b/bindings/python/Cargo.lock index b577cffd..50cd18a1 100644 --- a/bindings/python/Cargo.lock +++ b/bindings/python/Cargo.lock @@ -243,6 +243,14 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd" +[[package]] +name = "semantic-text-splitter" +version = "0.1.3" +dependencies = [ + "pyo3", + "text-splitter", +] + [[package]] name = "smallvec" version = "1.10.0" @@ -280,14 +288,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "text-splitter-py" -version = "0.1.2" -dependencies = [ - "pyo3", - "text-splitter", -] - [[package]] name = "unicode-ident" version = "1.0.9" diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml index 8ae164e0..55af83b1 100644 --- a/bindings/python/Cargo.toml +++ b/bindings/python/Cargo.toml @@ -1,6 +1,6 @@ [package] -name = "text-splitter-py" -version = "0.1.2" +name = "semantic-text-splitter" +version = "0.1.3" authors = ["Ben Brandt "] edition = "2021" description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens (when used with large language models)." 
@@ -10,7 +10,7 @@ keywords = ["text", "split", "tokenizer", "nlp", "ai"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [lib] -name = "text_splitter" +name = "semantic_text_splitter" crate-type = ["cdylib"] [dependencies] diff --git a/bindings/python/README.md b/bindings/python/README.md index d456fb56..824463b3 100644 --- a/bindings/python/README.md +++ b/bindings/python/README.md @@ -11,7 +11,7 @@ This crate provides methods for splitting longer pieces of text into smaller chu ### By Number of Characters ```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Maximum number of characters in a chunk max_characters = 1000 @@ -30,7 +30,7 @@ Once a chunk has reached a length that falls within the range it will be returne It is always possible that a chunk may be returned that is less than the `start` value, as adding the next piece of text may have made it larger than the `end` capacity. 
```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Optionally can also have the splitter trim whitespace for you splitter = CharacterTextSplitter() diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml index 8c992f74..f1cc5026 100644 --- a/bindings/python/pyproject.toml +++ b/bindings/python/pyproject.toml @@ -3,7 +3,7 @@ requires = ["maturin>=1.0,<2.0"] build-backend = "maturin" [project] -name = "text-splitter" +name = "semantic-text-splitter" requires-python = ">=3.7" classifiers = [ "Programming Language :: Rust", diff --git a/bindings/python/text_splitter.pyi b/bindings/python/semantic_text_splitter.pyi similarity index 95% rename from bindings/python/text_splitter.pyi rename to bindings/python/semantic_text_splitter.pyi index 9d52e8a0..c630ad86 100644 --- a/bindings/python/text_splitter.pyi +++ b/bindings/python/semantic_text_splitter.pyi @@ -7,7 +7,7 @@ class CharacterTextSplitter: ### By Number of Characters ```python - from text_splitter import CharacterTextSplitter + from semantic_text_splitter import CharacterTextSplitter # Maximum number of characters in a chunk max_characters = 1000 @@ -26,7 +26,7 @@ class CharacterTextSplitter: It is always possible that a chunk may be returned that is less than the `start` value, as adding the next piece of text may have made it larger than the `end` capacity. 
```python - from text_splitter import CharacterTextSplitter + from semantic_text_splitter import CharacterTextSplitter # Optionally can also have the splitter trim whitespace for you splitter = CharacterTextSplitter() diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs index e476588f..f6ca9aa8 100644 --- a/bindings/python/src/lib.rs +++ b/bindings/python/src/lib.rs @@ -14,8 +14,8 @@ // pyo3 uses this #![allow(elided_lifetimes_in_paths)] -use ::text_splitter::{Characters, ChunkCapacity, TextSplitter}; use pyo3::prelude::*; +use text_splitter::{Characters, ChunkCapacity, TextSplitter}; /// Custom chunk capacity for python to make it easier to work /// with python arguments @@ -48,7 +48,7 @@ Plain-text splitter. Recursively splits chunks into the largest semantic units t ### By Number of Characters ```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Maximum number of characters in a chunk max_characters = 1000 @@ -67,7 +67,7 @@ Once a chunk has reached a length that falls within the range it will be returne It is always possible that a chunk may be returned that is less than the `start` value, as adding the next piece of text may have made it larger than the `end` capacity. 
```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Optionally can also have the splitter trim whitespace for you splitter = CharacterTextSplitter() @@ -142,7 +142,7 @@ This crate provides methods for splitting longer pieces of text into smaller chu ### By Number of Characters ```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Maximum number of characters in a chunk max_characters = 1000 @@ -161,7 +161,7 @@ Once a chunk has reached a length that falls within the range it will be returne It is always possible that a chunk may be returned that is less than the `start` value, as adding the next piece of text may have made it larger than the `end` capacity. ```python -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter # Optionally can also have the splitter trim whitespace for you splitter = CharacterTextSplitter() @@ -203,7 +203,7 @@ This crate was inspired by [LangChain's TextSplitter](https://python.langchain.c A big thank you to the unicode-rs team for their [unicode-segmentation](https://crates.io/crates/unicode-segmentation) crate that manages a lot of the complexity of matching the Unicode rules for words and sentences. **/ #[pymodule] -fn text_splitter(_py: Python, m: &PyModule) -> PyResult<()> { +fn semantic_text_splitter(_py: Python, m: &PyModule) -> PyResult<()> { m.add_class::()?; Ok(()) } diff --git a/bindings/python/tests/test_integration.py b/bindings/python/tests/test_integration.py index d86f7166..676a0a8f 100644 --- a/bindings/python/tests/test_integration.py +++ b/bindings/python/tests/test_integration.py @@ -1,4 +1,4 @@ -from text_splitter import CharacterTextSplitter +from semantic_text_splitter import CharacterTextSplitter def test_chunks():