diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 45f0c43..597cbf1 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -10,10 +10,10 @@ updates: schedule: interval: "daily" - # - package-ecosystem: "docker" - # directory: "/" - # schedule: - # interval: "daily" + - package-ecosystem: "cargo" + directory: "/bindings/python" + schedule: + interval: "daily" - package-ecosystem: "github-actions" directory: "/" diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f62a8cb..1eaaa2e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,3 +97,17 @@ jobs: toolchain: ${{ matrix.msrv }} - name: cargo +${{ matrix.msrv }} check run: cargo check + + minimal-versions: + name: Check minimal versions + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v3 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly + - uses: Swatinem/rust-cache@v1 + + - run: cargo update --workspace -Zdirect-minimal-versions + - run: cargo test --workspace --all-features diff --git a/CHANGELOG.md b/CHANGELOG.md index 3504be0..337b6bb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # Changelog +## v0.4.2 + +### What's New + +- Loosen version requirement for peer dependencies (specifically `tiktoken-rs` now supports `>=0.2.0, <0.6.0`) + ## v0.4.1 ### What's New diff --git a/Cargo.toml b/Cargo.toml index 3d137ac..1c24954 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "text-splitter" -version = "0.4.1" +version = "0.4.2" authors = ["Ben Brandt "] edition = "2021" description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens (when used with large language models)." 
@@ -18,21 +18,21 @@ rustdoc-args = ["--cfg", "docsrs"] # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -auto_enums = "0.8.0" +auto_enums = "0.8.1" either = "1.8.1" -itertools = "0.10.5" -once_cell = "1.17.2" -regex = "1.8.3" -tiktoken-rs = { version = "0.4.2", optional = true } -tokenizers = { version = "0.13.3", default_features = false, features = [ +itertools = "0.11.0" +once_cell = "1.18.0" +regex = "1.8.4" +tiktoken-rs = { version = ">=0.2.0, <0.6.0", optional = true } +tokenizers = { version = ">=0.13.3, <0.14.0", default_features = false, features = [ "onig", ], optional = true } unicode-segmentation = "1.10.1" [dev-dependencies] fake = "2.6.1" -insta = { version = "1.29.0", features = ["glob", "yaml"] } -tokenizers = { version = "0.13.3", default-features = false, features = [ +insta = { version = "1.30.0", features = ["glob", "yaml"] } +tokenizers = { version = ">=0.13.3, <0.14.0", default-features = false, features = [ "onig", "http", ] } diff --git a/README.md b/README.md index bac7ecc..2338210 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ let chunks = splitter.chunks("your document text", max_characters); ### With Huggingface Tokenizer +Requires the `tokenizers` feature to be activated. + ```rust use text_splitter::TextSplitter; // Can also use anything else that implements the ChunkSizer @@ -45,6 +47,8 @@ let chunks = splitter.chunks("your document text", max_tokens); ### With Tiktoken Tokenizer +Requires the `tiktoken-rs` feature to be activated. + ```rust use text_splitter::TextSplitter; // Can also use anything else that implements the ChunkSizer diff --git a/src/lib.rs b/src/lib.rs index b8ada2d..66d0b79 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -29,6 +29,8 @@ let chunks = splitter.chunks("your document text", max_characters); ### With Huggingface Tokenizer +Requires the `tokenizers` feature to be activated. 
+ ```rust use text_splitter::TextSplitter; // Can also use anything else that implements the ChunkSizer @@ -46,6 +48,8 @@ let chunks = splitter.chunks("your document text", max_tokens); ### With Tiktoken Tokenizer +Requires the `tiktoken-rs` feature to be activated. + ```rust use text_splitter::TextSplitter; // Can also use anything else that implements the ChunkSizer