From 9c5773fcac7ec8f670b2c08784215dbb103feb72 Mon Sep 17 00:00:00 2001
From: Ben Brandt
Date: Sun, 11 Jun 2023 07:24:58 +0200
Subject: [PATCH] Remove unnecessary tokenizer features

---
Review note: the `tokenizers` entry added under [dependencies] spells its key
`default-features` (hyphenated), the same spelling the [dev-dependencies]
entry in this patch uses. Cargo documents the underscore spelling
`default_features` as a deprecated alias. The abbreviated blob id on the
Cargo.toml `index` line was recorded before this spelling correction.

 CHANGELOG.md |  6 ++++++
 Cargo.toml   | 10 ++++++++--
 2 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 307f53b..3504be0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,11 @@
 # Changelog
 
+## v0.4.1
+
+### What's New
+
+- Removed unnecessary features for `tokenizers` crate to make cross-compilation easier (since tokenizer training helpers aren't needed).
+
 ## v0.4.0
 
 ### What's New
diff --git a/Cargo.toml b/Cargo.toml
index 0570ff4..3d137ac 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "text-splitter"
-version = "0.4.0"
+version = "0.4.1"
 authors = ["Ben Brandt "]
 edition = "2021"
 description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens (when used with large language models)."
@@ -24,12 +24,18 @@ itertools = "0.10.5"
 once_cell = "1.17.2"
 regex = "1.8.3"
 tiktoken-rs = { version = "0.4.2", optional = true }
-tokenizers = { version = "0.13.3", optional = true }
+tokenizers = { version = "0.13.3", default-features = false, features = [
+    "onig",
+], optional = true }
 unicode-segmentation = "1.10.1"
 
 [dev-dependencies]
 fake = "2.6.1"
 insta = { version = "1.29.0", features = ["glob", "yaml"] }
+tokenizers = { version = "0.13.3", default-features = false, features = [
+    "onig",
+    "http",
+] }
 more-asserts = "0.3.1"
 
 [features]