Skip to content

Commit

Permalink
Remove unnecessary tokenizer features
Browse files Browse the repository at this point in the history
  • Loading branch information
benbrandt committed Jun 11, 2023
1 parent cb8c92e commit 9c5773f
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 2 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
# Changelog

## v0.4.1

### What's New

- Removed unnecessary features of the `tokenizers` crate to make cross-compilation easier (the tokenizer training helpers aren't needed).

## v0.4.0

### What's New
Expand Down
10 changes: 8 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "text-splitter"
version = "0.4.0"
version = "0.4.1"
authors = ["Ben Brandt <[email protected]>"]
edition = "2021"
description = "Split text into semantic chunks, up to a desired chunk size. Supports calculating length by characters and tokens (when used with large language models)."
Expand All @@ -24,12 +24,18 @@ itertools = "0.10.5"
once_cell = "1.17.2"
regex = "1.8.3"
tiktoken-rs = { version = "0.4.2", optional = true }
tokenizers = { version = "0.13.3", optional = true }
# Default features are switched off and only `onig` is kept: the tokenizer
# training helpers are not needed, and dropping them makes cross-compilation
# easier. Uses the canonical `default-features` key (the underscore spelling
# `default_features` is a deprecated alias).
tokenizers = { version = "0.13.3", default-features = false, features = [
    "onig",
], optional = true }
unicode-segmentation = "1.10.1"

[dev-dependencies]
# Dependencies needed only for tests, examples, and benchmarks; kept in
# alphabetical order.
fake = "2.6.1"
insta = { version = "1.29.0", features = ["glob", "yaml"] }
more-asserts = "0.3.1"
# Same `tokenizers` version as the optional dependency, but dev builds also
# enable `http` in addition to `onig`.
tokenizers = { version = "0.13.3", default-features = false, features = ["onig", "http"] }

[features]
Expand Down

0 comments on commit 9c5773f

Please sign in to comment.