-
Notifications
You must be signed in to change notification settings - Fork 3
Home
Minhao Chou edited this page Apr 5, 2022
·
3 revisions
tokenizers::BertTokenizer::Options options{};
options.vocab_file = /bert/vocab/file/path;
std::unique_ptr<BertTokenizer> tokenizer = tokenizers::BertTokenizer::CreateTokenizer(options);
std::vector<std::string> texts = {"bert tokenizer", "gpt tokenizer"};
std::vector<EncodeOutput> batch_outputs = tokenizer->BatchEncode(&texts, nullptr, /*max_length=*/512);
// Example: encode a batch built from two text lists with the same tokenizer.
// Presumably a_texts[i] is paired with b_texts[i] — confirm against BatchEncode.
std::vector<std::string> a_texts = {"bert tokenizer", "gpt tokenizer"};
std::vector<std::string> b_texts = {"transformer encoder", "transformer decoder"};
// Named pair_batch_outputs so this example does not redeclare batch_outputs
// when both examples are placed in the same scope.
std::vector<EncodeOutput> pair_batch_outputs = tokenizer->BatchEncode(&a_texts, &b_texts, /*max_length=*/512);