make skip_tokens an input for VocabDecoder (parametrize detokenization/decoding) #325

Merged
2 commits merged on Nov 25, 2024
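For context, a minimal usage sketch of what this change targets (not part of the diff; it assumes openvino_tokenizers' convert_tokenizer accepts a skip_special_tokens conversion parameter, which the converters below read as self.skip_special_tokens / params.skip_special_tokens):

    # Hypothetical illustration: convert a HF tokenizer into an OpenVINO
    # tokenizer/detokenizer pair. After this PR the skip-token list is always
    # embedded in the detokenizer graph, and skip_special_tokens only controls
    # a Slice stop value, so skipping can later be toggled without reconverting.
    from transformers import AutoTokenizer
    from openvino_tokenizers import convert_tokenizer

    hf_tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    ov_tokenizer, ov_detokenizer = convert_tokenizer(
        hf_tokenizer,
        with_detokenizer=True,
        skip_special_tokens=True,  # sets do_skip_tokens on VocabDecoderStep
    )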
10 changes: 4 additions & 6 deletions python/openvino_tokenizers/hf_parser.py
@@ -375,8 +375,8 @@ def decoding(self) -> None:
if self.tokenizer_json["decoder"] is None or self.tokenizer_json["model"]["type"] == "WordPiece":
return

skip_tokens = parse_special_tokens(self.original_tokenizer) if self.skip_special_tokens else {}
self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens)))
skip_tokens = parse_special_tokens(self.original_tokenizer)
self.pipeline.add_steps(VocabDecoderStep(skip_tokens=list(skip_tokens), do_skip_tokens=self.skip_special_tokens))

if self.tokenizer_json["decoder"]["type"] == "Sequence":
for decoder_dict in self.tokenizer_json["decoder"]["decoders"]:
@@ -1013,9 +1013,7 @@ def convert_tiktoken_model_tokenizer(
split_pattern = encoding._pat_str

pipeline = TokenizerPipeline()
skip_tokens = []
if params.skip_special_tokens:
skip_tokens = list(parse_special_tokens(hf_tokenizer))
skip_tokens = list(parse_special_tokens(hf_tokenizer))

add_prefix_steps = []
if hasattr(hf_tokenizer, "get_prefix_tokens") and params.add_special_tokens:
@@ -1048,7 +1046,7 @@

pipeline.add_steps(
[
VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens),
VocabDecoderStep(vocab=decoder_vocab, skip_tokens=skip_tokens, do_skip_tokens=params.skip_special_tokens),
FuseStep(),
]
)
16 changes: 14 additions & 2 deletions python/openvino_tokenizers/tokenizer_pipeline.py
@@ -14,7 +14,7 @@
from typing import Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np
from openvino.runtime import Model, Output, PartialShape, Type, op
from openvino.runtime import Model, Output, PartialShape, Type, op, Shape
from openvino.runtime import opset12 as opset
from openvino.runtime.exceptions import OVTypeError, UserInputError
from openvino.runtime.utils.types import as_node, make_constant_node
@@ -1008,6 +1008,7 @@ class DecodingStep(BasePipelineStep):
class VocabDecoderStep(DecodingStep):
vocab: Optional[List[str]] = None
skip_tokens: Optional[List[int]] = None
do_skip_tokens: Optional[bool] = True

def finalize(self) -> None:
pipeline = self.get_pipeline()
@@ -1025,7 +1026,18 @@ def get_ov_subgraph(self, input_nodes: List[Output]) -> List[Output]:
else:
vocab_outputs = self.create_string_constant_node(self.vocab).outputs()
input_nodes.extend(vocab_outputs)
return _get_factory().create("VocabDecoder", input_nodes, {"skip_tokens": self.skip_tokens}).outputs()

# Add the skip-tokens constant even if do_skip_tokens=False, so that skipping can be switched on/off at runtime.
# Slice through all skip tokens if the flag is true, else slice to get an empty tensor.
stop_const = op.Constant(Type.i32, Shape([1]), [np.iinfo(np.int32).max if self.do_skip_tokens else 0])

zero_const = op.Constant(Type.i32, Shape([1]), [0])
one_const = op.Constant(Type.i32, Shape([1]), [1])
skip_tokens_const = op.Constant(Type.i32, Shape([len(self.skip_tokens)]), self.skip_tokens)
sliced_skips = opset.slice(skip_tokens_const, zero_const, stop_const, one_const).outputs()
input_nodes.extend(sliced_skips)

return _get_factory().create("VocabDecoder", input_nodes).outputs()


@dataclass
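The slice trick built above can be illustrated outside of OpenVINO. A minimal NumPy sketch of the intended semantics (illustrative only, using the same 0 / INT32_MAX stop values as the subgraph):

    import numpy as np

    INT32_MAX = np.iinfo(np.int32).max

    def sliced_skip_tokens(skip_tokens, do_skip_tokens):
        # Mimics Slice(skip_tokens_const, start=0, stop=stop_const, step=1):
        # stop = INT32_MAX keeps the whole list, stop = 0 yields an empty tensor.
        stop = INT32_MAX if do_skip_tokens else 0
        return np.asarray(skip_tokens, dtype=np.int32)[0:stop:1]

    print(sliced_skip_tokens([0, 101, 102], True))   # [  0 101 102] -> skipping enabled
    print(sliced_skip_tokens([0, 101, 102], False))  # [] -> skipping disabled

Because the stop value is an ordinary graph constant, it can later be edited or replaced by a model input to switch special-token skipping at inference time rather than at conversion time, which appears to be what "parametrize detokenization/decoding" in the title refers to.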
14 changes: 11 additions & 3 deletions src/vocab_decoder.cpp
@@ -25,12 +25,20 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
auto seq_len = inputs[0].get_shape()[1];
auto input_data = inputs[0].data<const int32_t>();

auto vocab_size = inputs[1].get_size();
auto vocab_begins = inputs[1].data<const int32_t>();
auto vocab_ends = inputs[2].data<const int32_t>();
auto vocab_chars = inputs[3].data<const uint8_t>();
auto vocab_size = inputs[1].get_size();

OPENVINO_ASSERT(inputs.size() == 4, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");
OPENVINO_ASSERT(inputs.size() == 4 || inputs.size() == 5, "Too few inputs passed to VocabDecoder, it means it is not converted properly or it is not used in the supported pattern");

// Use skip tokens from input if specified, otherwise use the attribute.
std::vector<int> skip_tokens;
if (inputs.size() == 5) {
skip_tokens = std::vector<int>(inputs[4].data<int32_t>(), inputs[4].data<int32_t>() + inputs[4].get_shape()[0]);
} else {
skip_tokens = m_skip_tokens;
}

// Set output shapes
outputs[0].set_shape({batch_size});
@@ -61,7 +69,7 @@ bool VocabDecoder::evaluate(ov::TensorVector& outputs, const ov::TensorVector& i
new_begins[seq] = buffer.size();
if (
token_id < vocab_size
&& std::find(m_skip_tokens.begin(), m_skip_tokens.end(), token_id) == m_skip_tokens.end()
&& std::find(skip_tokens.begin(), skip_tokens.end(), token_id) == skip_tokens.end()
) {
buffer.insert(
buffer.end(),
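A rough Python rendering of the resolution logic that evaluate() now implements (illustration only, not the actual kernel): skip tokens come from the optional fifth input when it is connected, otherwise from the m_skip_tokens attribute, and matching ids are dropped before the vocab lookup.

    def decode_row(token_ids, vocab, attr_skip_tokens, input_skip_tokens=None):
        # Input skip tokens take priority over the attribute, mirroring the
        # inputs.size() == 5 branch above; an empty input list disables skipping.
        skip = attr_skip_tokens if input_skip_tokens is None else input_skip_tokens
        return "".join(
            vocab[t] for t in token_ids
            if t < len(vocab) and t not in skip
        )

    vocab = ["[PAD]", "[CLS]", "[SEP]", "hello", " world"]
    ids = [1, 3, 4, 2]
    print(decode_row(ids, vocab, [0, 1, 2]))                        # 'hello world'
    print(decode_row(ids, vocab, [0, 1, 2], input_skip_tokens=[]))  # '[CLS]hello world[SEP]'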
15 changes: 14 additions & 1 deletion src/vocab_decoder.hpp
@@ -6,11 +6,24 @@
#include <vector>
#include <openvino/op/op.hpp>

/**
* @class VocabDecoder
* @brief The VocabDecoder class is an OpenVINO operation that decodes vocabulary tokens.
*
* This class inherits from the ov::op::Op base class and provides functionality to decode
* vocabulary tokens while skipping specified tokens.
* @param input_data
* @param vocab_begins
* @param vocab_ends
* @param vocab_chars
* @param skip_tokens input has priority over the attribute.
* @param skip_tokens attribute is used only when skip_tokens input is not provided.
*/
class VocabDecoder : public ov::op::Op {
public:
OPENVINO_OP("VocabDecoder");

VocabDecoder () = default;
VocabDecoder () = default;
VocabDecoder(
const ov::OutputVector& arguments,
std::vector<int> skip_tokens