Skip to content

Commit

Permalink
[TF FE] Add testing StringLower and TextVectorization operations on non-ASCII sentences (openvinotoolkit#23641)
Browse files Browse the repository at this point in the history

**Details:** Add tests with non-ASCII (Cyrillic and Chinese) sentences for the StringLower and TextVectorization operations.
Needs to be merged after
openvinotoolkit/openvino_tokenizers#80.

**Ticket:** 135752

---------

Signed-off-by: Kazantsev, Roman <[email protected]>
  • Loading branch information
rkazants authored and alvoron committed Apr 29, 2024
1 parent bdd1479 commit 8bc8e93
Show file tree
Hide file tree
Showing 3 changed files with 33 additions and 14 deletions.
7 changes: 6 additions & 1 deletion tests/layer_tests/common/utils/common_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,12 @@ def allclose(cur_array, ref_array, atol, rtol):
# so we have to align formats of both string tensors, for example, to unicode
if cur_array.dtype.type != ref_array.dtype.type:
cur_array = cur_array.astype('U')
ref_array = ref_array.astype('U')
try:
ref_array = ref_array.astype('U')
except:
# ref_array of object type and each element must be utf-8 decoded
utf8_decoded_elems = [elem.decode('UTF-8') for elem in ref_array.flatten()]
ref_array = np.array(utf8_decoded_elems, dtype=str).reshape(ref_array.shape)
return np.array_equal(cur_array, ref_array)
elif cur_array.dtype == bool:
abs_diff = np.absolute(cur_array ^ ref_array)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@ def _prepare_input(self, inputs_info):
assert 'text_input' in inputs_info
input_shape = inputs_info['text_input']
inputs_data = {}
strings_dictionary = ['hi OpenVINO here ', ' hello OpenVINO there', 'hello PyTorch here ',
' hi TensorFlow here', ' hi JAX here \t']
inputs_data['text_input'] = rng.choice(strings_dictionary, input_shape)
inputs_data['text_input'] = rng.choice(self.strings_dictionary, input_shape)
return inputs_data

def create_text_vectorization_net(self, input_shapes, vocabulary, output_mode, output_sequence_length):
def create_text_vectorization_net(self, input_shapes, vocabulary, output_mode, output_sequence_length,
strings_dictionary):
assert len(input_shapes) > 0
self.strings_dictionary = strings_dictionary
tf.keras.backend.clear_session()
text_input = tf.keras.Input(shape=input_shapes[0][1:], name='text_input',
dtype=tf.string)
Expand All @@ -36,13 +36,22 @@ def create_text_vectorization_net(self, input_shapes, vocabulary, output_mode, o
return tf2_net, None

@pytest.mark.parametrize('input_shapes', [[[1, 1]], [[3, 1]]])
@pytest.mark.parametrize('vocabulary', [['hello', 'there', 'OpenVINO', 'check']])
@pytest.mark.parametrize('strings_dictionary',
[['hi OpenVINO here ', ' hello OpenVINO there', 'hello PyTorch here ',
' hi TensorFlow here', ' hi JAX here \t'],
['привет ОПЕНВИНО здесь ', ' привет ОпенВИНО там', 'привет Пайторч здесь ',
' привет ТензорФлоу здесь', ' привет ДЖАКС там \t'],
['這裡你好 OpenVINO ', '你好 OpenVINO 那裡', '你好這裡 PyTorch ',
' 這裡是 TensorFlow', ' 這裡是 JAX \t']
])
@pytest.mark.parametrize('vocabulary', [['hello', 'there', 'OpenVINO', 'check', 'привет',
'ОПЕНВИНО', 'здесь', 'там', '你好', '那裡', '檢查']])
@pytest.mark.parametrize('output_mode', ['int'])
@pytest.mark.parametrize('output_sequence_length', [32, 64])
@pytest.mark.precommit_tf_fe
@pytest.mark.nightly
def test_text_vectorization(self, input_shapes, vocabulary, output_mode, output_sequence_length, ie_device,
precision, ir_version, temp_dir, use_legacy_frontend):
def test_text_vectorization(self, input_shapes, vocabulary, output_mode, output_sequence_length, strings_dictionary,
ie_device, precision, ir_version, temp_dir, use_legacy_frontend):
if platform.system() in ('Darwin') or platform.machine() in ['arm', 'armv7l',
'aarch64',
'arm64',
Expand All @@ -53,5 +62,6 @@ def test_text_vectorization(self, input_shapes, vocabulary, output_mode, output_
params['vocabulary'] = vocabulary
params['output_mode'] = output_mode
params['output_sequence_length'] = output_sequence_length
params['strings_dictionary'] = strings_dictionary
self._test(*self.create_text_vectorization_net(**params), ie_device, precision,
temp_dir=temp_dir, ir_version=ir_version, use_legacy_frontend=use_legacy_frontend, **params)
16 changes: 10 additions & 6 deletions tests/layer_tests/tensorflow_tests/test_tf_StringLower.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,13 @@ def _prepare_input(self, inputs_info):
assert 'input:0' in inputs_info
input_shape = inputs_info['input:0']
inputs_data = {}
# TODO: add non ASCII symbols, fix comparator for output string tensors
strings_dictionary = ['UPPER CASE SENTENCE', 'lower case sentence', ' UppEr LoweR CAse SENtence', ' ']
sample_data = rng.choice(strings_dictionary, input_shape)
sample_data = rng.choice(self.strings_dictionary, input_shape)
inputs_data['input:0'] = sample_data
return inputs_data

def create_string_lower_net(self, input_shape, encoding):
def create_string_lower_net(self, input_shape, encoding, strings_dictionary):
self.encoding = encoding
self.strings_dictionary = strings_dictionary

tf.compat.v1.reset_default_graph()
with tf.compat.v1.Session() as sess:
Expand All @@ -39,14 +38,19 @@ def create_string_lower_net(self, input_shape, encoding):

@pytest.mark.parametrize("encoding", [None, '', 'utf-8'])
@pytest.mark.parametrize("input_shape", [[], [2], [3, 4], [1, 3, 2]])
@pytest.mark.parametrize("strings_dictionary",
[['UPPER CASE SENTENCE', 'lower case sentence', ' UppEr LoweR CAse SENtence', ' '],
['Первое Предложение', 'второе предложение', ' ', ' ТРЕТЬЕ ПРЕДЛОЖЕНИЕ '],
['第一句話在這裡', '第二句話在這裡', '第三句話在這裡']])
@pytest.mark.precommit_tf_fe
@pytest.mark.nightly
@pytest.mark.xfail(condition=platform.system() in ('Darwin', 'Linux') and platform.machine() in ['arm', 'armv7l',
'aarch64',
'arm64', 'ARM64'],
reason='Ticket - 126314, 132699')
def test_string_lower(self, input_shape, encoding, ie_device, precision, ir_version, temp_dir,
def test_string_lower(self, input_shape, encoding, strings_dictionary, ie_device, precision, ir_version, temp_dir,
use_legacy_frontend):
self._test(*self.create_string_lower_net(input_shape=input_shape, encoding=encoding),
self._test(*self.create_string_lower_net(input_shape=input_shape, encoding=encoding,
strings_dictionary=strings_dictionary),
ie_device, precision, ir_version, temp_dir=temp_dir,
use_legacy_frontend=use_legacy_frontend)

0 comments on commit 8bc8e93

Please sign in to comment.