Skip to content

Commit

Permalink
Implement reshapeable CTCGreedyDecoderPlusSparseToDense transformatio…
Browse files Browse the repository at this point in the history
…n and test (#1906)

* Implement reshapeable CTCGreedyDecoderPlusSparseToDense transformation and test

Signed-off-by: Roman Kazantsev <[email protected]>

* Fix consts (after code-review #1)

Signed-off-by: Roman Kazantsev <[email protected]>

* Add CTCGreedyDecoderTransformation with more generic pattern

Also it adds new middle-replacer for transforming sequence length to a mask
along with tests.

Signed-off-by: Roman Kazantsev <[email protected]>

* Do fixes after review #2

Signed-off-by: Roman Kazantsev <[email protected]>

* Fix after review #3

Signed-off-by: Roman Kazantsev <[email protected]>

* Fix after review #4

Signed-off-by: Roman Kazantsev <[email protected]>
  • Loading branch information
rkazants authored Aug 28, 2020
1 parent 3735735 commit 990f4e2
Show file tree
Hide file tree
Showing 6 changed files with 499 additions and 41 deletions.
1 change: 1 addition & 0 deletions model-optimizer/automation/package_BOM.txt
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,7 @@ extensions/middle/ReverseTransposeNormalization.py
extensions/middle/ReverseV2ToReverseSequence.py
extensions/middle/RNNSequenceNormalizeToIE.py
extensions/middle/ScaleInput.py
extensions/middle/SequenceLengthToMask.py
extensions/middle/SharedWeightsDuplication.py
extensions/middle/SliceConverter.py
extensions/middle/SliceLikeToStridedSlice.py
Expand Down
222 changes: 187 additions & 35 deletions model-optimizer/extensions/front/tf/CTCGreedyDecoderReplacement.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,67 +14,219 @@
limitations under the License.
"""

import logging as log

import numpy as np

from extensions.ops.Cast import Cast
from extensions.front.Pack import Pack
from extensions.front.FillToBroadcast import FillToBroadcast
from mo.front.common.partial_infer.utils import int64_array
from mo.front.common.replacement import FrontReplacementSubgraph
from mo.graph.graph import Graph
from mo.ops.const import Const
from mo.utils.error import Error
from mo.front.tf.graph_utils import create_op_with_const_inputs
from mo.graph.graph import Graph, rename_nodes
from mo.ops.broadcast import Broadcast
from mo.ops.concat import Concat
from mo.ops.squeeze import Squeeze
from mo.ops.unsqueeze import Unsqueeze


class CTCGreedyDecoderReplacement(FrontReplacementSubgraph):
"""
TensorFlow CTCGreedyDecoder produces output in a sparse tensor that is not supported by Inference Engine and
Inference Engine's CTCGreedyDecoder has different output that is in a dense format. So this transformation
intents to replace TF CTCGreedyDecoder+SparseToDense with IE one.
Also Inference Engine's CTCGreedyDecoder has a specific format for the second input tensor, a sequence length,
different from TF's one so this transformation cares about transformation of its format.
The second input to the CTCGreedyDecoder in the TensorFlow is a 1D tensor with sequence lengths. In the Inference
Engine the second input to the CTCGreedyDecoder is a 2D tensor, a sequence mask, where the first element
in each row is equal to 1 and all others in the tail are equal to 0. The number of ones represents
a sequence length.
"""
enabled = True

def run_after(self):
# CTCGreedyDecoderReplacement is not reshape-able transformation
# so reshape-able CTCGreedyDecoderReplacement2 transformation is applied first
return [CTCGreedyDecoderReplacement2]

@staticmethod
def pattern(**kwargs):
return dict(
nodes=[('decoder', dict(op='CTCGreedyDecoder')),
('cast', dict(op='Cast')),
('sparse_to_dense', dict(op='SparseToDense'))
],
edges=[('decoder', 'sparse_to_dense', {'out': 0}),
('decoder', 'cast', {'out': 1}),
('cast', 'sparse_to_dense', {'out': 0})
]
)

def replace_sub_graph(self, graph: Graph, match: dict):
# TODO: Once Inference Engine's CTCGreedyDecoder starts to support sequence length format like in TensorFlow,
# CTCGreedyDecoderReplacement2 needs to be removed and CTCGreedyDecoderReplacement, a more generic
# transformation, needs to be adopted for all cases
ctc_greedy_decoder = match['decoder']
cast = match['cast']
sparse_to_dense = match['sparse_to_dense']
sparse_to_dense_name = sparse_to_dense.soft_get('name', sparse_to_dense.id)

# disconnect SparseToDense and Cast nodes
sparse_to_dense.in_port(0).disconnect()
cast.in_port(0).disconnect()

# transform CTCGreedyDecoder output to TensorFlow's one:
# 1. squeeze the output to [N, T] shape
# 2. cast it to integer
squeeze_dec_seq = create_op_with_const_inputs(graph, Squeeze, {1: int64_array([2, 3])},
{'name': sparse_to_dense_name})
squeeze_dec_seq.in_port(0).connect(ctc_greedy_decoder.out_port(0))
cast_to_int = Cast(graph, {'name': sparse_to_dense_name + '/CastToInt',
'dst_type': np.int32}).create_node()
cast_to_int.in_port(0).connect(squeeze_dec_seq.out_port(0))

# preserve output name from original graph
rename_nodes([(sparse_to_dense, sparse_to_dense_name + '/AbandonedName'),
(cast_to_int, sparse_to_dense_name)])

# set output of the new sub-graph as a source for SparseToDense consumer
sparse_to_dense.out_port(0).get_connection().set_source(cast_to_int.out_port(0))

# remove no longer needed nodes
graph.remove_nodes_from([sparse_to_dense.id, cast.id])

# mark CTCGreedyDecoder node as a node that requires transformation of sequence length to a mask format
# in the middle phase
ctc_greedy_decoder['use_mask_format'] = True

# unless the second input of CTCGreedyDecoder is a parameter, it enforces MO to use --static-shape
# to try getting the second input with a value
sequence_length_node = ctc_greedy_decoder.in_node(1)
if sequence_length_node.soft_get('op') != 'Parameter' and not graph.graph['cmd_params'].static_shape:
log.error(
"Model can not be translated in a reshape-able way.\n"
"Model Optimizer key static_shape was turned on to prevent related errors.\n"
"There will be no success changing input shapes of the model with the help of "
"InferenceEngine reshape method", extra={'is_warning': True})
graph.graph['cmd_params'].static_shape = True


class CTCGreedyDecoderReplacement2(FrontReplacementSubgraph):
"""
The TF implementation of the CTCGreedyDecoder produces a tuple with two tensors. The first element in the tuple is
the SparseTensor which is converted to a regular tensor with the SparseToDense operation. This replacer matches
CTCGreedyDecoder and SparseToDense operations and removes the SparseToDense and Cast operation which is also used
in the SparseToDense operation, because Inference Engine implementation of the CTCGreedyDecoder produces regular
tensor as output.
The second input to the CTCGreedyDecoder in the TensorFlow is a 1D tensor with sequence lengths. In the Inference
Engine the second input to the CTCGreedyDecoder is a 2D tensor where the first element in each row is equal to 0
and all others are equal to 1. The length of the row is equal to the sequence length. The replacer modifies the
second input to be compatible with the Inference Engine CTCGreedyDecoder layer implementation.
Also, Inference Engine CTCGreedyDecoder requires a mask format for sequence lengths that is a different from
original one. Hence, this transformation changes a format of sequence length to a mask by replacing Fill and Pack
nodes with a special graph that produces a tensor of ones with shape [T, N] accepted by opset CTCGreedyDecoder.
"""
enabled = True

def run_before(self):
return [Pack, FillToBroadcast]

@staticmethod
def pattern(**kwargs):
return dict(
nodes=[
('transpose', dict(op='Transpose')),
('shape', dict(op='ShapeOf')),
('shape_1', dict(op='ShapeOf')),
('strided_slice', dict(op='StridedSlice')),
('stack', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [1]))),
('stack1', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [2]))),
('stack2', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [1]))),
('strided_slice_1', dict(op='StridedSlice')),
('stack_1', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [0]))),
('stack1_1', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [1]))),
('stack2_1', dict(op='Const', value=lambda v: v is not None and np.array_equal(v, [1]))),
('dims', dict(op='Pack')),
('fill', dict(op='Fill')),
('decoder', dict(op='CTCGreedyDecoder')),
('cast', dict(op='Cast')),
('sparse_to_dense', dict(op='SparseToDense')),
],
edges=[
('decoder', 'sparse_to_dense', {'out': 0}),
('decoder', 'cast', {'out': 1}),
('transpose', 'shape', {'out': 0}),
('transpose', 'shape_1', {'out': 0}),
('transpose', 'decoder', {'out': 0, 'in': 0}),
('shape', 'strided_slice', {'out': 0, 'in': 0}),
('stack', 'strided_slice', {'out': 0, 'in': 1}),
('stack1', 'strided_slice', {'out': 0, 'in': 2}),
('stack2', 'strided_slice', {'out': 0, 'in': 3}),
('shape_1', 'strided_slice_1', {'out': 0, 'in': 0}),
('stack_1', 'strided_slice_1', {'out': 0, 'in': 1}),
('stack1_1', 'strided_slice_1', {'out': 0, 'in': 2}),
('stack2_1', 'strided_slice_1', {'out': 0, 'in': 3}),
('strided_slice', 'dims', {'out': 0, 'in': 0}),
('dims', 'fill', {'out': 0, 'in': 0}),
('strided_slice_1', 'fill', {'out': 0, 'in': 1}),
('fill', 'decoder', {'out': 0, 'in': 1}),
('decoder', 'sparse_to_dense', {'out': 0, 'in': 0}),
('decoder', 'cast', {'out': 1, 'in': 0}),
('cast', 'sparse_to_dense', {'out': 0}),
]
)

def nodes_to_remove(self, graph: Graph, match: dict):
return [match['cast'].id, match['sparse_to_dense']]

def replace_sub_graph(self, graph: Graph, match: dict):
# TODO: it requires further refactoring and improvement to provide reshape-ability
decoder_node = match['decoder']
decoder_node_name = decoder_node.soft_get('name', decoder_node.id)
graph.remove_edge(decoder_node.id, match['sparse_to_dense'].id)
graph.remove_edge(decoder_node.id, match['cast'].id)
match['sparse_to_dense'].replace_node(decoder_node)

sequence_length_node = decoder_node.in_node(1)
if sequence_length_node.value is None:
raise Error('The second input to the CTCGreedyDecoder node "{}" is not constant. This case is not '
'supported with the Inference Engine.'.format(decoder_node_name))

# the batch size is the dimension with index 1 for the layer CTCGreedyDecoder
mask_value = np.ones([decoder_node.in_node(0).shape[1], sequence_length_node.value[0]])
mask_value[:, 0] = 0
mask_value = np.transpose(mask_value)
mask_node = Const(graph, {'name': decoder_node_name + '/Mask',
'value': mask_value}).create_node()
decoder_node.in_port(1).disconnect()
decoder_node.in_port(1).connect(mask_node.out_port(0))

return {}
# obtain references to necessary nodes and their names
fill = match['fill']
dims = match['dims']
strided_slice = match['strided_slice']
strided_slice_1 = match['strided_slice_1']
ctc_greedy_decoder = match['decoder']
cast = match['cast']
sparse_to_dense = match['sparse_to_dense']
strided_slice_name = strided_slice.soft_get('name', strided_slice.id)
strided_slice_1_name = strided_slice_1.soft_get('name', strided_slice_1.id)
ctc_greedy_decoder_name = ctc_greedy_decoder.soft_get('name', ctc_greedy_decoder.id)
sparse_to_dense_name = sparse_to_dense.soft_get('name', sparse_to_dense.id)

# unsqueeze scalar values with batch size and time dimension
unsqueeze_batch_size = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array(0)},
{'name': strided_slice_name + '/Unsqueeze'})
dims.in_port(0).get_connection().set_destination(unsqueeze_batch_size.in_port(0))
unsqueeze_time_size = create_op_with_const_inputs(graph, Unsqueeze, {1: int64_array(0)},
{'name': strided_slice_1_name + '/Unsqueeze'})
fill.in_port(1).get_connection().set_destination(unsqueeze_time_size.in_port(0))

# compute a sequence mask shape [T, N] required for CTCGreedyDecoder
seq_mask_shape = Concat(graph, {'axis': 0, 'in_ports_count': 2,
'name': ctc_greedy_decoder_name + '/SequenceMaskShape'}).create_node()
seq_mask_shape.in_port(0).connect(unsqueeze_time_size.out_port(0))
seq_mask_shape.in_port(1).connect(unsqueeze_batch_size.out_port(0))

# compute a sequence mask
sequence_mask = create_op_with_const_inputs(graph, Broadcast, {0: np.array([1.0], dtype=np.float)},
{'mode': 'numpy',
'name': ctc_greedy_decoder_name + '/SequenceMask'})
sequence_mask.in_port(1).connect(seq_mask_shape.out_port(0))

# create CTCGreedyDecoder with the sequence mask instead of sequence length
ctc_greedy_decoder.in_port(1).disconnect()
ctc_greedy_decoder.in_port(1).connect(sequence_mask.out_port(0))

# remove fill and pack nodes since they are now in unconnected component
graph.remove_nodes_from([fill.id, dims.id])

# transform opset CTCGreedyDecoder output to TensorFlow's one that has a shape [N, T]
# opset CTCGreedyDecoder has an output with a shape [N, T, 1, 1]
squeeze_dec_seq = create_op_with_const_inputs(graph, Squeeze, {1: int64_array([2, 3])},
{'name': sparse_to_dense_name})
squeeze_dec_seq.in_port(0).connect(ctc_greedy_decoder.out_port(0))
cast_to_int = Cast(graph, {'name': sparse_to_dense_name + '/CastToInt',
'dst_type': np.int32}).create_node()
cast_to_int.in_port(0).connect(squeeze_dec_seq.out_port(0))

# preserve output name from original graph
rename_nodes([(sparse_to_dense, sparse_to_dense_name + '/AbandonedName'),
(cast_to_int, sparse_to_dense_name)])

# set output of the new sub-graph as a source for SparseToDense consumer
sparse_to_dense.out_port(0).get_connection().set_source(cast_to_int.out_port(0))

# cleanup a graph
graph.remove_nodes_from([cast.id, sparse_to_dense.id])
Loading

0 comments on commit 990f4e2

Please sign in to comment.