Commit

migrate rnnt to API 2.x (#1097)
Signed-off-by: Cheng, Zixuan <[email protected]>
violetch24 authored Sep 19, 2023
1 parent 6c26635 commit 059a754
Showing 46 changed files with 3,792 additions and 5 deletions.
8 changes: 3 additions & 5 deletions examples/.config/model_params_pytorch.json
@@ -317,13 +317,11 @@
"main_script": "run.py"
},
"rnnt": {
"model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/eager",
"model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/fx",
"dataset_location": "/tf_dataset/pytorch/rnnt/convert_dataset/",
"input_model": "/tf_dataset/pytorch/rnnt/rnnt.pt",
"yaml": "conf.yaml",
"strategy": "basic",
"batch_size": 100,
"new_benchmark": false
"main_script": "run_tune.py",
"batch_size": 100
},
"wav2vec2_dynamic":{
"model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx",
6 changes: 6 additions & 0 deletions examples/README.md
@@ -508,6 +508,12 @@ Intel® Neural Compressor validated examples with multiple compression technique
<td>Post-Training Dynamic Quantization</td>
<td><a href="./pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx">fx</a></td>
</tr>
<tr>
<td>RNNT</td>
<td>Speech Recognition</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx">fx</a></td>
</tr>
<tr>
<td>BlendCNN</td>
<td>Natural Language Processing</td>
@@ -0,0 +1,68 @@
import sys
import os
sys.path.insert(0, os.path.join(os.getcwd(), "pytorch"))

from parts.manifest import Manifest
from parts.segment import AudioSegment

import numpy as np

import mlperf_loadgen as lg


class AudioQSL:
    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        m_paths = [manifest_filepath]
        self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
                                 normalize=True, max_duration=15.0)
        self.sample_rate = sample_rate
        self.count = len(self.manifest)
        perf_count = self.count if perf_count is None else perf_count
        self.sample_id_to_sample = {}
        self.qsl = lg.ConstructQSL(self.count, perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print(
            "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
                self.manifest.duration / 3600,
                self.manifest.filtered_duration / 3600,
                self.count))

    def load_query_samples(self, sample_list):
        for sample_id in sample_list:
            self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)

    def unload_query_samples(self, sample_list):
        for sample_id in sample_list:
            del self.sample_id_to_sample[sample_id]

    def _load_sample(self, index):
        sample = self.manifest[index]
        segment = AudioSegment.from_file(sample['audio_filepath'][0],
                                         target_sr=self.sample_rate)
        waveform = segment.samples
        assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32
        return waveform

    def __getitem__(self, index):
        return self.sample_id_to_sample[index]

    def __del__(self):
        lg.DestroyQSL(self.qsl)
        print("Finished destroying QSL.")


# We have no problem fitting all data in memory, so we do that, in
# order to speed up execution of the benchmark.
class AudioQSLInMemory(AudioQSL):
    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        super().__init__(dataset_dir, manifest_filepath, labels,
                         sample_rate, perf_count)
        super().load_query_samples(range(self.count))

    def load_query_samples(self, sample_list):
        pass

    def unload_query_samples(self, sample_list):
        pass
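
# A hypothetical usage sketch (not part of the original file): the paths are
# placeholders matching the layout produced by prepare_dataset.sh, and
# `labels` mirrors the character set used in accuracy_eval.py.
#
#   labels = [" "] + [chr(c) for c in range(ord("a"), ord("z") + 1)] + ["'"]
#   qsl = AudioQSLInMemory("convert_dataset/dev-clean-wav",
#                          "convert_dataset/dev-clean-wav.json",
#                          labels)
#   waveform = qsl[0]  # float32 numpy waveform of the first loaded sample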
@@ -0,0 +1,75 @@
Step-by-Step
============

This document describes the steps to reproduce the tuning results of the Intel-optimized PyTorch RNNT model with Neural Compressor.

Our example comes from the MLPerf Inference Benchmark Suite.


# Prerequisite

## 1. Environment
Python 3.6 or higher is recommended.

```shell
cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx
pip install -r requirements.txt
```
Check your GCC version with the command **gcc -v**; GCC 5 or above is required.

```shell
# install mlperf
bash prepare_loadgen.sh
```

## 2. Prepare Dataset

```shell
bash prepare_dataset.sh --download_dir=origin_dataset --convert_dir=convert_dataset
```

prepare_dataset.sh contains two stages:
- stage 1: download the LibriSpeech dev-clean dataset and extract it.
- stage 2: convert the .flac files to .wav files.

## 3. Prepare Pre-trained Model

```shell
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O rnnt.pt
```

# Run

## 1. Enable the RNNT example with the auto dynamic quantization strategy of Neural Compressor

The changes made are as follows (a sketch of the resulting 2.x flow follows the list):
1. pytorch_SUT.py:
   Removed the jit script conversion.
2. pytorch/decoders.py:
   Removed the assertion of torch.jit.ScriptModule.
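
For reference, here is a minimal sketch of the Neural Compressor 2.x post-training dynamic quantization flow; the model and `eval_func` below are simplified placeholders, not the exact code in run_tune.py:

```python
import torch
from neural_compressor import PostTrainingQuantConfig, quantization

# Placeholder model; the real example builds the full RNNT network and loads rnnt.pt.
model = torch.nn.LSTM(input_size=240, hidden_size=320)

def eval_func(model):
    # Placeholder: the real example returns an accuracy score (e.g. 100 - WER).
    return 1.0

conf = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, conf, eval_func=eval_func)
q_model.save("./saved_results")
```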

## 2. Tuning command:
```shell
bash run_tuning.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --output_model=saved_results
```
## 3. Benchmark command:
```shell
# fp32
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=false
# int8
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=true
```
## 4. Brief output information:

In each pair below, the first value is the accuracy in percent and the second is the time usage in seconds.

- FP32 baseline is: [92.5477, 796.7552].
- Tune 1 result is: [91.5872, 1202.2529]
- Tune 2 result is: [91.5894, 1201.3231]
- Tune 3 result is: [91.5195, 1211.5965]
- Tune 4 result is: [91.6030, 1218.2211]
- Tune 5 result is: [91.4812, 1169.5080]
- ...

@@ -0,0 +1,51 @@
#!/usr/bin/env python

import argparse
import array
import json
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch"))

from helpers import process_evaluation_epoch, __gather_predictions
from parts.manifest import Manifest

# Map LoadGen output dtypes to Python array-module typecodes, used to decode
# the hex-encoded data entries in the accuracy log.
dtype_map = {
    "int8": 'b',
    "int16": 'h',
    "int32": 'l',
    "int64": 'q',
}

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_dir", required=True)
    parser.add_argument("--dataset_dir", required=True)
    parser.add_argument("--manifest", required=True)
    parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type")
    args = parser.parse_args()
    return args

def main():
    args = get_args()
    labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
    manifest = Manifest(args.dataset_dir, [args.manifest], labels, len(labels), normalize=True, max_duration=15.0)
    with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh:
        results = json.load(fh)
    hypotheses = []
    references = []
    # Each accuracy-log entry stores the predicted characters as a hex string;
    # decode it back into a list of integer label indices.
    for result in results:
        hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist())
        references.append(manifest[result["qsl_idx"]]["transcript"])

    references = __gather_predictions([references], labels=labels)
    hypotheses = __gather_predictions([hypotheses], labels=labels)

    d = dict(predictions=hypotheses,
             transcripts=references)
    wer = process_evaluation_epoch(d)
    print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100))

if __name__ == '__main__':
    main()
@@ -0,0 +1,65 @@
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model may be '*' as a wildcard. In that case the value applies to all models.
# All times are in milliseconds.

# Set performance_sample_count for each model.
# User can optionally set this to higher values in user.conf.
mobilenet.*.performance_sample_count_override = 1024
gnmt.*.performance_sample_count_override = 3903900
resnet50.*.performance_sample_count_override = 1024
ssd-mobilenet.*.performance_sample_count_override = 256
ssd-resnet34.*.performance_sample_count_override = 64
bert.*.performance_sample_count_override = 10833
dlrm.*.performance_sample_count_override = 204800
rnnt.*.performance_sample_count_override = 2513
3d-unet.*.performance_sample_count_override = 16

# Set seeds. The seeds will be distributed two weeks before the submission.
*.*.qsl_rng_seed = 12786827339337101903
*.*.sample_index_rng_seed = 12640797754436136668
*.*.schedule_rng_seed = 3135815929913719677

*.SingleStream.target_latency_percentile = 90
*.SingleStream.min_duration = 60000
*.SingleStream.min_query_count = 1024

*.MultiStream.target_qps = 20
*.MultiStream.target_latency_percentile = 99
*.MultiStream.max_async_queries = 1
*.MultiStream.target_latency = 50
*.MultiStream.min_duration = 60000
*.MultiStream.min_query_count = 270336
ssd-resnet34.MultiStream.target_qps = 15
ssd-resnet34.MultiStream.target_latency = 66
gnmt.MultiStream.min_query_count = 90112
gnmt.MultiStream.target_latency = 100
gnmt.MultiStream.target_qps = 10
gnmt.MultiStream.target_latency_percentile = 97

*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
*.Server.target_duration = 0
*.Server.min_duration = 60000
*.Server.min_query_count = 270336
resnet50.Server.target_latency = 15
ssd-resnet34.Server.target_latency = 100
gnmt.Server.min_query_count = 90112
gnmt.Server.target_latency = 250
gnmt.Server.target_latency_percentile = 97
bert.Server.target_latency = 130
dlrm.Server.target_latency = 30
rnnt.Server.target_latency = 1000

*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 60000
# In Offline scenario, we always have one query. But LoadGen maps this to
# min_sample_count internally in Offline scenario, so set this to 24576 since
# the rule requires that Offline scenario run for at least 24576 samples.
*.Offline.min_query_count = 24576

# These fields should be defined and overridden by user.conf.
*.SingleStream.target_latency = 10
*.Server.target_qps = 1.0
*.Offline.target_qps = 1.0
*.MultiStream.samples_per_query = 4
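
# For instance, a hypothetical user.conf could override the RNNT Offline
# target QPS and SingleStream latency like this (illustrative values only):
#   rnnt.Offline.target_qps = 10
#   rnnt.SingleStream.target_latency = 100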
@@ -0,0 +1,55 @@
#!/bin/bash
set -x

function main {

  init_params "$@"
  prepare_dataset

}

# init params
function init_params {
  for var in "$@"
  do
    case $var in
      --download_dir=*)
          download_dir=$(echo $var |cut -f2 -d=)
      ;;
      --convert_dir=*)
          convert_dir=$(echo $var |cut -f2 -d=)
      ;;
      *)
          echo "Error: No such parameter: ${var}"
          exit 1
      ;;
    esac
  done

  mkdir -p $download_dir $convert_dir
}

# prepare_dataset
function prepare_dataset {
  # If you already have the original dataset, set stage=2; make sure it is
  # extracted, and change the dataset path below to your own path.
  stage=1

  # Download dataset
  if [[ $stage -le 1 ]]; then
    python pytorch/utils/download_librispeech.py \
        pytorch/utils/librispeech-inference.csv \
        $download_dir \
        -e $download_dir
  fi

  # Convert dataset
  if [[ $stage -le 2 ]]; then
    python pytorch/utils/convert_librispeech.py \
        --input_dir $download_dir/LibriSpeech/dev-clean \
        --dest_dir $convert_dir/dev-clean-wav \
        --output_json $convert_dir/dev-clean-wav.json
  fi
}

main "$@"
@@ -0,0 +1,10 @@
pushd .
echo "Install loadgen"
git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference
cd mlperf_inference
git checkout r2.1
git log -1
git submodule update --init --recursive
cd loadgen
CFLAGS="-std=c++14" python setup.py install
popd