-
Notifications
You must be signed in to change notification settings - Fork 259
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Cheng, Zixuan <[email protected]>
- Loading branch information
1 parent
6c26635
commit 059a754
Showing
46 changed files
with
3,792 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
68 changes: 68 additions & 0 deletions
68
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/QSL.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
import sys | ||
import os | ||
sys.path.insert(0, os.path.join(os.getcwd(), "pytorch")) | ||
|
||
from parts.manifest import Manifest | ||
from parts.segment import AudioSegment | ||
|
||
import numpy as np | ||
|
||
import mlperf_loadgen as lg | ||
|
||
|
||
class AudioQSL:
    """LoadGen query sample library (QSL) backed by an audio manifest.

    Waveforms are loaded on request through the callbacks registered with
    ``lg.ConstructQSL`` and kept in ``sample_id_to_sample`` until LoadGen
    asks for them to be unloaded.
    """

    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        """Build the manifest and register this object as a LoadGen QSL.

        Args:
            dataset_dir: Dataset directory handed to ``Manifest``.
            manifest_filepath: Path of a single manifest file; it is wrapped
                in a one-element list for ``Manifest``.
            labels: Label list handed to ``Manifest`` (``len(labels)`` is
                passed as its fourth argument).
            sample_rate: Target sample rate used when reading audio files.
            perf_count: Performance sample count given to LoadGen; defaults
                to the total number of samples in the manifest.
        """
        m_paths = [manifest_filepath]
        self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
                                 normalize=True, max_duration=15.0)
        self.sample_rate = sample_rate
        self.count = len(self.manifest)
        perf_count = self.count if perf_count is None else perf_count
        # Cache of loaded waveforms, keyed by sample id.
        self.sample_id_to_sample = {}
        self.qsl = lg.ConstructQSL(self.count, perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print(
            "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
                self.manifest.duration / 3600,
                self.manifest.filtered_duration / 3600,
                self.count))

    def load_query_samples(self, sample_list):
        """Load the waveform of every sample id in ``sample_list`` into the cache."""
        for sample_id in sample_list:
            self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)

    def unload_query_samples(self, sample_list):
        """Drop every sample id in ``sample_list`` from the cache.

        Raises:
            KeyError: If a sample id is not currently loaded.
        """
        for sample_id in sample_list:
            del self.sample_id_to_sample[sample_id]

    def _load_sample(self, index):
        """Read the audio file of manifest entry ``index`` as a float32 waveform."""
        sample = self.manifest[index]
        # Only the first entry of 'audio_filepath' is read.
        segment = AudioSegment.from_file(sample['audio_filepath'][0],
                                         target_sr=self.sample_rate)
        waveform = segment.samples
        assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32
        return waveform

    def __getitem__(self, index):
        """Return the cached waveform for sample id ``index`` (KeyError if not loaded)."""
        return self.sample_id_to_sample[index]

    def __del__(self):
        """Destroy the LoadGen QSL handle, if one was ever created."""
        # The finalizer also runs on an object whose __init__ raised before
        # ``self.qsl`` was assigned (e.g. the manifest failed to load), so the
        # attribute may be missing.
        qsl = getattr(self, "qsl", None)
        if qsl is None:
            return
        # Clear the handle first so a repeated call cannot destroy it twice.
        self.qsl = None
        lg.DestroyQSL(qsl)
        print("Finished destroying QSL.")
|
||
# We have no problem fitting all data in memory, so we do that, in | ||
# order to speed up execution of the benchmark. | ||
class AudioQSLInMemory(AudioQSL):
    """AudioQSL variant that loads every sample once, at construction time."""

    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        """Construct the base QSL, then load all ``self.count`` samples."""
        super().__init__(dataset_dir, manifest_filepath, labels,
                         sample_rate=sample_rate, perf_count=perf_count)
        # Go through the base-class loader: the override below is a no-op.
        every_sample_id = range(self.count)
        super().load_query_samples(every_sample_id)

    def load_query_samples(self, sample_list):
        """Do nothing: all samples were already loaded by the constructor."""
        return None

    def unload_query_samples(self, sample_list):
        """Do nothing: samples stay cached for the lifetime of the object."""
        return None
75 changes: 75 additions & 0 deletions
75
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
Step-by-Step | ||
============ | ||
|
||
This document lists the steps to reproduce the tuning results of the Intel Optimized PyTorch RNNT model with Neural Compressor. | ||
|
||
Our example comes from MLPerf Inference Benchmark Suite. | ||
|
||
|
||
# Prerequisite | ||
|
||
## 1. Environment | ||
Python 3.6 or higher version is recommended. | ||
|
||
```shell | ||
cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx | ||
pip install -r requirements.txt | ||
``` | ||
Check your gcc version with the command: **gcc -v** | ||
|
||
GCC5 or above is required. | ||
|
||
```shell | ||
# install mlperf | ||
bash prepare_loadgen.sh | ||
``` | ||
|
||
## 2. Prepare Dataset | ||
|
||
```shell | ||
bash prepare_dataset.sh --download_dir=origin_dataset --convert_dir=convert_dataset | ||
``` | ||
|
||
prepare_dataset.sh contains two stages: | ||
- stage1: download LibriSpeech/dev-clean dataset and extract it. | ||
- stage2: convert the .flac files to .wav files. | ||
|
||
## 3. Prepare Pre-trained Model | ||
|
||
```shell | ||
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O rnnt.pt | ||
``` | ||
|
||
# Run | ||
|
||
## 1. Enable RNNT example with the auto dynamic quantization strategy of Neural Compressor. | ||
|
||
The changes made are as follows: | ||
1. pytorch_SUT.py: | ||
Removed jit script conversion. | ||
2. pytorch/decoders.py: | ||
Removed assertion of torch.jit.ScriptModule. | ||
|
||
## 2. Tuning command: | ||
```shell | ||
bash run_tuning.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --output_model=saved_results | ||
``` | ||
## 3. Benchmark command: | ||
```shell | ||
# fp32 | ||
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=false | ||
# int8 | ||
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=true | ||
``` | ||
## 4. Brief output information: | ||
|
||
In each pair below, the first value is the accuracy (in percent) and the second value is the elapsed time (in seconds). | ||
|
||
- FP32 baseline is: [92.5477, 796.7552]. | ||
- Tune 1 result is: [91.5872, 1202.2529] | ||
- Tune 2 result is: [91.5894, 1201.3231] | ||
- Tune 3 result is: [91.5195, 1211.5965] | ||
- Tune 4 result is: [91.6030, 1218.2211] | ||
- Tune 5 result is: [91.4812, 1169.5080] | ||
- ... | ||
|
51 changes: 51 additions & 0 deletions
51
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/accuracy_eval.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
#!/usr/bin/env python | ||
|
||
import argparse | ||
import array | ||
import json | ||
import sys | ||
import os | ||
|
||
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch")) | ||
|
||
from helpers import process_evaluation_epoch, __gather_predictions | ||
from parts.manifest import Manifest | ||
|
||
# Maps each --output_dtype choice to the ``array`` module typecode used to
# decode the hex-encoded "data" field of the accuracy log.
# "int32" must use 'i' (C int, 4 bytes on mainstream platforms): 'l' is a
# C long, which is 8 bytes on 64-bit Linux/macOS and would misdecode 32-bit data.
dtype_map = {
    "int8": 'b',
    "int16": 'h',
    "int32": 'i',
    "int64": 'q',
}
|
||
def get_args():
    """Parse the command-line options of the accuracy evaluation script.

    Returns:
        argparse.Namespace with ``log_dir``, ``dataset_dir`` and ``manifest``
        (all required) plus ``output_dtype`` (one of the ``dtype_map`` keys,
        "int64" by default).
    """
    cli = argparse.ArgumentParser()
    for required_flag in ("--log_dir", "--dataset_dir", "--manifest"):
        cli.add_argument(required_flag, required=True)
    cli.add_argument("--output_dtype", default="int64",
                     choices=dtype_map.keys(), help="Output data type")
    return cli.parse_args()
|
||
def main():
    """Compute and print the word error rate of a LoadGen accuracy log.

    Reads ``mlperf_log_accuracy.json`` from ``--log_dir``, decodes the
    hex-encoded ``data`` field of every entry into a list of integers, pairs
    it with the manifest transcript at the entry's ``qsl_idx``, and reports
    the resulting word error rate and accuracy.
    """
    args = get_args()
    # Space, the 26 lowercase letters, then the apostrophe.
    labels = list(" abcdefghijklmnopqrstuvwxyz'")
    manifest = Manifest(args.dataset_dir, [args.manifest], labels, len(labels),
                        normalize=True, max_duration=15.0)

    log_path = os.path.join(args.log_dir, "mlperf_log_accuracy.json")
    with open(log_path) as log_file:
        entries = json.load(log_file)

    typecode = dtype_map[args.output_dtype]
    hypotheses, references = [], []
    for entry in entries:
        decoded = array.array(typecode, bytes.fromhex(entry["data"]))
        hypotheses.append(decoded.tolist())
        references.append(manifest[entry["qsl_idx"]]["transcript"])

    references = __gather_predictions([references], labels=labels)
    hypotheses = __gather_predictions([hypotheses], labels=labels)

    wer = process_evaluation_epoch({"predictions": hypotheses,
                                    "transcripts": references})
    print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100))


if __name__ == '__main__':
    main()
65 changes: 65 additions & 0 deletions
65
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/mlperf.conf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
# The format of this config file is 'key = value'. | ||
# The key has the format 'model.scenario.key'. Value is mostly int64_t. | ||
# Model may be '*' as a wildcard. In that case the value applies to all models. | ||
# All times are in milliseconds. | ||
|
||
# Set performance_sample_count for each model. | ||
# User can optionally set this to higher values in user.conf. | ||
mobilenet.*.performance_sample_count_override = 1024 | ||
gnmt.*.performance_sample_count_override = 3903900 | ||
resnet50.*.performance_sample_count_override = 1024 | ||
ssd-mobilenet.*.performance_sample_count_override = 256 | ||
ssd-resnet34.*.performance_sample_count_override = 64 | ||
bert.*.performance_sample_count_override = 10833 | ||
dlrm.*.performance_sample_count_override = 204800 | ||
rnnt.*.performance_sample_count_override = 2513 | ||
3d-unet.*.performance_sample_count_override = 16 | ||
|
||
# Set seeds. The seeds will be distributed two weeks before the submission. | ||
*.*.qsl_rng_seed = 12786827339337101903 | ||
*.*.sample_index_rng_seed = 12640797754436136668 | ||
*.*.schedule_rng_seed = 3135815929913719677 | ||
|
||
*.SingleStream.target_latency_percentile = 90 | ||
*.SingleStream.min_duration = 60000 | ||
*.SingleStream.min_query_count = 1024 | ||
|
||
*.MultiStream.target_qps = 20 | ||
*.MultiStream.target_latency_percentile = 99 | ||
*.MultiStream.max_async_queries = 1 | ||
*.MultiStream.target_latency = 50 | ||
*.MultiStream.min_duration = 60000 | ||
*.MultiStream.min_query_count = 270336 | ||
ssd-resnet34.MultiStream.target_qps = 15 | ||
ssd-resnet34.MultiStream.target_latency = 66 | ||
gnmt.MultiStream.min_query_count = 90112 | ||
gnmt.MultiStream.target_latency = 100 | ||
gnmt.MultiStream.target_qps = 10 | ||
gnmt.MultiStream.target_latency_percentile = 97 | ||
|
||
*.Server.target_latency = 10 | ||
*.Server.target_latency_percentile = 99 | ||
*.Server.target_duration = 0 | ||
*.Server.min_duration = 60000 | ||
*.Server.min_query_count = 270336 | ||
resnet50.Server.target_latency = 15 | ||
ssd-resnet34.Server.target_latency = 100 | ||
gnmt.Server.min_query_count = 90112 | ||
gnmt.Server.target_latency = 250 | ||
gnmt.Server.target_latency_percentile = 97 | ||
bert.Server.target_latency = 130 | ||
dlrm.Server.target_latency = 30 | ||
rnnt.Server.target_latency = 1000 | ||
|
||
*.Offline.target_latency_percentile = 90 | ||
*.Offline.min_duration = 60000 | ||
# In Offline scenario, we always have one query. But LoadGen maps this to | ||
# min_sample_count internally in Offline scenario, so set this to 24576 since | ||
# the rule requires that Offline scenario run for at least 24576 samples. | ||
*.Offline.min_query_count = 24576 | ||
|
||
# These fields should be defined and overridden by user.conf. | ||
*.SingleStream.target_latency = 10 | ||
*.Server.target_qps = 1.0 | ||
*.Offline.target_qps = 1.0 | ||
*.MultiStream.samples_per_query = 4 |
55 changes: 55 additions & 0 deletions
55
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_dataset.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/bin/bash | ||
set -x | ||
|
||
# Script driver: parse the command-line options, then run the dataset
# preparation.
main() {
    init_params "$@"
    prepare_dataset
}
|
||
# init params
# Parse --download_dir=<path> and --convert_dir=<path> into the global
# variables download_dir and convert_dir, then create both directories.
# Exits with status 1 on an unknown option or when either path is missing.
function init_params {
    for var in "$@"
    do
        case $var in
            --download_dir=*)
                # Strip only the leading "--download_dir=" so that a value
                # containing "=" or whitespace is kept intact.
                download_dir=${var#*=}
            ;;
            --convert_dir=*)
                convert_dir=${var#*=}
            ;;
            *)
                echo "Error: No such parameter: ${var}"
                exit 1
            ;;
        esac
    done

    # Fail early instead of calling mkdir and the python helpers with
    # empty paths.
    if [[ -z "$download_dir" || -z "$convert_dir" ]]; then
        echo "Error: --download_dir and --convert_dir are both required"
        exit 1
    fi

    mkdir -p "$download_dir" "$convert_dir"
}
|
||
# prepare_dataset
# Run the download step (stage 1) and the conversion step (stage 2) with the
# helper scripts under pytorch/utils. Reads the global variables
# download_dir and convert_dir set by init_params.
function prepare_dataset {
    # if you already have origin dataset, set stage=2, make sure to extract it
    # and change the origin dataset path to your path
    stage=1

    # Download dataset
    # Paths are quoted so that directories containing whitespace work.
    if [[ $stage -le 1 ]]; then
        python pytorch/utils/download_librispeech.py \
            pytorch/utils/librispeech-inference.csv \
            "$download_dir" \
            -e "$download_dir"
    fi

    # Convert dataset
    if [[ $stage -le 2 ]]; then
        python pytorch/utils/convert_librispeech.py \
            --input_dir "$download_dir/LibriSpeech/dev-clean" \
            --dest_dir "$convert_dir/dev-clean-wav" \
            --output_json "$convert_dir/dev-clean-wav.json"
    fi
}
|
||
# Entry point: forward every command-line argument to main.
main "$@"
10 changes: 10 additions & 0 deletions
10
examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx/prepare_loadgen.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
#!/bin/bash
# Clone the MLCommons inference repository, check out the r2.1 ref, and
# install the LoadGen Python package from its loadgen directory.

# Abort on the first failing command. Without this, a failed clone or `cd`
# would let the following git commands run against whichever repository
# encloses the current directory.
set -e

pushd .
echo "Install loadgen"
# Reuse an existing checkout so the script can be re-run.
if [ ! -d mlperf_inference/.git ]; then
    git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference
fi
cd mlperf_inference
git checkout r2.1
git log -1
git submodule update --init --recursive
cd loadgen
CFLAGS="-std=c++14" python setup.py install
popd
Oops, something went wrong.