Commit

migrate rnnt to API 2.x (#1097)
Signed-off-by: Cheng, Zixuan <[email protected]>
violetch24 authored Sep 19, 2023
1 parent 6c26635 commit 059a754
Showing 46 changed files with 3,792 additions and 5 deletions.
8 changes: 3 additions & 5 deletions examples/.config/model_params_pytorch.json
@@ -317,13 +317,11 @@
"main_script": "run.py"
},
"rnnt": {
"model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/eager",
"model_src_dir": "speech_recognition/rnnt/quantization/ptq_dynamic/fx",
"dataset_location": "/tf_dataset/pytorch/rnnt/convert_dataset/",
"input_model": "/tf_dataset/pytorch/rnnt/rnnt.pt",
"yaml": "conf.yaml",
"strategy": "basic",
"batch_size": 100,
"new_benchmark": false
"main_script": "run_tune.py",
"batch_size": 100
},
"wav2vec2_dynamic":{
"model_src_dir": "speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx",
6 changes: 6 additions & 0 deletions examples/README.md
@@ -508,6 +508,12 @@ Intel® Neural Compressor validated examples with multiple compression technique
<td>Post-Training Dynamic Quantization</td>
<td><a href="./pytorch/speech_recognition/torchaudio_models/quantization/ptq_dynamic/fx">fx</a></td>
</tr>
<tr>
<td>RNNT</td>
<td>Speech Recognition</td>
<td>Post-Training Dynamic Quantization</td>
<td><a href="./pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx">fx</a></td>
</tr>
<tr>
<td>BlendCNN</td>
<td>Natural Language Processing</td>
@@ -0,0 +1,68 @@
import sys
import os
sys.path.insert(0, os.path.join(os.getcwd(), "pytorch"))

from parts.manifest import Manifest
from parts.segment import AudioSegment

import numpy as np

import mlperf_loadgen as lg


class AudioQSL:
    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        m_paths = [manifest_filepath]
        self.manifest = Manifest(dataset_dir, m_paths, labels, len(labels),
                                 normalize=True, max_duration=15.0)
        self.sample_rate = sample_rate
        self.count = len(self.manifest)
        perf_count = self.count if perf_count is None else perf_count
        self.sample_id_to_sample = {}
        self.qsl = lg.ConstructQSL(self.count, perf_count,
                                   self.load_query_samples,
                                   self.unload_query_samples)
        print(
            "Dataset loaded with {0:.2f} hours. Filtered {1:.2f} hours. Number of samples: {2}".format(
                self.manifest.duration / 3600,
                self.manifest.filtered_duration / 3600,
                self.count))

    def load_query_samples(self, sample_list):
        for sample_id in sample_list:
            self.sample_id_to_sample[sample_id] = self._load_sample(sample_id)

    def unload_query_samples(self, sample_list):
        for sample_id in sample_list:
            del self.sample_id_to_sample[sample_id]

    def _load_sample(self, index):
        sample = self.manifest[index]
        segment = AudioSegment.from_file(sample['audio_filepath'][0],
                                         target_sr=self.sample_rate)
        waveform = segment.samples
        assert isinstance(waveform, np.ndarray) and waveform.dtype == np.float32
        return waveform

    def __getitem__(self, index):
        return self.sample_id_to_sample[index]

    def __del__(self):
        lg.DestroyQSL(self.qsl)
        print("Finished destroying QSL.")


# We have no problem fitting all data in memory, so we do that, in
# order to speed up execution of the benchmark.
class AudioQSLInMemory(AudioQSL):
    def __init__(self, dataset_dir, manifest_filepath, labels,
                 sample_rate=16000, perf_count=None):
        super().__init__(dataset_dir, manifest_filepath, labels,
                         sample_rate, perf_count)
        super().load_query_samples(range(self.count))

    def load_query_samples(self, sample_list):
        pass

    def unload_query_samples(self, sample_list):
        pass
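
# A hypothetical usage sketch (not part of the original file): the paths are
# placeholders matching the layout produced by prepare_dataset.sh, and
# `labels` mirrors the character set used in accuracy_eval.py.
#
#   labels = [" "] + [chr(c) for c in range(ord("a"), ord("z") + 1)] + ["'"]
#   qsl = AudioQSLInMemory("convert_dataset/dev-clean-wav",
#                          "convert_dataset/dev-clean-wav.json",
#                          labels)
#   waveform = qsl[0]  # float32 numpy waveform of the first loaded sample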
@@ -0,0 +1,75 @@
Step-by-Step
============

This document describes the steps to reproduce the tuning results of the Intel-optimized PyTorch RNNT model with Neural Compressor.

Our example comes from the MLPerf Inference Benchmark Suite.


# Prerequisite

## 1. Environment
Python 3.6 or higher is recommended.

```shell
cd examples/pytorch/speech_recognition/rnnt/quantization/ptq_dynamic/fx
pip install -r requirements.txt
```
Check your GCC version with the command **gcc -v**; GCC 5 or above is required.

```shell
# install mlperf
bash prepare_loadgen.sh
```

## 2. Prepare Dataset

```shell
bash prepare_dataset.sh --download_dir=origin_dataset --convert_dir=convert_dataset
```

prepare_dataset.sh contains two stages:
- stage 1: download the LibriSpeech dev-clean dataset and extract it.
- stage 2: convert the .flac files to .wav files.

## 3. Prepare Pre-trained Model

```shell
wget https://zenodo.org/record/3662521/files/DistributedDataParallel_1576581068.9962234-epoch-100.pt?download=1 -O rnnt.pt
```

# Run

## 1. Enable the RNNT example with the auto dynamic quantization strategy of Neural Compressor

The changes made are as follows (a sketch of the resulting 2.x flow follows the list):
1. pytorch_SUT.py:
   Removed the jit script conversion.
2. pytorch/decoders.py:
   Removed the assertion of torch.jit.ScriptModule.
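
For reference, here is a minimal sketch of the Neural Compressor 2.x post-training dynamic quantization flow; the model and `eval_func` below are simplified placeholders, not the exact code in run_tune.py:

```python
import torch
from neural_compressor import PostTrainingQuantConfig, quantization

# Placeholder model; the real example builds the full RNNT network and loads rnnt.pt.
model = torch.nn.LSTM(input_size=240, hidden_size=320)

def eval_func(model):
    # Placeholder: the real example returns an accuracy score (e.g. 100 - WER).
    return 1.0

conf = PostTrainingQuantConfig(approach="dynamic")
q_model = quantization.fit(model, conf, eval_func=eval_func)
q_model.save("./saved_results")
```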

## 2. Tuning command:
```shell
bash run_tuning.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --output_model=saved_results
```
## 3. Benchmark command:
```shell
# fp32
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=false
# int8
bash run_benchmark.sh --dataset_location=convert_dataset --input_model=./rnnt.pt --mode=performance/accuracy --int8=true
```
## 4. Brief output information:

In each pair below, the first value is the accuracy in percent and the second is the time usage in seconds.

- FP32 baseline is: [92.5477, 796.7552].
- Tune 1 result is: [91.5872, 1202.2529]
- Tune 2 result is: [91.5894, 1201.3231]
- Tune 3 result is: [91.5195, 1211.5965]
- Tune 4 result is: [91.6030, 1218.2211]
- Tune 5 result is: [91.4812, 1169.5080]
- ...

@@ -0,0 +1,51 @@
#!/usr/bin/env python

import argparse
import array
import json
import sys
import os

sys.path.insert(0, os.path.join(os.path.dirname(__file__), "pytorch"))

from helpers import process_evaluation_epoch, __gather_predictions
from parts.manifest import Manifest

# Map LoadGen output dtypes to Python array-module typecodes, used to decode
# the hex-encoded data entries in the accuracy log.
dtype_map = {
    "int8": 'b',
    "int16": 'h',
    "int32": 'l',
    "int64": 'q',
}

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument("--log_dir", required=True)
    parser.add_argument("--dataset_dir", required=True)
    parser.add_argument("--manifest", required=True)
    parser.add_argument("--output_dtype", default="int64", choices=dtype_map.keys(), help="Output data type")
    args = parser.parse_args()
    return args

def main():
    args = get_args()
    labels = [" ", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "'"]
    manifest = Manifest(args.dataset_dir, [args.manifest], labels, len(labels), normalize=True, max_duration=15.0)
    with open(os.path.join(args.log_dir, "mlperf_log_accuracy.json")) as fh:
        results = json.load(fh)
    hypotheses = []
    references = []
    # Each accuracy-log entry stores the predicted characters as a hex string;
    # decode it back into a list of integer label indices.
    for result in results:
        hypotheses.append(array.array(dtype_map[args.output_dtype], bytes.fromhex(result["data"])).tolist())
        references.append(manifest[result["qsl_idx"]]["transcript"])

    references = __gather_predictions([references], labels=labels)
    hypotheses = __gather_predictions([hypotheses], labels=labels)

    d = dict(predictions=hypotheses,
             transcripts=references)
    wer = process_evaluation_epoch(d)
    print("Word Error Rate: {:}%, accuracy={:}%".format(wer * 100, (1 - wer) * 100))

if __name__ == '__main__':
    main()
@@ -0,0 +1,65 @@
# The format of this config file is 'key = value'.
# The key has the format 'model.scenario.key'. Value is mostly int64_t.
# Model may be '*' as a wildcard. In that case the value applies to all models.
# All times are in milliseconds.

# Set performance_sample_count for each model.
# User can optionally set this to higher values in user.conf.
mobilenet.*.performance_sample_count_override = 1024
gnmt.*.performance_sample_count_override = 3903900
resnet50.*.performance_sample_count_override = 1024
ssd-mobilenet.*.performance_sample_count_override = 256
ssd-resnet34.*.performance_sample_count_override = 64
bert.*.performance_sample_count_override = 10833
dlrm.*.performance_sample_count_override = 204800
rnnt.*.performance_sample_count_override = 2513
3d-unet.*.performance_sample_count_override = 16

# Set seeds. The seeds will be distributed two weeks before the submission.
*.*.qsl_rng_seed = 12786827339337101903
*.*.sample_index_rng_seed = 12640797754436136668
*.*.schedule_rng_seed = 3135815929913719677

*.SingleStream.target_latency_percentile = 90
*.SingleStream.min_duration = 60000
*.SingleStream.min_query_count = 1024

*.MultiStream.target_qps = 20
*.MultiStream.target_latency_percentile = 99
*.MultiStream.max_async_queries = 1
*.MultiStream.target_latency = 50
*.MultiStream.min_duration = 60000
*.MultiStream.min_query_count = 270336
ssd-resnet34.MultiStream.target_qps = 15
ssd-resnet34.MultiStream.target_latency = 66
gnmt.MultiStream.min_query_count = 90112
gnmt.MultiStream.target_latency = 100
gnmt.MultiStream.target_qps = 10
gnmt.MultiStream.target_latency_percentile = 97

*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
*.Server.target_duration = 0
*.Server.min_duration = 60000
*.Server.min_query_count = 270336
resnet50.Server.target_latency = 15
ssd-resnet34.Server.target_latency = 100
gnmt.Server.min_query_count = 90112
gnmt.Server.target_latency = 250
gnmt.Server.target_latency_percentile = 97
bert.Server.target_latency = 130
dlrm.Server.target_latency = 30
rnnt.Server.target_latency = 1000

*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 60000
# In Offline scenario, we always have one query. But LoadGen maps this to
# min_sample_count internally in Offline scenario, so set this to 24576 since
# the rule requires that Offline scenario run for at least 24576 samples.
*.Offline.min_query_count = 24576

# These fields should be defined and overridden by user.conf.
*.SingleStream.target_latency = 10
*.Server.target_qps = 1.0
*.Offline.target_qps = 1.0
*.MultiStream.samples_per_query = 4
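
# For instance, a hypothetical user.conf could override the RNNT Offline
# target QPS and SingleStream latency like this (illustrative values only):
#   rnnt.Offline.target_qps = 10
#   rnnt.SingleStream.target_latency = 100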
@@ -0,0 +1,55 @@
#!/bin/bash
set -x

function main {

  init_params "$@"
  prepare_dataset

}

# init params
function init_params {
  for var in "$@"
  do
    case $var in
      --download_dir=*)
          download_dir=$(echo $var |cut -f2 -d=)
      ;;
      --convert_dir=*)
          convert_dir=$(echo $var |cut -f2 -d=)
      ;;
      *)
          echo "Error: No such parameter: ${var}"
          exit 1
      ;;
    esac
  done

  mkdir -p $download_dir $convert_dir
}

# prepare_dataset
function prepare_dataset {
  # If you already have the original dataset, set stage=2; make sure it is
  # extracted, and change the dataset path below to your own path.
  stage=1

  # Download dataset
  if [[ $stage -le 1 ]]; then
    python pytorch/utils/download_librispeech.py \
        pytorch/utils/librispeech-inference.csv \
        $download_dir \
        -e $download_dir
  fi

  # Convert dataset
  if [[ $stage -le 2 ]]; then
    python pytorch/utils/convert_librispeech.py \
        --input_dir $download_dir/LibriSpeech/dev-clean \
        --dest_dir $convert_dir/dev-clean-wav \
        --output_json $convert_dir/dev-clean-wav.json
  fi
}

main "$@"
@@ -0,0 +1,10 @@
pushd .
echo "Install loadgen"
git clone --recurse-submodules https://github.com/mlcommons/inference.git mlperf_inference
cd mlperf_inference
git checkout r2.1
git log -1
git submodule update --init --recursive
cd loadgen
CFLAGS="-std=c++14" python setup.py install
popd