diff --git a/convert.py b/convert.py
index a5cfbc2..ff3ed7e 100644
--- a/convert.py
+++ b/convert.py
@@ -38,7 +38,7 @@
                           dynamic_axes={
                               "inputs": {1: "phoneme"},
                               # ideally, this works but repeat_interleave is fixed
-                              "outputs": {1: "wav"}
+                              "outputs": {0: "wav", 1: "lengths", 2: "duration"}
                           })
     elif args.jit is not None:
         with torch.no_grad():
diff --git a/demo.py b/demo.py
index 3564cb2..35ab51d 100644
--- a/demo.py
+++ b/demo.py
@@ -124,8 +124,14 @@ def tts(lexicon, g2p, preprocess_config, model, is_onnx, args, verbose=False):
         model = model.to(args.infer_device)
         model.eval()
 
+        # the default number of threads is 128 on AMD
+        # this is too high and causes the model to run slower
+        # set it to a lower number, e.g. --threads 24
+        # https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html
+        if args.threads is not None:
+            torch.set_num_threads(args.threads)
         if args.compile:
-            model = torch.compile(model)
+            model = torch.compile(model, mode="reduce-overhead", backend="inductor")
 
     if args.text is not None:
         rtf = []
diff --git a/requirements.txt b/requirements.txt
index 4867db7..b5c6d06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,8 @@ g2p-en
 validators
 onnx
 onnxruntime
-protobuf==3.20
+protobuf==3.20.2
+numpy==1.24.3
 # needed for data preparation
 librosa
 unidecode
diff --git a/utils/tools.py b/utils/tools.py
index 4dac6ab..7a4f68d 100644
--- a/utils/tools.py
+++ b/utils/tools.py
@@ -321,6 +321,7 @@ def get_args():
     parser.add_argument("--devices", type=int, default=1)
     parser.add_argument("--iter", type=int, default=1)
+    parser.add_argument("--threads", type=int, default=24)
 
     #choices = ["bf16-mixed", "16-mixed", 16, 32, 64]
     parser.add_argument("--precision", default=16)
@@ -436,7 +437,7 @@
                         help='Convert to onnx model')
     parser.add_argument('--onnx-insize',
                         type=int,
-                        default=128,
+                        default=None,
                         help='Max input size for the onnx model')
     parser.add_argument('--onnx-opset',
                         type=int,
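
Note on the convert.py change: the export now declares all three outputs of the
graph as dynamically shaped (waveform, valid lengths, predicted durations)
instead of only the waveform, and --onnx-insize defaulting to None leaves the
input size unfixed. A minimal sketch of driving such an exported model with
onnxruntime follows; the file name "efficientspeech.onnx", the input tensor
name "inputs", the phoneme-id range, and the int32 dtype are assumptions for
illustration, not taken from this patch.

    # Sketch: run the exported model and unpack the three dynamic outputs.
    # File name, input name, and dtype are assumptions; check convert.py.
    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession("efficientspeech.onnx",
                                   providers=["CPUExecutionProvider"])

    # dummy phoneme-id sequence, batch size 1; real ids come from the
    # lexicon/g2p front end in demo.py
    phonemes = np.random.randint(1, 100, size=(1, 64), dtype=np.int32)

    # with the updated dynamic_axes, session.run returns the waveform,
    # its valid lengths, and the predicted durations
    wav, lengths, duration = session.run(None, {"inputs": phonemes})
    print(wav.shape, lengths.shape, duration.shape)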
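Note on the demo.py change: PyTorch's intra-op pool defaults to one thread per
logical core (128 on the AMD machine referenced in the comment), and
oversubscription slows CPU inference, so --threads caps the pool. A quick way
to pick a value is to time a representative workload at several thread counts;
the matmul below is a stand-in workload, on the assumption that a real
measurement would use the model's forward pass instead.

    # Sketch: pick a thread count empirically. The matmul is a stand-in;
    # substitute the model's forward pass for a real measurement.
    import time
    import torch

    x = torch.randn(1024, 1024)
    for n in (4, 8, 16, 24, 32):
        torch.set_num_threads(n)
        start = time.perf_counter()
        with torch.no_grad():
            for _ in range(50):
                x @ x
        print(f"{n:2d} threads: {time.perf_counter() - start:.3f} s")

Relatedly, mode="reduce-overhead" in torch.compile trades longer warm-up
compilation for lower per-call overhead, which suits the short, repeated
inference calls that the RTF loop in demo.py times.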