diff --git a/convert.py b/convert.py
index a5cfbc2..ff3ed7e 100644
--- a/convert.py
+++ b/convert.py
@@ -38,7 +38,7 @@
                           dynamic_axes={
                               "inputs": {1: "phoneme"},
                               # ideally, this works but repeat_interleave is fixed
-                              "outputs": {1: "wav"}
+                              "outputs": {0: "wav", 1: "lengths", 2: "duration"}
                           })
     elif args.jit is not None:
         with torch.no_grad():
diff --git a/demo.py b/demo.py
index 3564cb2..35ab51d 100644
--- a/demo.py
+++ b/demo.py
@@ -124,8 +124,14 @@ def tts(lexicon, g2p, preprocess_config, model, is_onnx, args, verbose=False):
         model = model.to(args.infer_device)
         model.eval()
 
+        # the default number of threads is 128 on AMD
+        # this is too high and causes the model to run slower
+        # set it to a lower number, e.g. --threads 24
+        # https://pytorch.org/docs/stable/notes/cpu_threading_torchscript_inference.html
+        if args.threads is not None:
+            torch.set_num_threads(args.threads)
         if args.compile:
-            model = torch.compile(model)
+            model = torch.compile(model, mode="reduce-overhead", backend="inductor")
 
     if args.text is not None:
         rtf = []
diff --git a/requirements.txt b/requirements.txt
index 4867db7..b5c6d06 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -12,7 +12,8 @@ g2p-en
 validators
 onnx
 onnxruntime
-protobuf==3.20
+protobuf==3.20.2
+numpy==1.24.3
 # needed for data preparation
 librosa
 unidecode
diff --git a/utils/tools.py b/utils/tools.py
index 4dac6ab..7a4f68d 100644
--- a/utils/tools.py
+++ b/utils/tools.py
@@ -321,6 +321,7 @@ def get_args():
     parser.add_argument("--devices", type=int, default=1)
     parser.add_argument("--iter", type=int, default=1)
+    parser.add_argument("--threads", type=int, default=24)
 
     #choices = ["bf16-mixed", "16-mixed", 16, 32, 64]
     parser.add_argument("--precision", default=16)
@@ -436,7 +437,7 @@
                         help='Convert to onnx model')
     parser.add_argument('--onnx-insize',
                         type=int,
-                        default=128,
+                        default=None,
                         help='Max input size for the onnx model')
     parser.add_argument('--onnx-opset',
                         type=int,
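
Note on the convert.py change: the export now declares all three outputs of the
graph as dynamically shaped (waveform, valid lengths, predicted durations)
instead of only the waveform, and --onnx-insize defaulting to None leaves the
input size unfixed. A minimal sketch of driving such an exported model with
onnxruntime follows; the file name "efficientspeech.onnx", the input tensor
name "inputs", the phoneme-id range, and the int32 dtype are assumptions for
illustration, not taken from this patch.

    # Sketch: run the exported model and unpack the three dynamic outputs.
    # File name, input name, and dtype are assumptions; check convert.py.
    import numpy as np
    import onnxruntime as ort

    session = ort.InferenceSession("efficientspeech.onnx",
                                   providers=["CPUExecutionProvider"])

    # dummy phoneme-id sequence, batch size 1; real ids come from the
    # lexicon/g2p front end in demo.py
    phonemes = np.random.randint(1, 100, size=(1, 64), dtype=np.int32)

    # with the updated dynamic_axes, session.run returns the waveform,
    # its valid lengths, and the predicted durations
    wav, lengths, duration = session.run(None, {"inputs": phonemes})
    print(wav.shape, lengths.shape, duration.shape)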
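Note on the demo.py change: PyTorch's intra-op pool defaults to one thread per
logical core (128 on the AMD machine referenced in the comment), and
oversubscription slows CPU inference, so --threads caps the pool. A quick way
to pick a value is to time a representative workload at several thread counts;
the matmul below is a stand-in workload, on the assumption that a real
measurement would use the model's forward pass instead.

    # Sketch: pick a thread count empirically. The matmul is a stand-in;
    # substitute the model's forward pass for a real measurement.
    import time
    import torch

    x = torch.randn(1024, 1024)
    for n in (4, 8, 16, 24, 32):
        torch.set_num_threads(n)
        start = time.perf_counter()
        with torch.no_grad():
            for _ in range(50):
                x @ x
        print(f"{n:2d} threads: {time.perf_counter() - start:.3f} s")

Relatedly, mode="reduce-overhead" in torch.compile trades longer warm-up
compilation for lower per-call overhead, which suits the short, repeated
inference calls that the RTF loop in demo.py times.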