Skip to content

Commit

Permalink
Merge pull request #42 from shivammehta25/dev
Browse files Browse the repository at this point in the history
Merging dev: adding another dataset, the piper phonemizer, and refactoring
  • Loading branch information
shivammehta25 authored Jan 12, 2024
2 parents 5a2a893 + 95ec24b commit 47a629f
Show file tree
Hide file tree
Showing 9 changed files with 54 additions and 10 deletions.
14 changes: 14 additions & 0 deletions configs/data/hi-fi_en-US_female.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
defaults:
- ljspeech
- _self_

# Dataset URL: https://ast-astrec.nict.go.jp/en/release/hi-fi-captain/
_target_: matcha.data.text_mel_datamodule.TextMelDataModule
name: hi-fi_en-US_female
train_filelist_path: data/filelists/hi-fi-captain-en-us-female_train.txt
valid_filelist_path: data/filelists/hi-fi-captain-en-us-female_val.txt
batch_size: 32
cleaners: [english_cleaners_piper]
data_statistics: # Computed for this dataset
  mel_mean: -6.38385
  mel_std: 2.541796
14 changes: 14 additions & 0 deletions configs/experiment/hifi_dataset_piper_phonemizer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# @package _global_

# to execute this experiment run:
# python train.py experiment=hifi_dataset_piper_phonemizer

defaults:
- override /data: hi-fi_en-US_female.yaml

# all parameters below will be merged with parameters from default configurations set above
# this allows you to overwrite only specified parameters

tags: ["hi-fi", "single_speaker", "piper_phonemizer", "en_US", "female"]

run_name: hi-fi_en-US_female_piper_phonemizer
1 change: 1 addition & 0 deletions configs/model/matcha.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,4 @@ spk_emb_dim: 64
n_feats: 80
data_statistics: ${data.data_statistics}
out_size: null # Must be divisible by 4
prior_loss: true
2 changes: 1 addition & 1 deletion matcha/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.0.4
0.0.5
2 changes: 1 addition & 1 deletion matcha/models/baselightningmodule.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def training_step(self, batch: Any, batch_idx: int):
"step",
float(self.global_step),
on_step=True,
on_epoch=True,
prog_bar=True,
logger=True,
sync_dist=True,
)
Expand Down
8 changes: 3 additions & 5 deletions matcha/models/components/flow_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,16 +73,14 @@ def solve_euler(self, x, t_span, mu, mask, spks, cond):
# Or in future might add like a return_all_steps flag
sol = []

steps = 1
while steps <= len(t_span) - 1:
for step in range(1, len(t_span)):
dphi_dt = self.estimator(x, mask, mu, t, spks, cond)

x = x + dt * dphi_dt
t = t + dt
sol.append(x)
if steps < len(t_span) - 1:
dt = t_span[steps + 1] - t
steps += 1
if step < len(t_span) - 1:
dt = t_span[step + 1] - t

return sol[-1]

Expand Down
9 changes: 7 additions & 2 deletions matcha/models/matcha_tts.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ def __init__(
out_size,
optimizer=None,
scheduler=None,
prior_loss=True,
):
super().__init__()

Expand All @@ -44,6 +45,7 @@ def __init__(
self.spk_emb_dim = spk_emb_dim
self.n_feats = n_feats
self.out_size = out_size
self.prior_loss = prior_loss

if n_spks > 1:
self.spk_emb = torch.nn.Embedding(n_spks, spk_emb_dim)
Expand Down Expand Up @@ -228,7 +230,10 @@ def forward(self, x, x_lengths, y, y_lengths, spks=None, out_size=None, cond=Non
# Compute loss of the decoder
diff_loss, _ = self.decoder.compute_loss(x1=y, mask=y_mask, mu=mu_y, spks=spks, cond=cond)

prior_loss = torch.sum(0.5 * ((y - mu_y) ** 2 + math.log(2 * math.pi)) * y_mask)
prior_loss = prior_loss / (torch.sum(y_mask) * self.n_feats)
if self.prior_loss:
prior_loss = torch.sum(0.5 * ((y - mu_y) ** 2 + math.log(2 * math.pi)) * y_mask)
prior_loss = prior_loss / (torch.sum(y_mask) * self.n_feats)
else:
prior_loss = 0

return dur_loss, prior_loss, diff_loss
11 changes: 11 additions & 0 deletions matcha/text/cleaners.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import re

import phonemizer
import piper_phonemize
from unidecode import unidecode

# To avoid excessive logging we set the log level of the phonemizer package to Critical
Expand Down Expand Up @@ -103,3 +104,13 @@ def english_cleaners2(text):
phonemes = global_phonemizer.phonemize([text], strip=True, njobs=1)[0]
phonemes = collapse_whitespace(phonemes)
return phonemes


def english_cleaners_piper(text):
    """Pipeline for English text using the piper (espeak) phonemizer.

    The text is passed through ``convert_to_ascii``, ``lowercase`` and
    ``expand_abbreviations`` and is then phonemized with the ``en-US`` voice.

    Args:
        text: Raw input text.

    Returns:
        The phoneme string for the whole input after ``collapse_whitespace``;
        the phonemes of separate sentences are joined with a single space.
    """
    text = convert_to_ascii(text)
    text = lowercase(text)
    text = expand_abbreviations(text)
    # phonemize_espeak yields one list of phonemes per detected sentence.
    # Indexing the result with [0] kept only the first sentence and raised
    # IndexError when no sentence was produced, so join every sentence.
    # NOTE(review): single-sentence input produces the same string as before.
    sentences = piper_phonemize.phonemize_espeak(text=text, voice="en-US")
    phonemes = " ".join("".join(sentence) for sentence in sentences)
    phonemes = collapse_whitespace(phonemes)
    return phonemes
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,11 @@ torchaudio
matplotlib
pandas
conformer==0.3.2
diffusers==0.21.3
diffusers==0.25.0
notebook
ipywidgets
gradio
gdown
wget
seaborn
piper_phonemize

0 comments on commit 47a629f

Please sign in to comment.