-
Notifications
You must be signed in to change notification settings - Fork 296
/
G2pDictionary.cs
154 lines (135 loc) · 5.4 KB
/
G2pDictionary.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace OpenUtau.Api {
/// <summary>
/// Dictionary-backed implementation of <see cref="IG2p"/>. Maps graphemes to
/// phoneme symbol sequences and classifies symbols as vowels and/or glides.
/// Instances are created with <see cref="NewBuilder"/> followed by
/// <see cref="Builder.Build"/>.
/// </summary>
public class G2pDictionary : IG2p {
    /// <summary>
    /// Dictionaries are stored as a trie for compact footprint and quick access.
    /// See https://en.wikipedia.org/wiki/Trie.
    /// </summary>
    class TrieNode {
        // Child nodes, keyed by the next character of the grapheme.
        public Dictionary<char, TrieNode> children = new Dictionary<char, TrieNode>();
        // Phonemes of the grapheme ending at this node; null when no entry ends here.
        public string[] symbols;
    }

    readonly TrieNode root;
    readonly Dictionary<string, bool> phonemeSymbols; // (phoneme, isVowel)
    readonly HashSet<string> glideSymbols;

    G2pDictionary(TrieNode root, Dictionary<string, bool> phonemeSymbols, HashSet<string> glideSymbols) {
        this.root = root;
        this.phonemeSymbols = phonemeSymbols;
        this.glideSymbols = glideSymbols;
    }

    /// <summary>
    /// Returns true when <paramref name="symbol"/> was registered as a phoneme symbol.
    /// A null symbol is never valid.
    /// </summary>
    public bool IsValidSymbol(string symbol) {
        // Dictionary lookups throw on a null key, so answer "no" explicitly.
        return symbol != null && phonemeSymbols.ContainsKey(symbol);
    }

    /// <summary>
    /// Returns true when <paramref name="symbol"/> is registered and flagged as a vowel.
    /// Unknown or null symbols yield false.
    /// </summary>
    public bool IsVowel(string symbol) {
        return symbol != null
            && phonemeSymbols.TryGetValue(symbol, out var isVowel)
            && isVowel;
    }

    /// <summary>
    /// Returns true when <paramref name="symbol"/> is registered as a glide.
    /// Unknown or null symbols yield false.
    /// </summary>
    public bool IsGlide(string symbol) {
        return symbol != null && glideSymbols.Contains(symbol);
    }

    /// <summary>
    /// Looks up the phonemes of <paramref name="grapheme"/>.
    /// The match is exact: characters are compared as-is, without any normalization.
    /// </summary>
    /// <returns>
    /// A fresh copy of the stored phoneme array, or null when the grapheme is null
    /// or has no entry.
    /// </returns>
    public string[] Query(string grapheme) {
        if (grapheme == null) {
            return null;
        }
        var node = root;
        foreach (var ch in grapheme) {
            if (!node.children.TryGetValue(ch, out var next)) {
                return null;
            }
            node = next;
        }
        // Hand out a copy so callers cannot modify the array stored in the trie.
        return (string[])node.symbols?.Clone();
    }

    /// <summary>
    /// Splits <paramref name="hint"/> on <paramref name="separator"/> and keeps only
    /// the tokens that are registered phoneme symbols, in their original order.
    /// </summary>
    /// <returns>The recognized symbols; an empty array when the hint is null.</returns>
    public string[] UnpackHint(string hint, char separator = ' ') {
        if (hint == null) {
            return Array.Empty<string>();
        }
        return hint.Split(separator)
            .Where(token => phonemeSymbols.ContainsKey(token))
            .ToArray();
    }

    /// <summary>
    /// Incrementally collects symbols and entries for a <see cref="G2pDictionary"/>.
    /// </summary>
    public class Builder {
        readonly TrieNode root;
        readonly Dictionary<string, bool> phonemeSymbols; // (phoneme, isVowel)
        readonly HashSet<string> glideSymbols;

        internal Builder() {
            root = new TrieNode();
            phonemeSymbols = new Dictionary<string, bool>();
            glideSymbols = new HashSet<string>();
        }

        /// <summary>
        /// Registers a valid symbol, classified by a type name: "vowel" marks a vowel,
        /// "semivowel" and "liquid" mark a glide, any other type is a plain non-vowel.
        /// Registering a symbol again replaces its previous classification.
        /// </summary>
        public Builder AddSymbol(string symbol, string type) {
            return AddSymbol(symbol, type == "vowel", type == "semivowel" || type == "liquid");
        }

        /// <summary>
        /// Registers a valid symbol that is not a glide.
        /// Registering a symbol again replaces its previous classification, including
        /// any glide flag set by an earlier call.
        /// </summary>
        public Builder AddSymbol(string symbol, bool isVowel) {
            // Delegate so that all overloads keep the vowel table and glide set in sync.
            return AddSymbol(symbol, isVowel, false);
        }

        /// <summary>
        /// Registers a valid symbol with explicit vowel and glide flags.
        /// A symbol flagged as vowel is never stored as a glide.
        /// Registering a symbol again replaces its previous classification.
        /// </summary>
        public Builder AddSymbol(string symbol, bool isVowel, bool isGlide) {
            phonemeSymbols[symbol] = isVowel;
            if (isGlide && !isVowel) {
                glideSymbols.Add(symbol);
            } else {
                glideSymbols.Remove(symbol);
            }
            return this;
        }

        /// <summary>
        /// Adds (or overwrites) the entry for <paramref name="grapheme"/>.
        /// Must finish adding symbols before adding entries, otherwise symbols get ignored:
        /// phonemes that are not registered at the time of this call are dropped.
        /// </summary>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="grapheme"/> or <paramref name="symbols"/> is null.
        /// </exception>
        public Builder AddEntry(string grapheme, IEnumerable<string> symbols) {
            // Validate before touching the trie so a bad call leaves it unchanged.
            if (grapheme == null) {
                throw new ArgumentNullException(nameof(grapheme));
            }
            if (symbols == null) {
                throw new ArgumentNullException(nameof(symbols));
            }
            var node = root;
            foreach (var ch in grapheme) {
                if (!node.children.TryGetValue(ch, out var child)) {
                    child = new TrieNode();
                    node.children[ch] = child;
                }
                node = child;
            }
            node.symbols = symbols
                .Where(symbol => symbol != null && phonemeSymbols.ContainsKey(symbol))
                .ToArray();
            return this;
        }

        /// <summary>
        /// Deserializes dictionary data from YAML text and loads it.
        /// </summary>
        public Builder Load(string input) {
            var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(input);
            return Load(data);
        }

        /// <summary>
        /// Deserializes dictionary data from a YAML reader and loads it.
        /// </summary>
        public Builder Load(TextReader textReader) {
            var data = Core.Yaml.DefaultDeserializer.Deserialize<G2pDictionaryData>(textReader);
            return Load(data);
        }

        /// <summary>
        /// Loads all symbols, then all entries, from <paramref name="data"/>.
        /// Null data, a null symbol list or a null entry list contribute nothing.
        /// </summary>
        public Builder Load(G2pDictionaryData data){
            // NOTE(review): presumably the deserializer yields null for an empty
            // document - confirm; either way a null here means "nothing to load".
            if (data == null) {
                return this;
            }
            // Symbols go first because AddEntry drops phonemes that are not yet registered.
            if (data.symbols != null) {
                foreach (var symbolData in data.symbols) {
                    AddSymbol(symbolData.symbol, symbolData.type);
                }
            }
            if (data.entries != null) {
                foreach (var entry in data.entries) {
                    AddEntry(entry.grapheme, entry.phonemes);
                }
            }
            return this;
        }

        /// <summary>
        /// Creates the dictionary. The result shares this builder's trie and symbol
        /// tables, so later calls on the builder remain visible through it.
        /// </summary>
        public G2pDictionary Build() {
            return new G2pDictionary(root, phonemeSymbols, glideSymbols);
        }
    }

    /// <summary>
    /// Creates an empty <see cref="Builder"/>.
    /// </summary>
    public static Builder NewBuilder() {
        return new Builder();
    }
}
}