-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_rules.py
42 lines (28 loc) · 1.03 KB
/
parse_rules.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
import csv
import os
import pickle
def _read_file_lines(path):
    """Return the UTF-8 text stored at ``path`` as a list of lines.

    The whole file is read at once and split on the newline character, so
    a file that ends with a newline yields a final empty string, and an
    empty file yields a list holding one empty string.
    """
    with open(path, 'r', encoding='utf-8') as handle:
        return handle.read().split("\n")
def _split_lines(lines, separator=" "):
    """Break every entry of ``lines`` into whitespace-delimited tokens.

    NOTE: ``separator`` is accepted but never consulted; each line is
    split with the no-argument form of ``split``, i.e. on runs of
    whitespace. Callers passing a custom separator get the same result.

    Returns a list holding one list of tokens per input line (an empty
    line gives an empty token list).
    """
    tokenised = []
    for entry in lines:
        tokenised.append(entry.split())
    return tokenised
def _get_rules(directory):
    """Load ``phoneticRules.txt`` from ``directory`` as rows of tokens.

    The file is read as UTF-8, cut into lines on the newline character,
    and every line is passed through ``_split_lines``.

    Returns whatever ``_split_lines`` produces for those lines (one entry
    per line of the file).
    """
    rules_path = os.path.join(directory, "phoneticRules.txt")
    with open(rules_path, 'r', encoding='utf-8') as handle:
        raw_lines = handle.read().split("\n")
    return _split_lines(raw_lines, "\t\t\t\t\t\t")
def _get_slang(directory):
    """Load the slang mapping stored in ``slang.txt`` inside ``directory``.

    Every non-empty line must consist of exactly two fields separated by
    a single space; the first field becomes the key and the second the
    value. When a key occurs more than once, the last occurrence wins.

    Empty lines are skipped. Splitting the file text on newlines yields
    an empty string after a trailing newline, and feeding that to the
    parser previously raised ``ValueError`` for an otherwise valid file.

    Raises:
        ValueError: if a non-empty line does not split into exactly two
            space-separated fields.
    """
    slang_path = os.path.join(directory, "slang.txt")
    with open(slang_path, 'r', encoding='utf-8') as handle:
        lines = handle.read().split("\n")

    slang = {}
    for line in lines:
        if not line:
            # Blank separator lines and the trailing-newline artefact.
            continue
        key, value = line.split(" ")
        slang[key] = value
    return slang
def _get_exps(directory):
    """Return the pair ``(rules, slang)`` loaded from ``directory``.

    The first element comes from ``_get_rules`` and the second from
    ``_get_slang``; the rules are loaded before the slang mapping.
    """
    rules = _get_rules(directory)
    slang = _get_slang(directory)
    return rules, slang
def get_vocab_nltk(path):
    """Unpickle and return the object stored in the file at ``path``.

    The file is opened in binary mode and handed to ``pickle.load``.

    NOTE(review): unpickling can execute arbitrary code, so ``path``
    should only ever point at a trusted file.
    """
    with open(path, 'rb') as stream:
        vocab = pickle.load(stream)
    return vocab
def get_vocab(path):
    """Read a vocabulary file and return a ``{word: count}`` dictionary.

    The lines of ``path`` are obtained from ``_read_file_lines``. In each
    non-empty line the first space-separated field is the word and the
    second is converted with ``int``; any further fields are ignored.
    When a word occurs more than once, the last occurrence wins.

    Empty lines are skipped. ``_read_file_lines`` returns an empty string
    after a trailing newline, which previously caused an ``IndexError``
    for an otherwise valid file.

    Raises:
        IndexError: if a non-empty line contains no space.
        ValueError: if the second field is not an integer literal.
    """
    vocab = {}
    for line in _read_file_lines(path):
        if not line:
            # Blank separator lines and the trailing-newline artefact.
            continue
        fields = line.split(" ")  # split once instead of once per field
        vocab[fields[0]] = int(fields[1])
    return vocab