-
Notifications
You must be signed in to change notification settings - Fork 36
/
grammarmaker.js
158 lines (128 loc) · 4.21 KB
/
grammarmaker.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
// Daniel Shiffman
// Programming from A to Z, Fall 2014
// https://github.com/shiffman/Programming-from-A-to-Z-F14
// Context Free Grammar
// This example writes a "haiku" grammar file using a source text
// Using RiTa it counts syllables to place words properly in the grammar file
// Script entry point: validate the command line, load dependencies and
// start reading the source text.
// We can get command line arguments in a node program
// Here we're checking to make sure we've typed three things (the last being the filename)
if (process.argv.length < 3) {
console.log('Oops, you forgot to pass in a text file.');
// Exit with a non-zero status so callers can tell the run failed
process.exit(1);
}
// The 'fs' (file system) module allows us to read and write files
// http://nodejs.org/api/fs.html
var fs = require('fs');
// The third command line entry is the path of the source text
var filename = process.argv[2];
// NOTE(review): the module is bound to `rita` here, but analyze() calls
// `RiTa.getSyllables` -- confirm that requiring 'rita' also defines `RiTa`
var rita = require('rita');
// Read the file as utf8 and process the data in the function analyze
// (analyze is a function declaration further down, so it is already defined here)
fs.readFile(filename, 'utf8', analyze);
// Callback for fs.readFile: builds a "haiku" grammar from the source text
// and writes it to the file "generated_grammar.g".
//   err  - error reported by fs.readFile, if any (it is rethrown)
//   data - contents of the source text as a string
function analyze(err, data) {
  if (err) {
    throw err;
  }

  // Largest syllable count that gets its own production rule (<1> .. <5>)
  var maxSyllables = 5;

  // Fixed part of the grammar: how 5- and 7-syllable lines are composed
  var grammar = '';
  grammar += "# This grammar file is based on Daniel Howe's Haiku grammar\n";
  grammar += "# Which is based on a grammar by G.B. Kaminaga\n";
  grammar += "# line-breaks are noted by '%' sign\n\n";
  grammar += "<start> -> ";
  grammar += "<5-line> % <7-line> % <5-line>\n";
  grammar += "<5-line> -> ";
  grammar += " <1> <4> | <1> <3> <1> | <1> <1> <3> | <1> <2> <2> | <1> <2> <1> <1> | <1> <1> <2> <1> | <1> <1> <1> <2> | <1> <1> <1> <1> <1> | <2> <3> | <2> <2> <1> | <2> <1> <2> | <2> <1> <1> <1> | <3> <2> | <3> <1> <1> | <4> <1> | <5>\n";
  grammar += "<7-line> ->";
  grammar += "<1> <1> <5-line> | <2> <5-line> | <5-line> <1> <1> | <5-line> <2> \n";

  // One array per syllable count: wordsBySyllable[n - 1] holds the
  // words that have n syllables
  var wordsBySyllable = [];
  for (var b = 0; b < maxSyllables; b++) {
    wordsBySyllable.push([]);
  }

  // Collect the distinct words of the source text
  var concordance = new Concordance();
  concordance.process(data);
  var words = concordance.getKeys();

  // Go through all the words
  for (var w = 0; w < words.length; w++) {
    var word = words[w];
    // Use RiTa's Analyzer to determine syllable count
    // NOTE(review): the file binds the module to `rita`, not `RiTa` --
    // confirm that `RiTa` is actually in scope when this runs
    var syllables = RiTa.getSyllables(word);
    console.log(syllables);
    // Syllables are separated with slashes
    var count = syllables.split("/").length;
    // Words with more syllables than the grammar has rules for are dropped
    if (count <= maxSyllables) {
      wordsBySyllable[count - 1].push(word);
    }
  }

  // Finish up the file by writing production rules
  // for 1-5 syllable words
  for (var n = 0; n < maxSyllables; n++) {
    grammar += "<" + (n + 1) + "> ->";
    // join() puts the separator only BETWEEN words, so the rule no longer
    // ends with a dangling " | " (which reads as an empty alternative)
    grammar += wordsBySyllable[n].join(" | ");
    grammar += "\n";
  }

  // Write the finished grammar out
  fs.writeFile("generated_grammar.g", grammar, output);

  // Callback for fs.writeFile: rethrows a write error, otherwise reports success
  function output(err) {
    if (err) {
      throw err;
    }
    console.log("The new file was saved!");
  }
}
// A concordance: counts how often each word occurs in the text it is given.
// Use with `new Concordance()`.
//   hash - maps a lowercased word to its number of occurrences
//   keys - the distinct words, in order of first appearance
//          (until sortByCount() reorders them)
function Concordance() {
  // A prototype-less object, so that words such as "constructor" or
  // "__proto__" are not mistaken for members inherited from Object.prototype
  this.hash = Object.create(null);
  this.keys = [];

  // Splitting up the text
  function split(text) {
    // Split into array of tokens on runs of non-word characters
    return text.split(/\W+/);
  }

  // A function to validate a token: it must contain at least two
  // consecutive word characters (this also rejects empty strings)
  function validate(token) {
    return /\w{2,}/.test(token);
  }

  // Process new text: count every valid token in `data` (a string)
  this.process = function(data) {
    var tokens = split(data);
    // For every token
    for (var i = 0; i < tokens.length; i++) {
      // Lowercase everything to ignore case
      var token = tokens[i].toLowerCase();
      if (validate(token)) {
        // Increase the count for the token
        this.increment(token);
      }
    }
  };

  // An array of keys (the live internal array, not a copy)
  this.getKeys = function() {
    return this.keys;
  };

  // Get the count for a word; undefined if the word was never counted
  this.getCount = function(word) {
    return this.hash[word];
  };

  // Increment the count for a word
  this.increment = function(word) {
    // Is this a new word?
    if (this.hash[word] === undefined) {
      this.hash[word] = 1;
      this.keys.push(word);
    // Otherwise just increment its count
    } else {
      this.hash[word]++;
    }
  };

  // Sort array of keys by counts, most frequent first (sorts in place)
  this.sortByCount = function() {
    // The comparator is a plain function, so keep a reference to this
    // object for it to look the counts up on
    var dict = this;
    this.keys.sort(function(a, b) {
      return dict.getCount(b) - dict.getCount(a);
    });
  };
}