-
Notifications
You must be signed in to change notification settings - Fork 15
/
index.js
196 lines (172 loc) · 5.27 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
// CSV parser for node.js that handles all standard CSV parsing and
// returns the data elements through 'data' events on an event emitter.
// The exported functions are each(filename, options), where filename
// is the file to process, and parse(str, options, data_listener), where
// str is CSV text already in memory. options can have any
// of the following:
//
// strDelimiter: The string to use for delimiting data elements.
// headers: If the first line of the file represents headers.
// Setting this will convert the translated array into
// an object with the headers as attributes and the
// values assigned.
// readAmount: Number of bytes to read before parsing and processing.
//
//
//
//
var fs = require("fs"),
util = require("util"),
events = require("events");
// CSVToArray parsing function, adapted from
// http://www.bennadel.com/blog/1504-Ask-Ben-Parsing-CSV-Strings-With-Javascript-Exec-Regular-Expression-Command.htm
//
// Parses a delimited string into an array of rows, each row being an
// array of field strings.
//
// strData:      The text to parse. Rows are separated by \n, \r\n or \r.
// strDelimiter: The field delimiter. Optional, defaults to ",". It is
//               matched literally and may be longer than one character.
//
// Returns an array of arrays. Quoted fields may contain delimiters and
// line breaks; a doubled quote ("") inside a quoted field is unescaped
// to a single quote. An empty input yields one row holding one empty
// field, i.e. [[""]].
function CSVToArray( strData, strDelimiter ){
    // Check to see if the delimiter is defined. If not,
    // then default to comma.
    strDelimiter = String( strDelimiter || "," );

    // Escape every regular expression metacharacter so the delimiter is
    // matched literally. Blindly prefixing a single backslash would turn
    // letters into escapes ("n" -> \n) and would only cover the first
    // character of a longer delimiter.
    var strEscapedDelimiter = strDelimiter.replace( /[.*+?^${}()|[\]\\]/g, "\\$&" );

    // Create a regular expression to parse the CSV values.
    // Group 1 is the delimiter preceding the field, group 2 the body of
    // a quoted field and group 3 an unquoted field. No "i" flag: a
    // letter delimiter must not match its other case.
    var objPattern = new RegExp(
        (
            // Delimiters.
            "(" + strEscapedDelimiter + "|\\r?\\n|\\r|^)" +
            // Quoted fields.
            "(?:\"([^\"]*(?:\"\"[^\"]*)*)\"|" +
            // Standard fields: anything up to the next delimiter,
            // quote or line break.
            "((?:(?!" + strEscapedDelimiter + ")[^\"\\r\\n])*))"
        ),
        "g"
    );

    // Create an array to hold our data. Give the array
    // a default empty first row.
    var arrData = [[]];

    // Holds the capture groups of the current match.
    var arrMatches = null;

    // Keep looping over the regular expression matches
    // until we can no longer find a match.
    while ((arrMatches = objPattern.exec( strData )) !== null){
        // exec() does not advance lastIndex after an empty match, so a
        // zero-length match (start of string followed by an empty field)
        // would be returned forever. Step past it manually.
        if (arrMatches[ 0 ].length === 0){
            objPattern.lastIndex++;
        }

        // Get the delimiter that was found.
        var strMatchedDelimiter = arrMatches[ 1 ];

        if (strMatchedDelimiter.length){
            // The data itself starts with a delimiter, so the very
            // first field is empty and was never captured on its own.
            if (arrMatches.index === 0){
                arrData[ 0 ].push( "" );
            }
            // A delimiter that is not the field delimiter is a row
            // delimiter: start a new row.
            if (strMatchedDelimiter !== strDelimiter){
                arrData.push( [] );
            }
        }

        // Check which kind of value we captured (quoted or unquoted).
        // Compare against undefined rather than testing truthiness so
        // that an empty quoted field ("") is kept as an empty string.
        var strMatchedValue;
        if (arrMatches[ 2 ] !== undefined){
            // Quoted value: unescape any doubled quotes.
            strMatchedValue = arrMatches[ 2 ].replace( /""/g, "\"" );
        } else {
            // Non-quoted value.
            strMatchedValue = arrMatches[ 3 ];
        }

        // Add the value to the current (last) row.
        arrData[ arrData.length - 1 ].push( strMatchedValue );
    }

    // Return the parsed data.
    return( arrData );
}
exports.each = function (filename, options) {
options = (options || {});
var strDelimiter = (options.strDelimiter || ",");
var headers = null;
var position = 0;
var readAmount = (options.readAmount || (16384));
var buffer = "";
var fd = null;
var stream = new events.EventEmitter();
var emit_row = function(row) {
var data = CSVToArray(row)[0];
if (options["headers"]) {
if (headers == null) {
headers = data;
} else {
var obj = {};
data.forEach(function(d,i) { obj[headers[i]] = d; });
data = obj;
}
}
stream.emit("data", data);
}
var readMore = function() {
fs.read(fd, readAmount, position, "utf8", function(err, data, bytesRead) {
if (err) {
console.log("E1");
stream.emit("error",e);
fs.close(fd);
} else {
position += bytesRead;
buffer += data;
var parts = buffer.split("\n");
var pl = parts.length;
if (pl > 1) {
for( var i = 0; i < (pl - 1); i++) {
emit_row(parts[i]);
}
buffer = parts[pl-1];
}
if (bytesRead == readAmount) {
readMore();
} else {
if (buffer.length > 0) {
emit_row(buffer);
}
fs.close(fd);
stream.emit("end")
}
}
});
};
fs.open(filename, 'r', function (err, _fd) {
if (err) {
console.log("Could not open the file: "+filename);
} else if (_fd) {
fd = _fd;
readMore();
}
});
return stream;
};
exports.parse = function(str, options, data_listener) {
if (typeof options == "function") {
data_listener = options;
options = {};
}
if (typeof data_listener != "function") {
throw new Exception("Data listener must be provided");
}
options = (options || {});
var stream = new events.EventEmitter();
stream.addListener("data", data_listener);
var parts = str.split("\n");
var pl = parts.length;
var headers = null;
for( var i = 0; i < pl; i++) {
var data = CSVToArray(parts[i])[0];
if (options["headers"]) {
if (headers == null) {
headers = data;
} else {
var obj = {};
data.forEach(function(d,i) { obj[headers[i]] = d; });
data = obj;
}
}
stream.emit('data', data);
}
};