-
Notifications
You must be signed in to change notification settings - Fork 6
/
jedGettextParser.js
202 lines (169 loc) · 7.44 KB
/
jedGettextParser.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
'use strict';
(function(root, factory) {
if (typeof define === 'function' && define.amd) {
// AMD. Register as an anonymous module.
define([], factory);
} else if (typeof exports === 'object') {
// Node. Does not work with strict CommonJS, but
// only CommonJS-like environments that support module.exports,
// like Node.
module.exports = factory();
} else {
// Browser globals
root.jedGettextParser = factory();
}
}(this, function() {
/* Return what this module exports. */
function Parser() {
this._littleEndian;
this._dataView;
this._encoding;
this._originalOffset;
this._translationOffset;
}
Parser.prototype._MAGIC = 0x950412de;
Parser.prototype._getEndianness = function() {
/* MO files can be big or little endian, independent of the source or current platform. Use DataView's optional get*** argument to set the endianness if necessary. */
if (this._dataView.getUint32(0, true) == this._MAGIC) {
this._littleEndian = true;
} else if (this._dataView.getUint32(0, false) == this._MAGIC){
this._littleEndian = false;
} else {
throw new Error('Not a gettext binary message catalog file.');
}
}
Parser.prototype._readTranslationPair = function(originalOffset, translationOffset) {
var length, position, idBytes, strBytes;
/* Get original byte array, that forms the key. */
length = this._dataView.getUint32(originalOffset, this._littleEndian);
position = this._dataView.getUint32(originalOffset + 4, this._littleEndian);
try {
idBytes = new Uint8Array(this._dataView.buffer, position, length);
} catch(e) {
throw new Error('The given ArrayBuffer data is corrupt or incomplete.');
}
/* Get translation byte array, that forms the value. */
length = this._dataView.getUint32(translationOffset, this._littleEndian);
position = this._dataView.getUint32(translationOffset + 4, this._littleEndian);
try {
strBytes = new Uint8Array(this._dataView.buffer, position, length);
} catch(e) {
throw new Error('The given ArrayBuffer data is corrupt or incomplete.');
}
return {
id: idBytes,
str: strBytes
};
}
Parser.prototype._parseHeader = function() {
/* Read translation header. This is stored as a msgstr where the msgid
is '', so it's the first entry in the translation block, since
strings are sorted. Assume that the header is in UTF-8. We only want
the language, encoding and plural forms values, which should all be
ASCII anyway.
*/
var msgBytes = this._readTranslationPair(this._originalOffset, this._translationOffset);
var language, pluralForms;
if (msgBytes.id.byteLength == 0) {
var decoder = new TextDecoder();
var str = decoder.decode(msgBytes.str);
var headers = {};
str.split("\n").forEach(function(line){
/* Header format is like HTTP headers. */
var parts = line.split(':');
var key = parts.shift().trim();
var value = parts.join(':').trim();
headers[key] = value;
});
/* Get encoding if not given. */
if (!this._encoding) {
var pos = headers['Content-Type'].indexOf('charset=');
if (pos != -1 && pos + 8 < headers['Content-Type'].length) {
/* TextDecoder expects a lowercased encoding name. */
this._encoding = headers['Content-Type'].substring(pos + 8).toLowerCase();
}
if (!this._encoding) {
this._encoding = 'utf-8';
}
}
/* Get language from header. */
language = headers['Language'];
/* Get plural forms from header. */
pluralForms = headers['Plural-Forms'];
}
return {
'': {
'domain': '',
'lang': language,
'plural_forms': pluralForms,
}
}
}
Parser.prototype._splitPlurals = function(msgid, msgstr) {
/* Need to handle plurals. Don't need to handle contexts, because Jed
expects the context-msgid strings to be its keys. However, plural
translations must be split into an array of strings. Jed only wants
the first part of a plural as its key. */
return {
id: msgid.split('\u0000')[0],
str: msgstr.split('\u0000')
}
}
Parser.prototype.parse = function(buffer, encoding) {
/* A mo file can be empty apart from its magic, revision, strings count,
offsets and hash table size, but fields for these must all exist, so
verify the file is large enough. */
if (buffer.byteLength < 28) {
throw new Error('The given ArrayBuffer is too small to hold a valid .mo file.');
}
this._dataView = new DataView(buffer);
this._encoding = encoding;
this._getEndianness();
/* Get size and offsets. Skip the revision and hash table, they're
unnecessary. */
var stringsCount = this._dataView.getUint32(8, this._littleEndian);
this._originalOffset = this._dataView.getUint32(12, this._littleEndian);
this._translationOffset = this._dataView.getUint32(16, this._littleEndian);
/* Parse header for info, and use it to create the Jed locale_data
object 'header'. */
var jedLocaleData = this._parseHeader();
/* Create a TextDecoder for encoding conversion. */
try {
var decoder = new TextDecoder(this._encoding);
} catch(e) {
throw new Error("The encoding label provided ('" + this._encoding + "') is invalid.");
}
/* Now get translations. */
var originalOffset = this._originalOffset + 8;
var translationOffset = this._translationOffset + 8;
for (var i = 1; i < stringsCount; ++i) {
var msgBytes = this._readTranslationPair(originalOffset, translationOffset);
var msg = this._splitPlurals( decoder.decode(msgBytes.id), decoder.decode(msgBytes.str) );
jedLocaleData[msg.id] = [].concat(msg.str);
originalOffset += 8;
translationOffset += 8;
}
return jedLocaleData;
}
return {
mo: {
parse: function(buffer, options) {
/* Leave the encoding undefined if no options are given. */
options = options || { domain: 'messages' };
options.domain = options.domain || 'messages';
if (buffer && buffer.byteLength == 0) {
throw new Error('Given ArrayBuffer is empty.');
}
if (!buffer || Object.prototype.toString.call(buffer) != '[object ArrayBuffer]') {
throw new Error('First argument must be an ArrayBuffer.');
}
var parser = new Parser();
var messages = parser.parse(buffer, options.encoding);
messages[''].domain = options.domain;
var locale_data = {};
locale_data[options.domain] = messages;
return locale_data;
}
}
};
}));