diff --git a/lib/codecs.js b/lib/codecs.js
index 9a37484a..ad7e93fb 100644
--- a/lib/codecs.js
+++ b/lib/codecs.js
@@ -1,9 +1,96 @@
+'use strict';
+
+/* eslint-disable new-cap */
+
 var iconv = require('iconv-lite');
+var through = require('through2');
+
+var DEFAULT_ENCODING = require('./constants').DEFAULT_ENCODING;
+
+// Wraps an iconv-lite codec with buffer and stream encode/decode helpers.
+// `enc` falls back to the requested encoding name when the codec has none.
+function Codec(codec, encoding) {
+  this.codec = codec;
+  this.enc = codec.enc || encoding;
+  this.bomAware = codec.bomAware || false;
+}
+
 
-var cache = {
-  utf8: iconv.getCodec('utf8'),
+// Creates a new encoder instance for the given iconv-lite codec.
+function getEncoder(codec) {
+  return new codec.encoder(null, codec);
+}
+
+// Encodes a string to a Buffer, including any bytes flushed by encoder.end().
+Codec.prototype.encode = function(str) {
+  var encoder = getEncoder(this.codec);
+  var buf = encoder.write(str);
+  var end = encoder.end();
+  // Buffer.concat takes an array of buffers as its first argument.
+  return end && end.length > 0 ? Buffer.concat([buf, end]) : buf;
 };
 
+// Returns a through2 stream that encodes incoming string chunks.
+Codec.prototype.encodeStream = function() {
+  var encoder = getEncoder(this.codec);
+  return through(
+    { decodeStrings: false },
+    function(str, enc, cb) {
+      var buf = encoder.write(str);
+      if (buf && buf.length) {
+        this.push(buf);
+      }
+      cb();
+    },
+    function(cb) {
+      var buf = encoder.end();
+      if (buf && buf.length) {
+        this.push(buf);
+      }
+      cb();
+    }
+  );
+};
+
+
+// Creates a new decoder instance for the given iconv-lite codec.
+function getDecoder(codec) {
+  return new codec.decoder(null, codec);
+}
+
+// Decodes a Buffer to a string, including any output flushed by decoder.end().
+Codec.prototype.decode = function(buf) {
+  var decoder = getDecoder(this.codec);
+  var str = decoder.write(buf);
+  var end = decoder.end();
+  return end ? str + end : str;
+};
+
+// Returns a through2 stream that decodes incoming buffer chunks to strings.
+Codec.prototype.decodeStream = function() {
+  var decoder = getDecoder(this.codec);
+  return through(
+    { encoding: DEFAULT_ENCODING },
+    function(buf, enc, cb) {
+      var str = decoder.write(buf);
+      if (str && str.length) {
+        this.push(str, DEFAULT_ENCODING);
+      }
+      cb();
+    },
+    function(cb) {
+      var str = decoder.end();
+      if (str && str.length) {
+        this.push(str, DEFAULT_ENCODING);
+      }
+      cb();
+    }
+  );
+};
+
+
+var cache = {};
+
 function getCodec(encoding) {
   var codec = cache[encoding];
   if (!!codec || !encoding || cache.hasOwnProperty(encoding)) {
@@ -11,7 +98,7 @@ function getCodec(encoding) {
   }
 
   try {
-    codec = iconv.getCodec(encoding);
+    codec = new Codec(iconv.getCodec(encoding), encoding);
   } catch (err) {
     // Unsupported codec
   }
@@ -20,4 +107,8 @@ function getCodec(encoding) {
   return codec;
 }
 
+
+// Pre-load default encoding
+getCodec(DEFAULT_ENCODING);
+
 module.exports = getCodec;
diff --git a/lib/constants.js b/lib/constants.js
index f0c28256..db60b770 100644
--- a/lib/constants.js
+++ b/lib/constants.js
@@ -3,4 +3,5 @@
 module.exports = {
   MASK_MODE: parseInt('7777', 8),
   DEFAULT_FILE_MODE: parseInt('0666', 8),
+  DEFAULT_ENCODING: 'utf8',
 };
diff --git a/lib/dest/options.js b/lib/dest/options.js
index 33ac7723..d824e979 100644
--- a/lib/dest/options.js
+++ b/lib/dest/options.js
@@ -1,5 +1,7 @@
 'use strict';
 
+var DEFAULT_ENCODING = require('../constants').DEFAULT_ENCODING;
+
 var config = {
   cwd: {
     type: 'string',
@@ -27,7 +29,7 @@ var config = {
   },
   encoding: {
     type: ['string', 'boolean'],
-    default: 'utf8',
+    default: DEFAULT_ENCODING,
   },
   sourcemaps: {
     type: ['string', 'boolean'],
diff --git a/lib/dest/write-contents/write-buffer.js b/lib/dest/write-contents/write-buffer.js
index fb75177c..c72d0c13 100644
--- a/lib/dest/write-contents/write-buffer.js
+++ b/lib/dest/write-contents/write-buffer.js
@@ -1,9 +1,8 @@
 'use strict';
 
-var iconv = require('iconv-lite');
-
 var fo = require('../../file-operations');
 var getCodec = require('../../codecs');
+var DEFAULT_ENCODING = require('../../constants').DEFAULT_ENCODING;
 
 function writeBuffer(file, optResolver, onWritten) {
   var encoding = optResolver.resolve('encoding', file);
@@ -19,9 +18,9 @@ function writeBuffer(file, optResolver, onWritten) {
 
   var contents = file.contents;
 
-  if (encoding && codec.enc !== 'utf8') {
-    contents = iconv.decode(contents, 'utf8', { stripBOM: false });
-    contents = iconv.encode(contents, encoding);
+  if (encoding && codec.enc !== DEFAULT_ENCODING) {
+    contents = getCodec(DEFAULT_ENCODING).decode(contents);
+    contents = codec.encode(contents);
   }
 
   fo.writeFile(file.path, contents, opt, onWriteFile);
diff --git a/lib/dest/write-contents/write-stream.js b/lib/dest/write-contents/write-stream.js
index 87957400..561ae366 100644
--- a/lib/dest/write-contents/write-stream.js
+++ b/lib/dest/write-contents/write-stream.js
@@ -1,9 +1,8 @@
 'use strict';
 
-var iconv = require('iconv-lite');
-
 var fo = require('../../file-operations');
 var getCodec = require('../../codecs');
+var DEFAULT_ENCODING = require('../../constants').DEFAULT_ENCODING;
 var readStream = require('../../src/read-contents/read-stream');
 
 function writeStream(file, optResolver, onWritten) {
@@ -24,10 +23,11 @@ function writeStream(file, optResolver, onWritten) {
 
   var contents = file.contents;
 
-  if (encoding && encoding.enc !== 'utf8') {
+  // Only transcode when the codec is not already the default encoding.
+  if (encoding && codec.enc !== DEFAULT_ENCODING) {
     contents = contents
-      .pipe(iconv.decodeStream('utf8', { stripBOM: false }))
-      .pipe(iconv.encodeStream(encoding));
+      .pipe(getCodec(DEFAULT_ENCODING).decodeStream())
+      .pipe(codec.encodeStream());
   }
 
   file.contents.once('error', onComplete);
diff --git a/lib/src/options.js b/lib/src/options.js
index d7e9b9ba..e6b090c6 100644
--- a/lib/src/options.js
+++ b/lib/src/options.js
@@ -1,5 +1,7 @@
 'use strict';
 
+var DEFAULT_ENCODING = require('../constants').DEFAULT_ENCODING;
+
 var config = {
   buffer: {
     type: 'boolean',
@@ -18,7 +20,7 @@ var config = {
   },
   encoding: {
     type: ['string', 'boolean'],
-    default: 'utf8',
+    default: DEFAULT_ENCODING,
   },
   sourcemaps: {
     type: 'boolean',
diff --git a/lib/src/read-contents/read-buffer.js b/lib/src/read-contents/read-buffer.js
index 1561a45a..a38b9ce1 100644
--- a/lib/src/read-contents/read-buffer.js
+++ b/lib/src/read-contents/read-buffer.js
@@ -1,10 +1,10 @@
 'use strict';
 
 var fs = require('graceful-fs');
-var iconv = require('iconv-lite');
 var removeBomBuffer = require('remove-bom-buffer');
 
 var getCodec = require('../../codecs');
+var DEFAULT_ENCODING = require('../../constants').DEFAULT_ENCODING;
 
 function bufferFile(file, optResolver, onRead) {
   var encoding = optResolver.resolve('encoding', file);
@@ -23,10 +23,10 @@ function bufferFile(file, optResolver, onRead) {
     if (encoding) {
       var removeBOM = codec.bomAware && optResolver.resolve('removeBOM', file);
 
-      if (codec.enc !== 'utf8') {
-        contents = iconv.decode(contents, encoding, { stripBOM: false });
+      if (codec.enc !== DEFAULT_ENCODING) {
+        contents = codec.decode(contents);
         removeBOM = removeBOM && contents[0] === '\ufeff';
-        contents = iconv.encode(contents, 'utf8');
+        contents = getCodec(DEFAULT_ENCODING).encode(contents);
       }
 
       if (removeBOM) {
diff --git a/lib/src/read-contents/read-stream.js b/lib/src/read-contents/read-stream.js
index e1f900d8..108e8738 100644
--- a/lib/src/read-contents/read-stream.js
+++ b/lib/src/read-contents/read-stream.js
@@ -1,11 +1,11 @@
 'use strict';
 
 var fs = require('graceful-fs');
-var iconv = require('iconv-lite');
 var removeBomStream = require('remove-bom-stream');
 var lazystream = require('lazystream');
 
 var getCodec = require('../../codecs');
+var DEFAULT_ENCODING = require('../../constants').DEFAULT_ENCODING;
 
 function streamFile(file, optResolver, onRead) {
   var encoding = optResolver.resolve('encoding', file);
@@ -22,10 +22,10 @@ function streamFile(file, optResolver, onRead) {
     if (encoding) {
       var removeBOM = codec.bomAware && optResolver.resolve('removeBOM', file);
 
-      if (codec.enc !== 'utf8') {
+      if (codec.enc !== DEFAULT_ENCODING) {
         contents = contents
-          .pipe(iconv.decodeStream(encoding, { stripBOM: false }))
-          .pipe(iconv.encodeStream('utf8'));
+          .pipe(codec.decodeStream())
+          .pipe(getCodec(DEFAULT_ENCODING).encodeStream());
       }
 
       if (removeBOM) {
diff --git a/test/codecs.js b/test/codecs.js
index 0b23ccf7..0222e05a 100644
--- a/test/codecs.js
+++ b/test/codecs.js
@@ -1,8 +1,24 @@
 'use strict';
 
+var fs = require('graceful-fs');
 var expect = require('expect');
+var miss = require('mississippi');
+
+var from = miss.from;
+var pipe = miss.pipe;
+var concat = miss.concat;
 
 var getCodec = require('../lib/codecs');
+var DEFAULT_ENCODING = require('../lib/constants').DEFAULT_ENCODING;
+
+var testCodec = require('./utils/codecs');
+var testConstants = require('./utils/test-constants');
+
+var beNotBomInputPath = testConstants.beNotBomInputPath;
+var leNotBomInputPath = testConstants.leNotBomInputPath;
+var notBomContents = testConstants.notBomContents;
+var encodedInputPath = testConstants.encodedInputPath;
+var encodedContents = testConstants.encodedContents;
 
 describe('codecs', function() {
 
@@ -11,51 +27,201 @@ describe('codecs', function() {
     done();
   });
 
-  it('returns an object for \'utf8\'', function(done) {
-    var codec = getCodec('utf8');
-    expect(typeof codec).toEqual('object');
+  it('returns undefined for unsupported encoding', function(done) {
+    var codec = getCodec('fubar42');
+    expect(codec).toBe(undefined);
     done();
   });
 
-  it('returns an object for \'utf8\' with an enc property', function(done) {
-    var codec = getCodec('utf8');
-    expect(codec.enc).toEqual('utf8');
+  it('returns a proper codec for default encoding ' + DEFAULT_ENCODING, function(done) {
+    var codec = getCodec(DEFAULT_ENCODING);
+    testCodec(codec);
+    expect(codec.enc).toEqual(DEFAULT_ENCODING);
+    expect(codec.bomAware).toBe(true);
     done();
   });
 
-  it('returns an object for \'utf8\' with a bomAware property', function(done) {
-    var codec = getCodec('utf8');
-    expect(codec.bomAware).toEqual(true);
+  it('returns a proper codec for utf16be', function(done) {
+    var codec = getCodec('utf16be');
+    testCodec(codec);
+    expect(codec.bomAware).toBe(true);
     done();
   });
 
-  it('returns an object for \'utf16be\'', function(done) {
+  it('can decode bytes from utf16be encoding to a string (buffer)', function(done) {
     var codec = getCodec('utf16be');
-    expect(typeof codec).toEqual('object');
+    var expected = notBomContents.replace('X', 'BE');
+
+    var result = codec.decode(fs.readFileSync(beNotBomInputPath));
+    expect(result).toExist();
+    expect(typeof result).toEqual('string');
+    expect(result.slice(2)).toEqual(expected); // Ignore leading garbage
     done();
   });
 
-  it('returns an object for \'utf16be\' with a bomAware property', function(done) {
+  it('can decode bytes from utf16be encoding to a string (stream)', function(done) {
     var codec = getCodec('utf16be');
-    expect(codec.bomAware).toEqual(true);
+    var expected = notBomContents.replace('X', 'BE');
+
+    function assert(result) {
+      expect(result).toExist();
+      expect(typeof result).toEqual('string');
+      expect(result.slice(2)).toEqual(expected); // Ignore leading garbage
+    }
+
+    pipe([
+      fs.createReadStream(beNotBomInputPath),
+      codec.decodeStream(),
+      concat(assert),
+    ], done);
+  });
+
+  it('can encode a string to bytes in utf16be encoding (buffer)', function(done) {
+    var codec = getCodec('utf16be');
+    var expected = fs.readFileSync(beNotBomInputPath);
+
+    var result = codec.encode(notBomContents.replace('X', 'BE'));
+    expect(result).toExist();
+    expect(typeof result).toEqual('object');
+    expect(Buffer.isBuffer(result)).toBe(true);
+    expect(result).toMatch(expected.slice(4)); // Ignore leading garbage
     done();
   });
 
-  it('returns an object for \'utf16le\'', function(done) {
+  it('can encode a string to bytes in utf16be encoding (stream)', function(done) {
+    var codec = getCodec('utf16be');
+    var expected = fs.readFileSync(beNotBomInputPath);
+
+    function assert(result) {
+      expect(result).toExist();
+      expect(typeof result).toEqual('object');
+      expect(Buffer.isBuffer(result)).toBe(true);
+      expect(result).toMatch(expected.slice(4)); // Ignore leading garbage
+    }
+
+    pipe([
+      from.obj([notBomContents.replace('X', 'BE')]),
+      codec.encodeStream(),
+      concat(assert),
+    ], done);
+  });
+
+  it('returns a proper codec for utf16le', function(done) {
     var codec = getCodec('utf16le');
-    expect(typeof codec).toEqual('object');
+    testCodec(codec);
+    expect(codec.bomAware).toBe(true);
     done();
   });
 
-  it('returns an object for \'utf16le\' with a bomAware property', function(done) {
+  it('can decode bytes from utf16le encoding to a string (buffer)', function(done) {
     var codec = getCodec('utf16le');
-    expect(codec.bomAware).toEqual(true);
+    var expected = notBomContents.replace('X', 'LE');
+
+    var result = codec.decode(fs.readFileSync(leNotBomInputPath));
+    expect(result).toExist();
+    expect(typeof result).toEqual('string');
+    expect(result.slice(2)).toEqual(expected); // Ignore leading garbage
     done();
   });
 
-  it('returns undefined for unsupported encoding', function(done) {
-    var codec = getCodec('fubar42');
-    expect(codec).toNotExist();
+  it('can decode bytes from utf16le encoding to a string (stream)', function(done) {
+    var codec = getCodec('utf16le');
+    var expected = notBomContents.replace('X', 'LE');
+
+    function assert(result) {
+      expect(result).toExist();
+      expect(typeof result).toEqual('string');
+      expect(result.slice(2)).toEqual(expected); // Ignore leading garbage
+    }
+
+    pipe([
+      fs.createReadStream(leNotBomInputPath),
+      codec.decodeStream(),
+      concat(assert),
+    ], done);
+  });
+
+  it('can encode a string to bytes in utf16le encoding (buffer)', function(done) {
+    var codec = getCodec('utf16le');
+    var expected = fs.readFileSync(leNotBomInputPath);
+
+    var result = codec.encode(notBomContents.replace('X', 'LE'));
+    expect(result).toExist();
+    expect(typeof result).toEqual('object');
+    expect(Buffer.isBuffer(result)).toBe(true);
+    expect(result).toMatch(expected.slice(4)); // Ignore leading garbage
+    done();
+  });
+
+  it('can encode a string to bytes in utf16le encoding (stream)', function(done) {
+    var codec = getCodec('utf16le');
+    var expected = fs.readFileSync(leNotBomInputPath);
+
+    function assert(result) {
+      expect(result).toExist();
+      expect(typeof result).toEqual('object');
+      expect(Buffer.isBuffer(result)).toBe(true);
+      expect(result).toMatch(expected.slice(4)); // Ignore leading garbage
+    }
+
+    pipe([
+      from.obj([notBomContents.replace('X', 'LE')]),
+      codec.encodeStream(),
+      concat(assert),
+    ], done);
+  });
+
+  it('returns a proper codec for gb2312', function(done) {
+    var codec = getCodec('gb2312');
+    testCodec(codec);
+    done();
+  });
+
+  it('can decode bytes from gb2312 encoding to a string (buffer)', function(done) {
+    var codec = getCodec('gb2312');
+    var expected = encodedContents;
+
+    var result = codec.decode(fs.readFileSync(encodedInputPath));
+    expect(result).toEqual(expected);
     done();
   });
+
+  it('can decode bytes from gb2312 encoding to a string (stream)', function(done) {
+    var codec = getCodec('gb2312');
+    var expected = encodedContents;
+
+    function assert(result) {
+      expect(result).toEqual(expected);
+    }
+
+    pipe([
+      fs.createReadStream(encodedInputPath),
+      codec.decodeStream(),
+      concat(assert),
+    ], done);
+  });
+
+  it('can encode a string to bytes in gb2312 encoding (buffer)', function(done) {
+    var codec = getCodec('gb2312');
+    var expected = fs.readFileSync(encodedInputPath);
+
+    var result = codec.encode(encodedContents);
+    expect(result).toMatch(expected);
+    done();
+  });
+
+  it('can encode a string to bytes in gb2312 encoding (stream)', function(done) {
+    var codec = getCodec('gb2312');
+    var expected = fs.readFileSync(encodedInputPath);
+
+    function assert(result) {
+      expect(result).toMatch(expected);
+    }
+
+    pipe([
+      from.obj([encodedContents]),
+      codec.encodeStream(),
+      concat(assert),
+    ], done);
+  });
 });
diff --git a/test/fixtures/not-bom-utf16be.txt b/test/fixtures/not-bom-utf16be.txt
index b9dce78a..54d2ef62 100644
Binary files a/test/fixtures/not-bom-utf16be.txt and b/test/fixtures/not-bom-utf16be.txt differ
diff --git a/test/fixtures/not-bom-utf16le.txt b/test/fixtures/not-bom-utf16le.txt
index 07cc600c..8e94b0bc 100644
Binary files a/test/fixtures/not-bom-utf16le.txt and b/test/fixtures/not-bom-utf16le.txt differ
diff --git a/test/utils/codecs.js b/test/utils/codecs.js
new file mode 100644
index 00000000..d959a856
--- /dev/null
+++ b/test/utils/codecs.js
@@ -0,0 +1,16 @@
+'use strict';
+
+var expect = require('expect');
+
+function checkCodec(codec) {
+  expect(typeof codec).toEqual('object');
+  expect(codec.constructor.name).toEqual('Codec');
+  expect(typeof codec.enc).toEqual('string');
+  expect(typeof codec.bomAware).toEqual('boolean');
+  expect(typeof codec.encode).toEqual('function');
+  expect(typeof codec.encodeStream).toEqual('function');
+  expect(typeof codec.decode).toEqual('function');
+  expect(typeof codec.decodeStream).toEqual('function');
+}
+
+module.exports = checkCodec;
diff --git a/test/utils/test-constants.js b/test/utils/test-constants.js
index 2fd0b5fa..3d3099cc 100644
--- a/test/utils/test-constants.js
+++ b/test/utils/test-constants.js
@@ -29,6 +29,7 @@ var leBomInputPath = path.join(inputBase, './bom-utf16le.txt');
 var bomContents = 'This file is saved as UTF-X with the appropriate BOM.\n';
 var beNotBomInputPath = path.join(inputBase, './not-bom-utf16be.txt');
 var leNotBomInputPath = path.join(inputBase, './not-bom-utf16le.txt');
+var notBomContents = 'This file is saved as UTF-16-X. It contains some garbage at the start that looks like a UTF-8-encoded BOM (but isn\'t).\n';
 var ranBomInputPath = path.join(inputBase, './ranbom.bin');
 // Used for encoding tests
 var encodedInputPath = path.join(inputBase, './enc-gb2312.txt');
@@ -69,6 +70,7 @@ module.exports = {
   leBomInputPath: leBomInputPath,
   beNotBomInputPath: beNotBomInputPath,
   leNotBomInputPath: leNotBomInputPath,
+  notBomContents: notBomContents,
   ranBomInputPath: ranBomInputPath,
   bomContents: bomContents,
   encodedInputPath: encodedInputPath,