From bf281cfdfa3f91fcac8a117c5bdf5c465a6d8fa0 Mon Sep 17 00:00:00 2001 From: Trevor Norris Date: Thu, 2 Jun 2016 10:55:36 -0600 Subject: [PATCH 1/2] buffer: introduce latin1 encoding term When node began using the OneByte API (f150d56) it also switched to officially supporting ISO-8859-1, though at the time no new encoding string was introduced. Introduce the new encoding string 'latin1' to be more explicit. The previous 'binary' is retained and documented as an alias to 'latin1'. While many tests have switched to use 'latin1', there are still plenty that do both 'binary' and 'latin1' checks side-by-side to ensure there is no regression. --- doc/api/buffer.md | 16 ++++- doc/api/crypto.md | 68 +++++++++---------- lib/_http_outgoing.js | 10 +-- lib/_tls_wrap.js | 2 +- lib/buffer.js | 9 ++- lib/internal/streams/lazy_transform.js | 2 +- lib/net.js | 3 +- lib/string_decoder.js | 3 +- src/node.cc | 12 +++- src/node.h | 18 ++--- src/node_buffer.cc | 14 ++-- src/stream_base-inl.h | 4 +- src/stream_base.cc | 2 +- src/string_bytes.cc | 14 ++-- .../test-stringbytes-external-at-max.js | 2 +- ...ngbytes-external-exceed-max-by-1-binary.js | 6 +- test/disabled/tls_server.js | 2 +- test/parallel/test-buffer-alloc.js | 13 ++-- test/parallel/test-buffer-bytelength.js | 5 +- test/parallel/test-buffer-fill.js | 21 ++++++ test/parallel/test-buffer-includes.js | 8 +++ test/parallel/test-buffer-indexof.js | 22 ++++++ test/parallel/test-buffer.js | 25 ++++++- test/parallel/test-crypto-binary-default.js | 10 +-- test/parallel/test-crypto-dh.js | 6 +- test/parallel/test-crypto-hash.js | 8 +-- test/parallel/test-crypto-padding-aes256.js | 4 +- test/parallel/test-crypto-padding.js | 2 +- test/parallel/test-crypto-pbkdf2.js | 4 +- test/parallel/test-crypto-sign-verify.js | 10 +-- test/parallel/test-file-read-noexist.js | 2 +- .../parallel/test-fs-write-stream-encoding.js | 2 +- test/parallel/test-http-status-message.js | 2 +- test/parallel/test-net-binary.js | 8 +-- test/parallel/test-stream-writev.js | 4 
+- test/parallel/test-stream2-writable.js | 4 +- test/parallel/test-stringbytes-external.js | 23 +++++-- 37 files changed, 246 insertions(+), 124 deletions(-) diff --git a/doc/api/buffer.md b/doc/api/buffer.md index f20f80c747da77..354fed25d586ec 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -165,12 +165,22 @@ The character encodings currently supported by Node.js include: this encoding will also correctly accept "URL and Filename Safe Alphabet" as specified in [RFC 4648, Section 5]. -* `'binary'` - A way of encoding the buffer into a one-byte (`latin-1`) - encoded string. The string `'latin-1'` is not supported. Instead, pass - `'binary'` to use `'latin-1'` encoding. +* `'latin1'` - A way of encoding the buffer into a one-byte encoded string + (as defined by the IANA in [RFC1345](https://tools.ietf.org/html/rfc1345), + page 63, to be the Latin-1 supplement block and C0/C1 control codes). + +* `'binary'` - (deprecated) A way of encoding the buffer into a one-byte + (`latin1`) encoded string. * `'hex'` - Encode each byte as two hexadecimal characters. +_Note_: Today's browsers follow the [WHATWG +spec](https://encoding.spec.whatwg.org/) that aliases both `latin1` and +`iso-8859-1` to `win-1252`. Meaning, while doing something like `http.get()`, +if the returned charset is one of those listed in the WHATWG spec it's possible +that the server actually returned `win-1252` encoded data, and using `latin1` +encoding may incorrectly decode the graphical characters. + ## Buffers and TypedArray Buffers are also `Uint8Array` TypedArray instances. However, there are subtle diff --git a/doc/api/crypto.md b/doc/api/crypto.md index e195c98e322746..759fe124410c79 100644 --- a/doc/api/crypto.md +++ b/doc/api/crypto.md @@ -160,7 +160,7 @@ console.log(encrypted); ### cipher.final([output_encoding]) Returns any remaining enciphered contents. If `output_encoding` -parameter is one of `'binary'`, `'base64'` or `'hex'`, a string is returned. 
+parameter is one of `'latin1'`, `'base64'` or `'hex'`, a string is returned. If an `output_encoding` is not provided, a [`Buffer`][] is returned. Once the `cipher.final()` method has been called, the `Cipher` object can no @@ -198,13 +198,13 @@ The `cipher.setAutoPadding()` method must be called before [`cipher.final()`][]. ### cipher.update(data[, input_encoding][, output_encoding]) Updates the cipher with `data`. If the `input_encoding` argument is given, -it's value must be one of `'utf8'`, `'ascii'`, or `'binary'` and the `data` +it's value must be one of `'utf8'`, `'ascii'`, or `'latin1'` and the `data` argument is a string using the specified encoding. If the `input_encoding` argument is not given, `data` must be a [`Buffer`][]. If `data` is a [`Buffer`][] then `input_encoding` is ignored. The `output_encoding` specifies the output format of the enciphered -data, and can be `'binary'`, `'base64'` or `'hex'`. If the `output_encoding` +data, and can be `'latin1'`, `'base64'` or `'hex'`. If the `output_encoding` is specified, a string using the specified encoding is returned. If no `output_encoding` is provided, a [`Buffer`][] is returned. @@ -277,7 +277,7 @@ console.log(decrypted); ### decipher.final([output_encoding]) Returns any remaining deciphered contents. If `output_encoding` -parameter is one of `'binary'`, `'base64'` or `'hex'`, a string is returned. +parameter is one of `'latin1'`, `'base64'` or `'hex'`, a string is returned. If an `output_encoding` is not provided, a [`Buffer`][] is returned. Once the `decipher.final()` method has been called, the `Decipher` object can @@ -313,13 +313,13 @@ The `decipher.setAutoPadding()` method must be called before ### decipher.update(data[, input_encoding][, output_encoding]) Updates the decipher with `data`. 
If the `input_encoding` argument is given, -it's value must be one of `'binary'`, `'base64'`, or `'hex'` and the `data` +it's value must be one of `'latin1'`, `'base64'`, or `'hex'` and the `data` argument is a string using the specified encoding. If the `input_encoding` argument is not given, `data` must be a [`Buffer`][]. If `data` is a [`Buffer`][] then `input_encoding` is ignored. The `output_encoding` specifies the output format of the enciphered -data, and can be `'binary'`, `'ascii'` or `'utf8'`. If the `output_encoding` +data, and can be `'latin1'`, `'ascii'` or `'utf8'`. If the `output_encoding` is specified, a string using the specified encoding is returned. If no `output_encoding` is provided, a [`Buffer`][] is returned. @@ -361,7 +361,7 @@ Computes the shared secret using `other_public_key` as the other party's public key and returns the computed shared secret. The supplied key is interpreted using the specified `input_encoding`, and secret is encoded using specified `output_encoding`. Encodings can be -`'binary'`, `'hex'`, or `'base64'`. If the `input_encoding` is not +`'latin1'`, `'hex'`, or `'base64'`. If the `input_encoding` is not provided, `other_public_key` is expected to be a [`Buffer`][]. If `output_encoding` is given a string is returned; otherwise, a @@ -371,45 +371,45 @@ If `output_encoding` is given a string is returned; otherwise, a Generates private and public Diffie-Hellman key values, and returns the public key in the specified `encoding`. This key should be -transferred to the other party. Encoding can be `'binary'`, `'hex'`, +transferred to the other party. Encoding can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### diffieHellman.getGenerator([encoding]) Returns the Diffie-Hellman generator in the specified `encoding`, which can -be `'binary'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is +be `'latin1'`, `'hex'`, or `'base64'`. 
If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### diffieHellman.getPrime([encoding]) Returns the Diffie-Hellman prime in the specified `encoding`, which can -be `'binary'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is +be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### diffieHellman.getPrivateKey([encoding]) Returns the Diffie-Hellman private key in the specified `encoding`, -which can be `'binary'`, `'hex'`, or `'base64'`. If `encoding` is provided a +which can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### diffieHellman.getPublicKey([encoding]) Returns the Diffie-Hellman public key in the specified `encoding`, which -can be `'binary'`, `'hex'`, or `'base64'`. If `encoding` is provided a +can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### diffieHellman.setPrivateKey(private_key[, encoding]) Sets the Diffie-Hellman private key. If the `encoding` argument is provided -and is either `'binary'`, `'hex'`, or `'base64'`, `private_key` is expected +and is either `'latin1'`, `'hex'`, or `'base64'`, `private_key` is expected to be a string. If no `encoding` is provided, `private_key` is expected to be a [`Buffer`][]. ### diffieHellman.setPublicKey(public_key[, encoding]) Sets the Diffie-Hellman public key. If the `encoding` argument is provided -and is either `'binary'`, `'hex'` or `'base64'`, `public_key` is expected +and is either `'latin1'`, `'hex'` or `'base64'`, `public_key` is expected to be a string. If no `encoding` is provided, `public_key` is expected to be a [`Buffer`][]. @@ -460,7 +460,7 @@ Computes the shared secret using `other_public_key` as the other party's public key and returns the computed shared secret. 
The supplied key is interpreted using specified `input_encoding`, and the returned secret is encoded using the specified `output_encoding`. Encodings can be -`'binary'`, `'hex'`, or `'base64'`. If the `input_encoding` is not +`'latin1'`, `'hex'`, or `'base64'`. If the `input_encoding` is not provided, `other_public_key` is expected to be a [`Buffer`][]. If `output_encoding` is given a string will be returned; otherwise a @@ -476,14 +476,14 @@ The `format` arguments specifies point encoding and can be `'compressed'`, `'uncompressed'`, or `'hybrid'`. If `format` is not specified, the point will be returned in `'uncompressed'` format. -The `encoding` argument can be `'binary'`, `'hex'`, or `'base64'`. If +The `encoding` argument can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### ecdh.getPrivateKey([encoding]) Returns the EC Diffie-Hellman private key in the specified `encoding`, -which can be `'binary'`, `'hex'`, or `'base64'`. If `encoding` is provided +which can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned. ### ecdh.getPublicKey([encoding[, format]]) @@ -495,13 +495,13 @@ The `format` argument specifies point encoding and can be `'compressed'`, `'uncompressed'`, or `'hybrid'`. If `format` is not specified the point will be returned in `'uncompressed'` format. -The `encoding` argument can be `'binary'`, `'hex'`, or `'base64'`. If +The `encoding` argument can be `'latin1'`, `'hex'`, or `'base64'`. If `encoding` is specified, a string is returned; otherwise a [`Buffer`][] is returned. ### ecdh.setPrivateKey(private_key[, encoding]) -Sets the EC Diffie-Hellman private key. The `encoding` can be `'binary'`, +Sets the EC Diffie-Hellman private key. The `encoding` can be `'latin1'`, `'hex'` or `'base64'`. 
If `encoding` is provided, `private_key` is expected to be a string; otherwise `private_key` is expected to be a [`Buffer`][]. If `private_key` is not valid for the curve specified when the `ECDH` object was @@ -512,7 +512,7 @@ public point (key) is also generated and set in the ECDH object. Stability: 0 - Deprecated -Sets the EC Diffie-Hellman public key. Key encoding can be `'binary'`, +Sets the EC Diffie-Hellman public key. Key encoding can be `'latin1'`, `'hex'` or `'base64'`. If `encoding` is provided `public_key` is expected to be a string; otherwise a [`Buffer`][] is expected. @@ -604,7 +604,7 @@ console.log(hash.digest('hex')); ### hash.digest([encoding]) Calculates the digest of all of the data passed to be hashed (using the -[`hash.update()`][] method). The `encoding` can be `'hex'`, `'binary'` or +[`hash.update()`][] method). The `encoding` can be `'hex'`, `'latin1'` or `'base64'`. If `encoding` is provided a string will be returned; otherwise a [`Buffer`][] is returned. @@ -615,7 +615,7 @@ called. Multiple calls will cause an error to be thrown. Updates the hash content with the given `data`, the encoding of which is given in `input_encoding` and can be `'utf8'`, `'ascii'` or -`'binary'`. If `encoding` is not provided, and the `data` is a string, an +`'latin1'`. If `encoding` is not provided, and the `data` is a string, an encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then `input_encoding` is ignored. @@ -678,7 +678,7 @@ console.log(hmac.digest('hex')); ### hmac.digest([encoding]) Calculates the HMAC digest of all of the data passed using [`hmac.update()`][]. -The `encoding` can be `'hex'`, `'binary'` or `'base64'`. If `encoding` is +The `encoding` can be `'hex'`, `'latin1'` or `'base64'`. If `encoding` is provided a string is returned; otherwise a [`Buffer`][] is returned; The `Hmac` object can not be used again after `hmac.digest()` has been @@ -688,7 +688,7 @@ called. 
Multiple calls to `hmac.digest()` will result in an error being thrown. Updates the `Hmac` content with the given `data`, the encoding of which is given in `input_encoding` and can be `'utf8'`, `'ascii'` or -`'binary'`. If `encoding` is not provided, and the `data` is a string, an +`'latin1'`. If `encoding` is not provided, and the `data` is a string, an encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then `input_encoding` is ignored. @@ -768,7 +768,7 @@ object, it is interpreted as a hash containing two properties: * `key` : {String} - PEM encoded private key * `passphrase` : {String} - passphrase for the private key -The `output_format` can specify one of `'binary'`, `'hex'` or `'base64'`. If +The `output_format` can specify one of `'latin1'`, `'hex'` or `'base64'`. If `output_format` is provided a string is returned; otherwise a [`Buffer`][] is returned. @@ -779,7 +779,7 @@ called. Multiple calls to `sign.sign()` will result in an error being thrown. Updates the `Sign` content with the given `data`, the encoding of which is given in `input_encoding` and can be `'utf8'`, `'ascii'` or -`'binary'`. If `encoding` is not provided, and the `data` is a string, an +`'latin1'`. If `encoding` is not provided, and the `data` is a string, an encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then `input_encoding` is ignored. @@ -831,7 +831,7 @@ console.log(verify.verify(public_key, signature)); Updates the `Verify` content with the given `data`, the encoding of which is given in `input_encoding` and can be `'utf8'`, `'ascii'` or -`'binary'`. If `encoding` is not provided, and the `data` is a string, an +`'latin1'`. If `encoding` is not provided, and the `data` is a string, an encoding of `'utf8'` is enforced. If `data` is a [`Buffer`][] then `input_encoding` is ignored. @@ -843,7 +843,7 @@ Verifies the provided data using the given `object` and `signature`. 
The `object` argument is a string containing a PEM encoded object, which can be one an RSA public key, a DSA public key, or an X.509 certificate. The `signature` argument is the previously calculated signature for the data, in -the `signature_format` which can be `'binary'`, `'hex'` or `'base64'`. +the `signature_format` which can be `'latin1'`, `'hex'` or `'base64'`. If a `signature_format` is specified, the `signature` is expected to be a string; otherwise `signature` is expected to be a [`Buffer`][]. @@ -869,7 +869,7 @@ or [buffers][`Buffer`]. The default value is `'buffer'`, which makes methods default to [`Buffer`][] objects. The `crypto.DEFAULT_ENCODING` mechanism is provided for backwards compatibility -with legacy programs that expect `'binary'` to be the default encoding. +with legacy programs that expect `'latin1'` to be the default encoding. New applications should expect the default to be `'buffer'`. This property may become deprecated in a future Node.js release. @@ -889,7 +889,7 @@ recent OpenSSL releases, `openssl list-cipher-algorithms` will display the available cipher algorithms. The `password` is used to derive the cipher key and initialization vector (IV). -The value must be either a `'binary'` encoded string or a [`Buffer`][]. +The value must be either a `'latin1'` encoded string or a [`Buffer`][]. The implementation of `crypto.createCipher()` derives keys using the OpenSSL function [`EVP_BytesToKey`][] with the digest algorithm set to MD5, one @@ -913,7 +913,7 @@ recent OpenSSL releases, `openssl list-cipher-algorithms` will display the available cipher algorithms. The `key` is the raw key used by the `algorithm` and `iv` is an -[initialization vector][]. Both arguments must be `'binary'` encoded strings or +[initialization vector][]. Both arguments must be `'latin1'` encoded strings or [buffers][`Buffer`]. 
### crypto.createCredentials(details) @@ -968,7 +968,7 @@ recent OpenSSL releases, `openssl list-cipher-algorithms` will display the available cipher algorithms. The `key` is the raw key used by the `algorithm` and `iv` is an -[initialization vector][]. Both arguments must be `'binary'` encoded strings or +[initialization vector][]. Both arguments must be `'latin1'` encoded strings or [buffers][`Buffer`]. ### crypto.createDiffieHellman(prime[, prime_encoding][, generator][, generator_encoding]) @@ -979,7 +979,7 @@ optional specific `generator`. The `generator` argument can be a number, string, or [`Buffer`][]. If `generator` is not specified, the value `2` is used. -The `prime_encoding` and `generator_encoding` arguments can be `'binary'`, +The `prime_encoding` and `generator_encoding` arguments can be `'latin1'`, `'hex'`, or `'base64'`. If `prime_encoding` is specified, `prime` is expected to be a string; otherwise @@ -1345,7 +1345,7 @@ unified Stream API, and before there were [`Buffer`][] objects for handling binary data. As such, the many of the `crypto` defined classes have methods not typically found on other Node.js classes that implement the [streams][stream] API (e.g. `update()`, `final()`, or `digest()`). Also, many methods accepted -and returned `'binary'` encoded strings by default rather than Buffers. This +and returned `'latin1'` encoded strings by default rather than Buffers. This default was changed after Node.js v0.8 to use [`Buffer`][] objects by default instead. 
diff --git a/lib/_http_outgoing.js b/lib/_http_outgoing.js index e1e78e010cfd62..18a610feab9c4c 100644 --- a/lib/_http_outgoing.js +++ b/lib/_http_outgoing.js @@ -130,7 +130,7 @@ OutgoingMessage.prototype._send = function(data, encoding, callback) { data = this._header + data; } else { this.output.unshift(this._header); - this.outputEncodings.unshift('binary'); + this.outputEncodings.unshift('latin1'); this.outputCallbacks.unshift(null); this.outputSize += this._header.length; if (typeof this._onPendingData === 'function') @@ -453,7 +453,7 @@ OutgoingMessage.prototype.write = function(chunk, encoding, callback) { if (typeof chunk === 'string' && encoding !== 'hex' && encoding !== 'base64' && - encoding !== 'binary') { + encoding !== 'latin1') { len = Buffer.byteLength(chunk, encoding); chunk = len.toString(16) + CRLF + chunk + CRLF; ret = this._send(chunk, encoding, callback); @@ -468,7 +468,7 @@ OutgoingMessage.prototype.write = function(chunk, encoding, callback) { this.connection.cork(); process.nextTick(connectionCorkNT, this.connection); } - this._send(len.toString(16), 'binary', null); + this._send(len.toString(16), 'latin1', null); this._send(crlf_buf, null, null); this._send(chunk, encoding, null); ret = this._send(crlf_buf, null, callback); @@ -582,10 +582,10 @@ OutgoingMessage.prototype.end = function(data, encoding, callback) { } if (this._hasBody && this.chunkedEncoding) { - ret = this._send('0\r\n' + this._trailer + '\r\n', 'binary', finish); + ret = this._send('0\r\n' + this._trailer + '\r\n', 'latin1', finish); } else { // Force a flush, HACK. 
- ret = this._send('', 'binary', finish); + ret = this._send('', 'latin1', finish); } if (this.connection && data) diff --git a/lib/_tls_wrap.js b/lib/_tls_wrap.js index b7669532406491..c2ecd07a4b77bc 100644 --- a/lib/_tls_wrap.js +++ b/lib/_tls_wrap.js @@ -608,7 +608,7 @@ TLSSocket.prototype.setServername = function(name) { TLSSocket.prototype.setSession = function(session) { if (typeof session === 'string') - session = Buffer.from(session, 'binary'); + session = Buffer.from(session, 'latin1'); this._handle.setSession(session); }; diff --git a/lib/buffer.js b/lib/buffer.js index d3cbdf66a3a21d..ab012c96b5611f 100644 --- a/lib/buffer.js +++ b/lib/buffer.js @@ -331,6 +331,7 @@ Buffer.isEncoding = function(encoding) { case 'utf8': case 'utf-8': case 'ascii': + case 'latin1': case 'binary': case 'base64': case 'ucs2': @@ -408,6 +409,7 @@ function byteLength(string, encoding) { for (;;) { switch (encoding) { case 'ascii': + case 'latin1': case 'binary': return len; @@ -509,8 +511,9 @@ function slowToString(encoding, start, end) { case 'ascii': return this.asciiSlice(start, end); + case 'latin1': case 'binary': - return this.binarySlice(start, end); + return this.latin1Slice(start, end); case 'base64': return this.base64Slice(start, end); @@ -658,6 +661,7 @@ function slowIndexOf(buffer, val, byteOffset, encoding, dir) { case 'ucs-2': case 'utf16le': case 'utf-16le': + case 'latin1': case 'binary': return binding.indexOfString(buffer, val, byteOffset, encoding, dir); @@ -801,8 +805,9 @@ Buffer.prototype.write = function(string, offset, length, encoding) { case 'ascii': return this.asciiWrite(string, offset, length); + case 'latin1': case 'binary': - return this.binaryWrite(string, offset, length); + return this.latin1Write(string, offset, length); case 'base64': // Warning: maxLength not taken into account in base64Write diff --git a/lib/internal/streams/lazy_transform.js b/lib/internal/streams/lazy_transform.js index 7e290b16cf5cae..bd68bef4b6dd17 100644 --- 
a/lib/internal/streams/lazy_transform.js +++ b/lib/internal/streams/lazy_transform.js @@ -22,7 +22,7 @@ util.inherits(LazyTransform, stream.Transform); get: function() { stream.Transform.call(this, this._options); this._writableState.decodeStrings = false; - this._writableState.defaultEncoding = 'binary'; + this._writableState.defaultEncoding = 'latin1'; return this[prop]; }, set: function(val) { diff --git a/lib/net.js b/lib/net.js index 509112f58f2077..d64a264f965e71 100644 --- a/lib/net.js +++ b/lib/net.js @@ -726,8 +726,9 @@ Socket.prototype._write = function(data, encoding, cb) { function createWriteReq(req, handle, data, encoding) { switch (encoding) { + case 'latin1': case 'binary': - return handle.writeBinaryString(req, data); + return handle.writeLatin1String(req, data); case 'buffer': return handle.writeBuffer(req, data); diff --git a/lib/string_decoder.js b/lib/string_decoder.js index aaadfd89341a8c..6eb71efc07d803 100644 --- a/lib/string_decoder.js +++ b/lib/string_decoder.js @@ -19,6 +19,7 @@ function normalizeEncoding(enc) { return 'utf16le'; case 'base64': case 'ascii': + case 'latin1': case 'binary': case 'hex': return enc; @@ -225,7 +226,7 @@ function base64End(buf) { return r; } -// Pass bytes on through for single-byte encodings (e.g. ascii, binary, hex) +// Pass bytes on through for single-byte encodings (e.g. 
ascii, latin1, hex) function simpleWrite(buf) { return buf.toString(this.encoding); } diff --git a/src/node.cc b/src/node.cc index a6f5325cb6cb58..da514787a3f607 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1312,11 +1312,17 @@ enum encoding ParseEncoding(const char* encoding, return UCS2; } break; + case 'l': + // latin1 + if (encoding[1] == 'a') { + if (strncmp(encoding + 2, "tin1", 4) == 0) + return LATIN1; + } case 'b': // binary if (encoding[1] == 'i') { if (strncmp(encoding + 2, "nary", 4) == 0) - return BINARY; + return LATIN1; // buffer } else if (encoding[1] == 'u') { @@ -1346,6 +1352,8 @@ enum encoding ParseEncoding(const char* encoding, return UCS2; } else if (StringEqualNoCase(encoding, "utf-16le")) { return UCS2; + } else if (StringEqualNoCase(encoding, "latin1")) { + return LATIN1; } else if (StringEqualNoCase(encoding, "binary")) { return BINARY; } else if (StringEqualNoCase(encoding, "buffer")) { @@ -1389,7 +1397,7 @@ ssize_t DecodeBytes(Isolate* isolate, if (val->IsArray()) { fprintf(stderr, "'raw' encoding (array of integers) has been removed. 
" - "Use 'binary'.\n"); + "Use 'latin1'.\n"); UNREACHABLE(); return -1; } diff --git a/src/node.h b/src/node.h index c1c149cdc30eb9..d813b45a0c83b5 100644 --- a/src/node.h +++ b/src/node.h @@ -278,15 +278,15 @@ inline void NODE_SET_PROTOTYPE_METHOD(v8::Local recv, } #define NODE_SET_PROTOTYPE_METHOD node::NODE_SET_PROTOTYPE_METHOD -enum encoding {ASCII, UTF8, BASE64, UCS2, BINARY, HEX, BUFFER}; +enum encoding {ASCII, UTF8, BASE64, UCS2, LATIN1, BINARY, HEX, BUFFER}; NODE_EXTERN enum encoding ParseEncoding( v8::Isolate* isolate, v8::Local encoding_v, - enum encoding default_encoding = BINARY); + enum encoding default_encoding = LATIN1); NODE_DEPRECATED("Use ParseEncoding(isolate, ...)", inline enum encoding ParseEncoding( v8::Local encoding_v, - enum encoding default_encoding = BINARY) { + enum encoding default_encoding = LATIN1) { return ParseEncoding(v8::Isolate::GetCurrent(), encoding_v, default_encoding); }) @@ -302,7 +302,7 @@ NODE_DEPRECATED("Use FatalException(isolate, ...)", NODE_EXTERN v8::Local Encode(v8::Isolate* isolate, const char* buf, size_t len, - enum encoding encoding = BINARY); + enum encoding encoding = LATIN1); // The input buffer should be in host endianness. 
NODE_EXTERN v8::Local Encode(v8::Isolate* isolate, @@ -313,7 +313,7 @@ NODE_DEPRECATED("Use Encode(isolate, ...)", inline v8::Local Encode( const void* buf, size_t len, - enum encoding encoding = BINARY) { + enum encoding encoding = LATIN1) { v8::Isolate* isolate = v8::Isolate::GetCurrent(); if (encoding == UCS2) { assert(reinterpret_cast(buf) % sizeof(uint16_t) == 0 && @@ -327,11 +327,11 @@ NODE_DEPRECATED("Use Encode(isolate, ...)", // Returns -1 if the handle was not valid for decoding NODE_EXTERN ssize_t DecodeBytes(v8::Isolate* isolate, v8::Local, - enum encoding encoding = BINARY); + enum encoding encoding = LATIN1); NODE_DEPRECATED("Use DecodeBytes(isolate, ...)", inline ssize_t DecodeBytes( v8::Local val, - enum encoding encoding = BINARY) { + enum encoding encoding = LATIN1) { return DecodeBytes(v8::Isolate::GetCurrent(), val, encoding); }) @@ -340,12 +340,12 @@ NODE_EXTERN ssize_t DecodeWrite(v8::Isolate* isolate, char* buf, size_t buflen, v8::Local, - enum encoding encoding = BINARY); + enum encoding encoding = LATIN1); NODE_DEPRECATED("Use DecodeWrite(isolate, ...)", inline ssize_t DecodeWrite(char* buf, size_t buflen, v8::Local val, - enum encoding encoding = BINARY) { + enum encoding encoding = LATIN1) { return DecodeWrite(v8::Isolate::GetCurrent(), buf, buflen, val, encoding); }) diff --git a/src/node_buffer.cc b/src/node_buffer.cc index 388decac4ad5d0..2472c0bb82e2ae 100644 --- a/src/node_buffer.cc +++ b/src/node_buffer.cc @@ -491,8 +491,8 @@ void StringSlice(const FunctionCallbackInfo& args) { } -void BinarySlice(const FunctionCallbackInfo& args) { - StringSlice(args); +void Latin1Slice(const FunctionCallbackInfo& args) { + StringSlice(args); } @@ -692,8 +692,8 @@ void Base64Write(const FunctionCallbackInfo& args) { } -void BinaryWrite(const FunctionCallbackInfo& args) { - StringWrite(args); +void Latin1Write(const FunctionCallbackInfo& args) { + StringWrite(args); } @@ -1035,7 +1035,7 @@ void IndexOfString(const FunctionCallbackInfo& args) { 
needle_length, offset, is_forward); - } else if (enc == BINARY) { + } else if (enc == LATIN1) { uint8_t* needle_data = static_cast(malloc(needle_length)); if (needle_data == nullptr) { return args.GetReturnValue().Set(-1); @@ -1183,14 +1183,14 @@ void SetupBufferJS(const FunctionCallbackInfo& args) { env->SetMethod(proto, "asciiSlice", AsciiSlice); env->SetMethod(proto, "base64Slice", Base64Slice); - env->SetMethod(proto, "binarySlice", BinarySlice); + env->SetMethod(proto, "latin1Slice", Latin1Slice); env->SetMethod(proto, "hexSlice", HexSlice); env->SetMethod(proto, "ucs2Slice", Ucs2Slice); env->SetMethod(proto, "utf8Slice", Utf8Slice); env->SetMethod(proto, "asciiWrite", AsciiWrite); env->SetMethod(proto, "base64Write", Base64Write); - env->SetMethod(proto, "binaryWrite", BinaryWrite); + env->SetMethod(proto, "latin1Write", Latin1Write); env->SetMethod(proto, "hexWrite", HexWrite); env->SetMethod(proto, "ucs2Write", Ucs2Write); env->SetMethod(proto, "utf8Write", Utf8Write); diff --git a/src/stream_base-inl.h b/src/stream_base-inl.h index a34c01082de566..da636909b695f3 100644 --- a/src/stream_base-inl.h +++ b/src/stream_base-inl.h @@ -71,8 +71,8 @@ void StreamBase::AddMethods(Environment* env, "writeUcs2String", JSMethod >); env->SetProtoMethod(t, - "writeBinaryString", - JSMethod >); + "writeLatin1String", + JSMethod >); } diff --git a/src/stream_base.cc b/src/stream_base.cc index 8db127dff66a44..105c4ad45895c0 100644 --- a/src/stream_base.cc +++ b/src/stream_base.cc @@ -33,7 +33,7 @@ template int StreamBase::WriteString( const FunctionCallbackInfo& args); template int StreamBase::WriteString( const FunctionCallbackInfo& args); -template int StreamBase::WriteString( +template int StreamBase::WriteString( const FunctionCallbackInfo& args); diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 8a327deac2f439..6454e11323b45b 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -271,6 +271,7 @@ size_t StringBytes::Write(Isolate* isolate, switch 
(encoding) { case ASCII: + case LATIN1: case BINARY: if (is_extern && str->IsOneByte()) { memcpy(buf, data, nbytes); @@ -376,15 +377,17 @@ size_t StringBytes::StorageSize(Isolate* isolate, size_t data_size = 0; bool is_buffer = Buffer::HasInstance(val); - if (is_buffer && (encoding == BUFFER || encoding == BINARY)) { + if (is_buffer && + (encoding == BUFFER || encoding == BINARY || encoding == LATIN1)) { return Buffer::Length(val); } Local str = val->ToString(isolate); switch (encoding) { - case BINARY: case ASCII: + case LATIN1: + case BINARY: data_size = str->Length(); break; @@ -425,7 +428,8 @@ size_t StringBytes::Size(Isolate* isolate, size_t data_size = 0; bool is_buffer = Buffer::HasInstance(val); - if (is_buffer && (encoding == BUFFER || encoding == BINARY)) + if (is_buffer && + (encoding == BUFFER || encoding == BINARY || encoding == LATIN1)) return Buffer::Length(val); const char* data; @@ -435,8 +439,9 @@ size_t StringBytes::Size(Isolate* isolate, Local str = val->ToString(isolate); switch (encoding) { - case BINARY: case ASCII: + case LATIN1: + case BINARY: data_size = str->Length(); break; @@ -639,6 +644,7 @@ Local StringBytes::Encode(Isolate* isolate, buflen); break; + case LATIN1: case BINARY: if (buflen < EXTERN_APEX) val = OneByteString(isolate, buf, buflen); diff --git a/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-at-max.js b/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-at-max.js index 867aedf0f67239..ebf6a01a0a01d0 100644 --- a/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-at-max.js +++ b/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-at-max.js @@ -29,5 +29,5 @@ if (!binding.ensureAllocation(2 * kStringMaxLength)) { return; } -const maxString = buf.toString('binary'); +const maxString = buf.toString('latin1'); assert.equal(maxString.length, kStringMaxLength); diff --git 
a/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-exceed-max-by-1-binary.js b/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-exceed-max-by-1-binary.js index 04e4ad9f93c4e2..0fa4bca7232b78 100644 --- a/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-exceed-max-by-1-binary.js +++ b/test/addons/stringbytes-external-exceed-max/test-stringbytes-external-exceed-max-by-1-binary.js @@ -30,13 +30,13 @@ if (!binding.ensureAllocation(2 * kStringMaxLength)) { } assert.throws(function() { - buf.toString('binary'); + buf.toString('latin1'); }, /"toString\(\)" failed/); -var maxString = buf.toString('binary', 1); +var maxString = buf.toString('latin1', 1); assert.equal(maxString.length, kStringMaxLength); // Free the memory early instead of at the end of the next assignment maxString = undefined; -maxString = buf.toString('binary', 0, kStringMaxLength); +maxString = buf.toString('latin1', 0, kStringMaxLength); assert.equal(maxString.length, kStringMaxLength); diff --git a/test/disabled/tls_server.js b/test/disabled/tls_server.js index 14e48f6b31a50f..1e72809a8a04ad 100644 --- a/test/disabled/tls_server.js +++ b/test/disabled/tls_server.js @@ -19,7 +19,7 @@ try { var i = 0; var server = net.createServer(function(connection) { connection.setSecure(credentials); - connection.setEncoding('binary'); + connection.setEncoding('latin1'); connection.on('secure', function() { //console.log('Secure'); diff --git a/test/parallel/test-buffer-alloc.js b/test/parallel/test-buffer-alloc.js index 0f06393d6a5aaa..83e9e2f5109067 100644 --- a/test/parallel/test-buffer-alloc.js +++ b/test/parallel/test-buffer-alloc.js @@ -235,7 +235,7 @@ assert.strictEqual('Unknown encoding: invalid', caught_error.message); // try to create 0-length buffers Buffer.from(''); Buffer.from('', 'ascii'); -Buffer.from('', 'binary'); +Buffer.from('', 'latin1'); Buffer.alloc(0); Buffer.allocUnsafe(0); @@ -689,7 +689,7 @@ assert.equal(dot.toString('base64'), 
'//4uAA=='); for (let i = 0; i < segments.length; ++i) { pos += b.write(segments[i], pos, 'base64'); } - assert.equal(b.toString('binary', 0, pos), 'Madness?! This is node.js!'); + assert.equal(b.toString('latin1', 0, pos), 'Madness?! This is node.js!'); } // Regression test for https://github.com/nodejs/node/issues/3496. @@ -845,13 +845,13 @@ assert.equal(0, Buffer.from('hello').slice(0, 0).length); // Binary encoding should write only one byte per character. const b = Buffer.from([0xde, 0xad, 0xbe, 0xef]); let s = String.fromCharCode(0xffff); - b.write(s, 0, 'binary'); + b.write(s, 0, 'latin1'); assert.equal(0xff, b[0]); assert.equal(0xad, b[1]); assert.equal(0xbe, b[2]); assert.equal(0xef, b[3]); s = String.fromCharCode(0xaaee); - b.write(s, 0, 'binary'); + b.write(s, 0, 'latin1'); assert.equal(0xee, b[0]); assert.equal(0xad, b[1]); assert.equal(0xbe, b[2]); @@ -969,7 +969,7 @@ assert.equal(0, Buffer.from('hello').slice(0, 0).length); // test for buffer overrun const buf = Buffer.from([0, 0, 0, 0, 0]); // length: 5 var sub = buf.slice(0, 4); // length: 4 - written = sub.write('12345', 'binary'); + written = sub.write('12345', 'latin1'); assert.equal(written, 4); assert.equal(buf[4], 0); } @@ -992,7 +992,7 @@ assert.equal(Buffer.from('99').length, 2); assert.equal(Buffer.from('13.37').length, 5); // Ensure that the length argument is respected. 
-'ascii utf8 hex base64 binary'.split(' ').forEach(function(enc) { +'ascii utf8 hex base64 latin1'.split(' ').forEach(function(enc) { assert.equal(Buffer.allocUnsafe(1).write('aaaaaa', 0, 1, enc), 1); }); @@ -1011,6 +1011,7 @@ Buffer.from(Buffer.allocUnsafe(0), 0, 0); 'utf8', 'utf-8', 'ascii', + 'latin1', 'binary', 'base64', 'ucs2', diff --git a/test/parallel/test-buffer-bytelength.js b/test/parallel/test-buffer-bytelength.js index 01bf12e544908e..97e24193c1d675 100644 --- a/test/parallel/test-buffer-bytelength.js +++ b/test/parallel/test-buffer-bytelength.js @@ -6,9 +6,9 @@ var Buffer = require('buffer').Buffer; var SlowBuffer = require('buffer').SlowBuffer; // coerce values to string -assert.equal(Buffer.byteLength(32, 'binary'), 2); +assert.equal(Buffer.byteLength(32, 'latin1'), 2); assert.equal(Buffer.byteLength(NaN, 'utf8'), 3); -assert.equal(Buffer.byteLength({}, 'binary'), 15); +assert.equal(Buffer.byteLength({}, 'latin1'), 15); assert.equal(Buffer.byteLength(), 9); var buff = new Buffer(10); @@ -80,6 +80,7 @@ assert.equal(Buffer.byteLength('aaaa==', 'base64'), 3); assert.equal(Buffer.byteLength('Il était tué'), 14); assert.equal(Buffer.byteLength('Il était tué', 'utf8'), 14); assert.equal(Buffer.byteLength('Il était tué', 'ascii'), 12); +assert.equal(Buffer.byteLength('Il était tué', 'latin1'), 12); assert.equal(Buffer.byteLength('Il était tué', 'binary'), 12); ['ucs2', 'ucs-2', 'utf16le', 'utf-16le'].forEach(function(encoding) { assert.equal(24, Buffer.byteLength('Il était tué', encoding)); diff --git a/test/parallel/test-buffer-fill.js b/test/parallel/test-buffer-fill.js index 667cede052cba4..638fda819d4c0b 100644 --- a/test/parallel/test-buffer-fill.js +++ b/test/parallel/test-buffer-fill.js @@ -73,6 +73,27 @@ testBufs('a\u0234b\u0235c\u0236', 4, 1, 'binary'); testBufs('a\u0234b\u0235c\u0236', 12, 1, 'binary'); +// LATIN1 +testBufs('abc', 'latin1'); +testBufs('\u0222aa', 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 'latin1'); +testBufs('abc', 4, 
'latin1'); +testBufs('abc', 5, 'latin1'); +testBufs('abc', SIZE, 'latin1'); +testBufs('\u0222aa', 2, 'latin1'); +testBufs('\u0222aa', 8, 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 4, 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 12, 'latin1'); +testBufs('abc', 4, -1, 'latin1'); +testBufs('abc', 4, 1, 'latin1'); +testBufs('abc', 5, 1, 'latin1'); +testBufs('\u0222aa', 2, -1, 'latin1'); +testBufs('\u0222aa', 8, 1, 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 4, -1, 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 4, 1, 'latin1'); +testBufs('a\u0234b\u0235c\u0236', 12, 1, 'latin1'); + + // UCS2 testBufs('abc', 'ucs2'); testBufs('\u0222aa', 'ucs2'); diff --git a/test/parallel/test-buffer-includes.js b/test/parallel/test-buffer-includes.js index 7962d1cef850f4..90f24c01f31982 100644 --- a/test/parallel/test-buffer-includes.js +++ b/test/parallel/test-buffer-includes.js @@ -102,6 +102,14 @@ assert( Buffer.from(b.toString('ascii'), 'ascii') .includes(Buffer.from('d', 'ascii'), 0, 'ascii')); +// test latin1 encoding +assert( + Buffer.from(b.toString('latin1'), 'latin1') + .includes('d', 0, 'latin1')); +assert( + Buffer.from(b.toString('latin1'), 'latin1') + .includes(Buffer.from('d', 'latin1'), 0, 'latin1')); + // test binary encoding assert( Buffer.from(b.toString('binary'), 'binary') diff --git a/test/parallel/test-buffer-indexof.js b/test/parallel/test-buffer-indexof.js index 647f1e9a623248..3d5620aa8101a8 100644 --- a/test/parallel/test-buffer-indexof.js +++ b/test/parallel/test-buffer-indexof.js @@ -102,6 +102,23 @@ assert.equal( Buffer.from(b.toString('ascii'), 'ascii') .indexOf(Buffer.from('d', 'ascii'), 0, 'ascii'), 3); +// test latin1 encoding +assert.equal( + Buffer.from(b.toString('latin1'), 'latin1') + .indexOf('d', 0, 'latin1'), 3); +assert.equal( + Buffer.from(b.toString('latin1'), 'latin1') + .indexOf(Buffer.from('d', 'latin1'), 0, 'latin1'), 3); +assert.equal( + Buffer.from('aa\u00e8aa', 'latin1') + .indexOf('\u00e8', 'latin1'), 2); +assert.equal( + 
Buffer.from('\u00e8', 'latin1') + .indexOf('\u00e8', 'latin1'), 0); +assert.equal( + Buffer.from('\u00e8', 'latin1') + .indexOf(Buffer.from('\u00e8', 'latin1'), 'latin1'), 0); + // test binary encoding assert.equal( Buffer.from(b.toString('binary'), 'binary') @@ -357,20 +374,24 @@ assert.equal(b.lastIndexOf('b', [2]), 1); // Test needles longer than the haystack. assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'ucs2'), -1); assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'latin1'), -1); assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 'binary'), -1); assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa')), -1); assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 2, 'ucs2'), -1); assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 3, 'utf8'), -1); +assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 5, 'latin1'), -1); assert.strictEqual(b.lastIndexOf('aaaaaaaaaaaaaaa', 5, 'binary'), -1); assert.strictEqual(b.lastIndexOf(Buffer.from('aaaaaaaaaaaaaaa'), 7), -1); // 你好 expands to a total of 6 bytes using UTF-8 and 4 bytes using UTF-16 assert.strictEqual(buf_bc.lastIndexOf('你好', 'ucs2'), -1); assert.strictEqual(buf_bc.lastIndexOf('你好', 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 'latin1'), -1); assert.strictEqual(buf_bc.lastIndexOf('你好', 'binary'), -1); assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好')), -1); assert.strictEqual(buf_bc.lastIndexOf('你好', 2, 'ucs2'), -1); assert.strictEqual(buf_bc.lastIndexOf('你好', 3, 'utf8'), -1); +assert.strictEqual(buf_bc.lastIndexOf('你好', 5, 'latin1'), -1); assert.strictEqual(buf_bc.lastIndexOf('你好', 5, 'binary'), -1); assert.strictEqual(buf_bc.lastIndexOf(Buffer.from('你好'), 7), -1); @@ -395,6 +416,7 @@ assert.equal(-1, bufferString.lastIndexOf('a ', -28)); // but in a part of the buffer that does not make search to search // due do length constraints. 
const abInUCS2 = Buffer.from('ab', 'ucs2'); +assert.strictEqual(-1, Buffer.from('µaaaa¶bbbb', 'latin1').lastIndexOf('µ')); assert.strictEqual(-1, Buffer.from('µaaaa¶bbbb', 'binary').lastIndexOf('µ')); assert.strictEqual(-1, Buffer.from('bc').lastIndexOf('ab')); assert.strictEqual(-1, Buffer.from('abc').lastIndexOf('qa')); diff --git a/test/parallel/test-buffer.js b/test/parallel/test-buffer.js index f773da321d5a0e..f3ab74902bb29b 100644 --- a/test/parallel/test-buffer.js +++ b/test/parallel/test-buffer.js @@ -237,6 +237,7 @@ assert.strictEqual('Unknown encoding: invalid', caught_error.message); // try to create 0-length buffers new Buffer(''); new Buffer('', 'ascii'); +new Buffer('', 'latin1'); new Buffer('', 'binary'); Buffer(0); @@ -687,6 +688,7 @@ assert.equal(dot.toString('base64'), '//4uAA=='); for (let i = 0; i < segments.length; ++i) { pos += b.write(segments[i], pos, 'base64'); } + assert.equal(b.toString('latin1', 0, pos), 'Madness?! This is node.js!'); assert.equal(b.toString('binary', 0, pos), 'Madness?! This is node.js!'); } @@ -845,6 +847,23 @@ assert.equal(0, Buffer('hello').slice(0, 0).length); assert.equal(b.toString(encoding), 'あいうえお'); }); +{ + // latin1 encoding should write only one byte per character. + const b = Buffer([0xde, 0xad, 0xbe, 0xef]); + let s = String.fromCharCode(0xffff); + b.write(s, 0, 'latin1'); + assert.equal(0xff, b[0]); + assert.equal(0xad, b[1]); + assert.equal(0xbe, b[2]); + assert.equal(0xef, b[3]); + s = String.fromCharCode(0xaaee); + b.write(s, 0, 'latin1'); + assert.equal(0xee, b[0]); + assert.equal(0xad, b[1]); + assert.equal(0xbe, b[2]); + assert.equal(0xef, b[3]); +} + { // Binary encoding should write only one byte per character. 
const b = Buffer([0xde, 0xad, 0xbe, 0xef]); @@ -973,6 +992,9 @@ assert.equal(0, Buffer('hello').slice(0, 0).length); // test for buffer overrun const buf = new Buffer([0, 0, 0, 0, 0]); // length: 5 var sub = buf.slice(0, 4); // length: 4 + written = sub.write('12345', 'latin1'); + assert.equal(written, 4); + assert.equal(buf[4], 0); written = sub.write('12345', 'binary'); assert.equal(written, 4); assert.equal(buf[4], 0); @@ -994,7 +1016,7 @@ assert.equal(Buffer('99').length, 2); assert.equal(Buffer('13.37').length, 5); // Ensure that the length argument is respected. -'ascii utf8 hex base64 binary'.split(' ').forEach(function(enc) { +'ascii utf8 hex base64 latin1 binary'.split(' ').forEach(function(enc) { assert.equal(Buffer(1).write('aaaaaa', 0, 1, enc), 1); }); @@ -1013,6 +1035,7 @@ Buffer(Buffer(0), 0, 0); 'utf8', 'utf-8', 'ascii', + 'latin1', 'binary', 'base64', 'ucs2', diff --git a/test/parallel/test-crypto-binary-default.js b/test/parallel/test-crypto-binary-default.js index 74447ce69d2ee9..bca8e13dd51dfd 100644 --- a/test/parallel/test-crypto-binary-default.js +++ b/test/parallel/test-crypto-binary-default.js @@ -14,7 +14,7 @@ var crypto = require('crypto'); var tls = require('tls'); const DH_NOT_SUITABLE_GENERATOR = crypto.constants.DH_NOT_SUITABLE_GENERATOR; -crypto.DEFAULT_ENCODING = 'binary'; +crypto.DEFAULT_ENCODING = 'latin1'; var fs = require('fs'); var path = require('path'); @@ -346,11 +346,11 @@ var a3 = crypto.createHash('sha512').update('Test123').digest(); // binary var a4 = crypto.createHash('sha1').update('Test123').digest('buffer'); if (!common.hasFipsCrypto) { - var a0 = crypto.createHash('md5').update('Test123').digest('binary'); + var a0 = crypto.createHash('md5').update('Test123').digest('latin1'); assert.equal( a0, 'h\u00ea\u00cb\u0097\u00d8o\fF!\u00fa+\u000e\u0017\u00ca\u00bd\u008c', - 'Test MD5 as binary' + 'Test MD5 as latin1' ); } @@ -364,7 +364,7 @@ assert.equal(a3, '\u00c1(4\u00f1\u0003\u001fd\u0097!O\'\u00d4C/&Qz\u00d4' + 
'\u00d6\u0092\u00a3\u00df\u00a2i\u00a1\u009b\n\n*\u000f' + '\u00d7\u00d6\u00a2\u00a8\u0085\u00e3<\u0083\u009c\u0093' + '\u00c2\u0006\u00da0\u00a1\u00879(G\u00ed\'', - 'Test SHA512 as assumed binary'); + 'Test SHA512 as assumed latin1'); assert.deepStrictEqual( a4, @@ -530,7 +530,7 @@ var dh2 = crypto.createDiffieHellman(p1, 'base64'); var key1 = dh1.generateKeys(); var key2 = dh2.generateKeys('hex'); var secret1 = dh1.computeSecret(key2, 'hex', 'base64'); -var secret2 = dh2.computeSecret(key1, 'binary', 'buffer'); +var secret2 = dh2.computeSecret(key1, 'latin1', 'buffer'); assert.equal(secret1, secret2.toString('base64')); diff --git a/test/parallel/test-crypto-dh.js b/test/parallel/test-crypto-dh.js index e9fbd30a2effc8..9d383ed4177200 100644 --- a/test/parallel/test-crypto-dh.js +++ b/test/parallel/test-crypto-dh.js @@ -17,7 +17,7 @@ var dh2 = crypto.createDiffieHellman(p1, 'buffer'); var key1 = dh1.generateKeys(); var key2 = dh2.generateKeys('hex'); var secret1 = dh1.computeSecret(key2, 'hex', 'base64'); -var secret2 = dh2.computeSecret(key1, 'binary', 'buffer'); +var secret2 = dh2.computeSecret(key1, 'latin1', 'buffer'); assert.equal(secret1, secret2.toString('base64')); assert.equal(dh1.verifyError, 0); @@ -155,7 +155,7 @@ const ecdh2 = crypto.createECDH('prime256v1'); key1 = ecdh1.generateKeys(); key2 = ecdh2.generateKeys('hex'); secret1 = ecdh1.computeSecret(key2, 'hex', 'base64'); -secret2 = ecdh2.computeSecret(key1, 'binary', 'buffer'); +secret2 = ecdh2.computeSecret(key1, 'latin1', 'buffer'); assert.equal(secret1, secret2.toString('base64')); @@ -176,7 +176,7 @@ const ecdh3 = crypto.createECDH('secp256k1'); const key3 = ecdh3.generateKeys(); assert.throws(function() { - ecdh2.computeSecret(key3, 'binary', 'buffer'); + ecdh2.computeSecret(key3, 'latin1', 'buffer'); }); // ECDH should allow .setPrivateKey()/.setPublicKey() diff --git a/test/parallel/test-crypto-hash.js b/test/parallel/test-crypto-hash.js index 55817ea69c64bd..81ee1d60c53977 100644 --- 
a/test/parallel/test-crypto-hash.js +++ b/test/parallel/test-crypto-hash.js @@ -38,11 +38,11 @@ a8.end(); a8 = a8.read(); if (!common.hasFipsCrypto) { - var a0 = crypto.createHash('md5').update('Test123').digest('binary'); + var a0 = crypto.createHash('md5').update('Test123').digest('latin1'); assert.equal( a0, 'h\u00ea\u00cb\u0097\u00d8o\fF!\u00fa+\u000e\u0017\u00ca\u00bd\u008c', - 'Test MD5 as binary' + 'Test MD5 as latin1' ); } assert.equal(a1, '8308651804facb7b9af8ffc53a33a22d6a1c8ac2', 'Test SHA1'); @@ -56,7 +56,7 @@ assert.deepStrictEqual( '\u00d6\u0092\u00a3\u00df\u00a2i\u00a1\u009b\n\n*\u000f' + '\u00d7\u00d6\u00a2\u00a8\u0085\u00e3<\u0083\u009c\u0093' + '\u00c2\u0006\u00da0\u00a1\u00879(G\u00ed\'', - 'binary'), + 'latin1'), 'Test SHA512 as assumed buffer'); assert.deepStrictEqual( a4, @@ -102,7 +102,7 @@ assert.equal( assert.notEqual( hutf8, - crypto.createHash('sha512').update('УТФ-8 text', 'binary').digest('hex')); + crypto.createHash('sha512').update('УТФ-8 text', 'latin1').digest('hex')); var h3 = crypto.createHash('sha256'); h3.digest(); diff --git a/test/parallel/test-crypto-padding-aes256.js b/test/parallel/test-crypto-padding-aes256.js index be1edd5edb32d3..e835867d61d005 100644 --- a/test/parallel/test-crypto-padding-aes256.js +++ b/test/parallel/test-crypto-padding-aes256.js @@ -18,13 +18,13 @@ function aes256(decipherFinal) { function encrypt(val, pad) { var c = crypto.createCipheriv('aes256', key, iv); c.setAutoPadding(pad); - return c.update(val, 'utf8', 'binary') + c.final('binary'); + return c.update(val, 'utf8', 'latin1') + c.final('latin1'); } function decrypt(val, pad) { var c = crypto.createDecipheriv('aes256', key, iv); c.setAutoPadding(pad); - return c.update(val, 'binary', 'utf8') + c[decipherFinal]('utf8'); + return c.update(val, 'latin1', 'utf8') + c[decipherFinal]('utf8'); } // echo 0123456789abcdef0123456789abcdef \ diff --git a/test/parallel/test-crypto-padding.js b/test/parallel/test-crypto-padding.js index 
a6174728735601..7822df64d9e2de 100644 --- a/test/parallel/test-crypto-padding.js +++ b/test/parallel/test-crypto-padding.js @@ -65,7 +65,7 @@ function dec(encd, pad) { var decrypt = crypto.createDecipheriv(CIPHER_NAME, KEY_PLAIN, IV_PLAIN); decrypt.setAutoPadding(pad); var plain = decrypt.update(encd, 'hex'); - plain += decrypt.final('binary'); + plain += decrypt.final('latin1'); return plain; } diff --git a/test/parallel/test-crypto-pbkdf2.js b/test/parallel/test-crypto-pbkdf2.js index ea888180a05145..c1897c2d69ebdd 100644 --- a/test/parallel/test-crypto-pbkdf2.js +++ b/test/parallel/test-crypto-pbkdf2.js @@ -13,10 +13,10 @@ var crypto = require('crypto'); // function testPBKDF2(password, salt, iterations, keylen, expected) { var actual = crypto.pbkdf2Sync(password, salt, iterations, keylen, 'sha256'); - assert.equal(actual.toString('binary'), expected); + assert.equal(actual.toString('latin1'), expected); crypto.pbkdf2(password, salt, iterations, keylen, 'sha256', (err, actual) => { - assert.equal(actual.toString('binary'), expected); + assert.equal(actual.toString('latin1'), expected); }); } diff --git a/test/parallel/test-crypto-sign-verify.js b/test/parallel/test-crypto-sign-verify.js index e8d892a8d63f15..77ecda30534a76 100644 --- a/test/parallel/test-crypto-sign-verify.js +++ b/test/parallel/test-crypto-sign-verify.js @@ -33,23 +33,23 @@ var keyPem = fs.readFileSync(common.fixturesDir + '/test_key.pem', 'ascii'); { const s2 = crypto.createSign('RSA-SHA256') .update('Test123') - .sign(keyPem, 'binary'); + .sign(keyPem, 'latin1'); let s2stream = crypto.createSign('RSA-SHA256'); s2stream.end('Test123'); - s2stream = s2stream.sign(keyPem, 'binary'); + s2stream = s2stream.sign(keyPem, 'latin1'); assert.equal(s2, s2stream, 'Stream produces same output'); let verified = crypto.createVerify('RSA-SHA256') .update('Test') .update('123') - .verify(certPem, s2, 'binary'); - assert.strictEqual(verified, true, 'sign and verify (binary)'); + .verify(certPem, s2, 'latin1'); 
+ assert.strictEqual(verified, true, 'sign and verify (latin1)'); const verStream = crypto.createVerify('RSA-SHA256'); verStream.write('Tes'); verStream.write('t12'); verStream.end('3'); - verified = verStream.verify(certPem, s2, 'binary'); + verified = verStream.verify(certPem, s2, 'latin1'); assert.strictEqual(verified, true, 'sign and verify (stream)'); } diff --git a/test/parallel/test-file-read-noexist.js b/test/parallel/test-file-read-noexist.js index 096af7cce36a70..3673b701b56878 100644 --- a/test/parallel/test-file-read-noexist.js +++ b/test/parallel/test-file-read-noexist.js @@ -6,7 +6,7 @@ var fs = require('fs'); var got_error = false; var filename = path.join(common.fixturesDir, 'does_not_exist.txt'); -fs.readFile(filename, 'binary', function(err, content) { +fs.readFile(filename, 'latin1', function(err, content) { if (err) { got_error = true; } else { diff --git a/test/parallel/test-fs-write-stream-encoding.js b/test/parallel/test-fs-write-stream-encoding.js index 7bbffec1a5d3b7..3ce47b73829b53 100644 --- a/test/parallel/test-fs-write-stream-encoding.js +++ b/test/parallel/test-fs-write-stream-encoding.js @@ -5,7 +5,7 @@ const fs = require('fs'); const path = require('path'); const stream = require('stream'); const firstEncoding = 'base64'; -const secondEncoding = 'binary'; +const secondEncoding = 'latin1'; const examplePath = path.join(common.fixturesDir, 'x.txt'); const dummyPath = path.join(common.tmpDir, 'x.txt'); diff --git a/test/parallel/test-http-status-message.js b/test/parallel/test-http-status-message.js index e579b8e995e120..a95ada58ba8dd2 100644 --- a/test/parallel/test-http-status-message.js +++ b/test/parallel/test-http-status-message.js @@ -22,7 +22,7 @@ function test() { bufs.push(chunk); }); client.on('end', function() { - var head = Buffer.concat(bufs).toString('binary').split('\r\n')[0]; + var head = Buffer.concat(bufs).toString('latin1').split('\r\n')[0]; assert.equal('HTTP/1.1 200 Custom Message', head); console.log('ok'); 
s.close(); diff --git a/test/parallel/test-net-binary.js b/test/parallel/test-net-binary.js index 484ae60c111b3a..48f8c041944966 100644 --- a/test/parallel/test-net-binary.js +++ b/test/parallel/test-net-binary.js @@ -14,9 +14,9 @@ for (var i = 255; i >= 0; i--) { // safe constructor var echoServer = net.Server(function(connection) { - connection.setEncoding('binary'); + connection.setEncoding('latin1'); connection.on('data', function(chunk) { - connection.write(chunk, 'binary'); + connection.write(chunk, 'latin1'); }); connection.on('end', function() { connection.end(); @@ -32,11 +32,11 @@ echoServer.on('listening', function() { port: common.PORT }); - c.setEncoding('binary'); + c.setEncoding('latin1'); c.on('data', function(chunk) { var n = j + chunk.length; while (j < n && j < 256) { - c.write(String.fromCharCode(j), 'binary'); + c.write(String.fromCharCode(j), 'latin1'); j++; } if (j === 256) { diff --git a/test/parallel/test-stream-writev.js b/test/parallel/test-stream-writev.js index 879684007c2485..92479fc88f2958 100644 --- a/test/parallel/test-stream-writev.js +++ b/test/parallel/test-stream-writev.js @@ -58,7 +58,7 @@ function test(decode, uncork, multi, next) { { encoding: 'ascii', chunk: 'hello, ' }, { encoding: 'utf8', chunk: 'world' }, { encoding: 'buffer', chunk: [33] }, - { encoding: 'binary', chunk: '\nand then...' }, + { encoding: 'latin1', chunk: '\nand then...' 
}, { encoding: 'hex', chunk: 'facebea7deadbeefdecafbad' } ]; @@ -82,7 +82,7 @@ function test(decode, uncork, multi, next) { w.cork(); w.write(Buffer.from('!'), 'buffer', cnt('!')); - w.write('\nand then...', 'binary', cnt('and then')); + w.write('\nand then...', 'latin1', cnt('and then')); if (multi) w.uncork(); diff --git a/test/parallel/test-stream2-writable.js b/test/parallel/test-stream2-writable.js index 2b687e0a24d1a2..6acfcde4efc313 100644 --- a/test/parallel/test-stream2-writable.js +++ b/test/parallel/test-stream2-writable.js @@ -140,6 +140,7 @@ test('write bufferize', function(t) { 'utf8', 'utf-8', 'ascii', + 'latin1', 'binary', 'base64', 'ucs2', @@ -177,6 +178,7 @@ test('write no bufferize', function(t) { 'utf8', 'utf-8', 'ascii', + 'latin1', 'binary', 'base64', 'ucs2', @@ -275,7 +277,7 @@ test('encoding should be ignored for buffers', function(t) { t.end(); }; var buf = Buffer.from(hex, 'hex'); - tw.write(buf, 'binary'); + tw.write(buf, 'latin1'); }); test('writables are not pipable', function(t) { diff --git a/test/parallel/test-stringbytes-external.js b/test/parallel/test-stringbytes-external.js index 68332232171ab4..ec7ae5aa75bafd 100644 --- a/test/parallel/test-stringbytes-external.js +++ b/test/parallel/test-stringbytes-external.js @@ -11,7 +11,13 @@ var write_str = 'a'; // first do basic checks var b = Buffer.from(write_str, 'ucs2'); -var c = b.toString('binary'); +// first check latin1 +var c = b.toString('latin1'); +assert.equal(b[0], 0x61); +assert.equal(b[1], 0); +assert.equal(ucs2_control, c); +// now check binary +c = b.toString('binary'); assert.equal(b[0], 0x61); assert.equal(b[1], 0); assert.equal(ucs2_control, c); @@ -28,13 +34,20 @@ for (let i = 0; i < b.length; i += 2) { assert.equal(b[i], 0x61); assert.equal(b[i + 1], 0); } + // create another string to create an external string -var b_bin = b.toString('binary'); var b_ucs = b.toString('ucs2'); + +// check control against external latin1 string +var l_bin = b.toString('latin1'); 
+assert.equal(ucs2_control, l_bin); + // check control against external binary string +var b_bin = b.toString('binary'); assert.equal(ucs2_control, b_bin); + // create buffer copy from external -var c_bin = Buffer.from(b_bin, 'binary'); +var c_bin = Buffer.from(l_bin, 'latin1'); var c_ucs = Buffer.from(b_ucs, 'ucs2'); // make sure they're the same length assert.equal(c_bin.length, c_ucs.length); @@ -44,8 +57,8 @@ for (let i = 0; i < c_bin.length; i++) { } // check resultant strings assert.equal(c_bin.toString('ucs2'), c_ucs.toString('ucs2')); -assert.equal(c_bin.toString('binary'), ucs2_control); -assert.equal(c_ucs.toString('binary'), ucs2_control); +assert.equal(c_bin.toString('latin1'), ucs2_control); +assert.equal(c_ucs.toString('latin1'), ucs2_control); // now let's test BASE64 and HEX ecoding/decoding From a7bd2a67a0a234b5a77983e3bc68ecea23aea93e Mon Sep 17 00:00:00 2001 From: Trevor Norris Date: Thu, 2 Jun 2016 11:22:43 -0600 Subject: [PATCH 2/2] src: remove final trace of raw encoding A message stuck around in the native API warning users not to use 'raw' encoding, followed by an abort(). This is no longer necessary since all other signs of 'raw' encoding have been removed. --- doc/api/buffer.md | 3 +-- src/node.cc | 7 ------- 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/doc/api/buffer.md b/doc/api/buffer.md index 354fed25d586ec..66832d1eddf99e 100644 --- a/doc/api/buffer.md +++ b/doc/api/buffer.md @@ -169,8 +169,7 @@ The character encodings currently supported by Node.js include: (as defined by the IANA in [RFC1345](https://tools.ietf.org/html/rfc1345), page 63, to be the Latin-1 supplement block and C0/C1 control codes). -* `'binary'` - (deprecated) A way of encoding the buffer into a one-byte - (`latin1`) encoded string. +* `'binary'` - Alias for `latin1`. * `'hex'` - Encode each byte as two hexadecimal characters. 
diff --git a/src/node.cc b/src/node.cc index da514787a3f607..0cf16a12dff08f 100644 --- a/src/node.cc +++ b/src/node.cc @@ -1395,13 +1395,6 @@ ssize_t DecodeBytes(Isolate* isolate, enum encoding encoding) { HandleScope scope(isolate); - if (val->IsArray()) { - fprintf(stderr, "'raw' encoding (array of integers) has been removed. " - "Use 'latin1'.\n"); - UNREACHABLE(); - return -1; - } - return StringBytes::Size(isolate, val, encoding); }