From 3380cd5fdb7cf13209d48e4fa5901637ffde17e2 Mon Sep 17 00:00:00 2001 From: Ben Noordhuis Date: Thu, 2 Feb 2017 23:23:49 +0100 Subject: [PATCH] src: support UTF-8 in compiled-in JS source files Detect it when source files in lib/ are not ASCII. Decode them as UTF-8 and store them as UTF-16 in the binary so they can be used as external string resources without non-ASCII characters getting mangled. Fixes: https://github.com/nodejs/node/issues/10673 PR-URL: https://github.com/nodejs/node/pull/11129 Reviewed-By: Anna Henningsen Reviewed-By: James M Snell --- node.gyp | 8 +-- src/node_javascript.cc | 51 ------------------ tools/js2c.py | 117 +++++++++++++++++++++++++++-------------- 3 files changed, 81 insertions(+), 95 deletions(-) delete mode 100644 src/node_javascript.cc diff --git a/node.gyp b/node.gyp index 36001019410090..69b26d340d6666 100644 --- a/node.gyp +++ b/node.gyp @@ -145,7 +145,7 @@ 'src', 'tools/msvs/genfiles', 'deps/uv/src/ares', - '<(SHARED_INTERMEDIATE_DIR)', # for node_natives.h + '<(SHARED_INTERMEDIATE_DIR)', ], 'sources': [ @@ -166,7 +166,6 @@ 'src/node_debug_options.cc', 'src/node_file.cc', 'src/node_http_parser.cc', - 'src/node_javascript.cc', 'src/node_main.cc', 'src/node_os.cc', 'src/node_revert.cc', @@ -234,11 +233,11 @@ 'deps/http_parser/http_parser.h', 'deps/v8/include/v8.h', 'deps/v8/include/v8-debug.h', - '<(SHARED_INTERMEDIATE_DIR)/node_natives.h', # javascript files to make for an even more pleasant IDE experience '<@(library_files)', # node.gyp is added to the project by default. 'common.gypi', + '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc', ], 'defines': [ @@ -711,12 +710,13 @@ 'actions': [ { 'action_name': 'node_js2c', + 'process_outputs_as_sources': 1, 'inputs': [ '<@(library_files)', './config.gypi', ], 'outputs': [ - '<(SHARED_INTERMEDIATE_DIR)/node_natives.h', + '<(SHARED_INTERMEDIATE_DIR)/node_javascript.cc', ], 'conditions': [ [ 'node_use_dtrace=="false" and node_use_etw=="false"', { diff --git a/src/node_javascript.cc b/src/node_javascript.cc deleted file mode 100644 index 3f6d6c82a85269..00000000000000 --- a/src/node_javascript.cc +++ /dev/null @@ -1,51 +0,0 @@ -#include "node.h" -#include "node_natives.h" -#include "v8.h" -#include "env.h" -#include "env-inl.h" - -namespace node { - -using v8::Local; -using v8::NewStringType; -using v8::Object; -using v8::String; - -// id##_data is defined in node_natives.h. -#define V(id) \ - static struct : public String::ExternalOneByteStringResource { \ - const char* data() const override { \ - return reinterpret_cast(id##_data); \ - } \ - size_t length() const override { return sizeof(id##_data); } \ - void Dispose() override { /* Default calls `delete this`. */ } \ - } id##_external_data; -NODE_NATIVES_MAP(V) -#undef V - -Local MainSource(Environment* env) { - auto maybe_string = - String::NewExternalOneByte( - env->isolate(), - &internal_bootstrap_node_external_data); - return maybe_string.ToLocalChecked(); -} - -void DefineJavaScript(Environment* env, Local target) { - auto context = env->context(); -#define V(id) \ - do { \ - auto key = \ - String::NewFromOneByte( \ - env->isolate(), id##_name, NewStringType::kNormal, \ - sizeof(id##_name)).ToLocalChecked(); \ - auto value = \ - String::NewExternalOneByte( \ - env->isolate(), &id##_external_data).ToLocalChecked(); \ - CHECK(target->Set(context, key, value).FromJust()); \ - } while (0); - NODE_NATIVES_MAP(V) -#undef V -} - -} // namespace node diff --git a/tools/js2c.py b/tools/js2c.py index 4808c56813ce17..f7951617d34064 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -37,13 +37,16 @@ import string -def ToCString(contents): - step = 20 - slices = (contents[i:i+step] for i in xrange(0, len(contents), step)) - slices = map(lambda s: ','.join(str(ord(c)) for c in s), slices) +def ToCArray(elements, step=10): + slices = (elements[i:i+step] for i in xrange(0, len(elements), step)) + slices = map(lambda s: ','.join(str(x) for x in s), slices) return ',\n'.join(slices) +def ToCString(contents): + return ToCArray(map(ord, contents), step=20) + + def ReadFile(filename): file = open(filename, "rt") try: @@ -161,34 +164,72 @@ def ReadMacros(lines): return (constants, macros) -HEADER_TEMPLATE = """\ -#ifndef NODE_NATIVES_H_ -#define NODE_NATIVES_H_ +TEMPLATE = """ +#include "node.h" +#include "node_javascript.h" +#include "v8.h" +#include "env.h" +#include "env-inl.h" -#include +namespace node {{ -#define NODE_NATIVES_MAP(V) \\ -{node_natives_map} +{definitions} + +v8::Local MainSource(Environment* env) {{ + return internal_bootstrap_node_value.ToStringChecked(env->isolate()); +}} + +void DefineJavaScript(Environment* env, v8::Local target) {{ + {initializers} +}} -namespace node {{ -{sources} }} // namespace node +""" -#endif // NODE_NATIVES_H_ +ONE_BYTE_STRING = """ +static const uint8_t raw_{var}[] = {{ {data} }}; +static struct : public v8::String::ExternalOneByteStringResource {{ + const char* data() const override {{ + return reinterpret_cast(raw_{var}); + }} + size_t length() const override {{ return arraysize(raw_{var}); }} + void Dispose() override {{ /* Default calls `delete this`. */ }} + v8::Local ToStringChecked(v8::Isolate* isolate) {{ + return v8::String::NewExternalOneByte(isolate, this).ToLocalChecked(); + }} +}} {var}; """ +TWO_BYTE_STRING = """ +static const uint16_t raw_{var}[] = {{ {data} }}; +static struct : public v8::String::ExternalStringResource {{ + const uint16_t* data() const override {{ return raw_{var}; }} + size_t length() const override {{ return arraysize(raw_{var}); }} + void Dispose() override {{ /* Default calls `delete this`. */ }} + v8::Local ToStringChecked(v8::Isolate* isolate) {{ + return v8::String::NewExternalTwoByte(isolate, this).ToLocalChecked(); + }} +}} {var}; +""" -NODE_NATIVES_MAP = """\ - V({escaped_id}) \\ +INITIALIZER = """\ +CHECK(target->Set(env->context(), + {key}.ToStringChecked(env->isolate()), + {value}.ToStringChecked(env->isolate())).FromJust()); """ -SOURCES = """\ -static const uint8_t {escaped_id}_name[] = {{ -{name}}}; -static const uint8_t {escaped_id}_data[] = {{ -{data}}}; -""" +def Render(var, data): + # Treat non-ASCII as UTF-8 and convert it to UTF-16. + if any(ord(c) > 127 for c in data): + template = TWO_BYTE_STRING + data = map(ord, data.decode('utf-8').encode('utf-16be')) + data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)] + data = ToCArray(data) + else: + template = ONE_BYTE_STRING + data = ToCString(data) + return template.format(var=var, data=data) def JS2C(source, target): @@ -207,36 +248,32 @@ def JS2C(source, target): (consts, macros) = ReadMacros(macro_lines) # Build source code lines - node_natives_map = [] - sources = [] + definitions = [] + initializers = [] - for s in modules: - lines = ReadFile(str(s)) + for name in modules: + lines = ReadFile(str(name)) lines = ExpandConstants(lines, consts) lines = ExpandMacros(lines, macros) - data = ToCString(lines) # On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar" # so don't assume there is always a slash in the file path. - if '/' in s or '\\' in s: - id = '/'.join(re.split('/|\\\\', s)[1:]) - else: - id = s - - if '.' in id: - id = id.split('.', 1)[0] + if '/' in name or '\\' in name: + name = '/'.join(re.split('/|\\\\', name)[1:]) - name = ToCString(id) - escaped_id = id.replace('-', '_').replace('/', '_') - node_natives_map.append(NODE_NATIVES_MAP.format(**locals())) - sources.append(SOURCES.format(**locals())) + name = name.split('.', 1)[0] + var = name.replace('-', '_').replace('/', '_') + key = '%s_key' % var + value = '%s_value' % var - node_natives_map = ''.join(node_natives_map) - sources = ''.join(sources) + definitions.append(Render(key, name)) + definitions.append(Render(value, lines)) + initializers.append(INITIALIZER.format(key=key, value=value)) # Emit result output = open(str(target[0]), "w") - output.write(HEADER_TEMPLATE.format(**locals())) + output.write(TEMPLATE.format(definitions=''.join(definitions), + initializers=''.join(initializers))) output.close() def main():