Skip to content

Commit

Permalink
src: support UTF-8 in compiled-in JS source files
Browse files Browse the repository at this point in the history
Detect when source files in lib/ are not ASCII.  Decode them as UTF-8
and store them as UTF-16 in the binary so they can be used as external
string resources without non-ASCII characters getting mangled.
  • Loading branch information
bnoordhuis committed Feb 2, 2017
1 parent 4e259b2 commit d9490e3
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 21 deletions.
53 changes: 38 additions & 15 deletions src/node_javascript.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,29 +6,53 @@

namespace node {

using v8::Isolate;
using v8::Local;
using v8::MaybeLocal;
using v8::NewStringType;
using v8::Object;
using v8::String;

// Wraps a compile-time array as a V8 external string resource.
// T is the array type, N its element count and P the array itself.
// The primary template is only declared; the partial specializations below
// supply the definitions for `const char[N]` and `const uint16_t[N]`.
template <typename T, size_t N, T P>
struct ExternalStringResource;

// One-byte variant: exposes a `const char[N]` array through
// String::ExternalOneByteStringResource.
template <size_t N, const char (&P)[N]>
struct ExternalStringResource<const char[N], N, P>
: public String::ExternalOneByteStringResource {
// Pointer to the first element of the wrapped array.
const char* data() const override { return P; }
// Length is the full array extent N.
size_t length() const override { return N; }
// Intentionally a no-op: instances are declared `static` by the V(id) macro
// in this file, so the default `delete this` must not run.
void Dispose() override { /* Default calls `delete this`. */ }
};

// Two-byte variant: exposes a `const uint16_t[N]` array through
// String::ExternalStringResource.
template <size_t N, const uint16_t (&P)[N]>
struct ExternalStringResource<const uint16_t[N], N, P>
: public String::ExternalStringResource {
// Pointer to the first 16-bit element of the wrapped array.
const uint16_t* data() const override { return P; }
// Length is the number of uint16_t elements, not the size in bytes.
size_t length() const override { return N; }
// Intentionally a no-op: instances are declared `static` by the V(id) macro
// in this file, so the default `delete this` must not run.
void Dispose() override { /* Default calls `delete this`. */ }
};

// id##_data is defined in node_natives.h.
// Declares one static external string resource object per built-in script.
// The element type of id##_data (taken via decltype) selects the one-byte or
// two-byte ExternalStringResource specialization, so no cast and no
// hard-coded resource base class is needed here.
// V is defined exactly once; a second #define with a different body would be
// an ill-formed macro redefinition.
#define V(id)                                                                 \
  static ExternalStringResource<decltype(id##_data),                          \
                                arraysize(id##_data),                         \
                                id##_data> id##_external_data;
NODE_NATIVES_MAP(V)
#undef V

// Overload for one-byte resources: forwards to String::NewExternalOneByte()
// and hands back its result unchanged.
inline MaybeLocal<String> ToExternal(
    Isolate* isolate,
    String::ExternalOneByteStringResource* that) {
  MaybeLocal<String> external = String::NewExternalOneByte(isolate, that);
  return external;
}

// Overload for two-byte resources: forwards to String::NewExternalTwoByte()
// and hands back its result unchanged.
inline MaybeLocal<String> ToExternal(
    Isolate* isolate,
    String::ExternalStringResource* that) {
  MaybeLocal<String> external = String::NewExternalTwoByte(isolate, that);
  return external;
}

// Returns the internal_bootstrap_node script as an external V8 string.
// Going through the ToExternal() overloads lets the resource be either
// one-byte or two-byte; calling String::NewExternalOneByte() directly only
// accepts a one-byte resource.
// ToLocalChecked() is used, so an empty MaybeLocal is treated as fatal rather
// than being returned to the caller.
Local<String> MainSource(Environment* env) {
  return ToExternal(env->isolate(),
                    &internal_bootstrap_node_external_data).ToLocalChecked();
}

void DefineJavaScript(Environment* env, Local<Object> target) {
Expand All @@ -40,8 +64,7 @@ void DefineJavaScript(Environment* env, Local<Object> target) {
env->isolate(), id##_name, NewStringType::kNormal, \
sizeof(id##_name)).ToLocalChecked(); \
auto value = \
String::NewExternalOneByte( \
env->isolate(), &id##_external_data).ToLocalChecked(); \
ToExternal(env->isolate(), &id##_external_data).ToLocalChecked(); \
CHECK(target->Set(context, key, value).FromJust()); \
} while (0);
NODE_NATIVES_MAP(V)
Expand Down
24 changes: 18 additions & 6 deletions tools/js2c.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,16 @@
import string


def ToCArray(elements, step=10):
  """Format a sequence of values as the body of a C array initializer.

  Each value is rendered with str() and separated by commas, with `step`
  values per output line; lines are joined by a comma followed by a newline.
  `elements` must support len() and slicing.  An empty sequence yields ''.
  """
  # range() behaves the same as xrange() here and also works on Python 3.
  rows = (elements[i:i+step] for i in range(0, len(elements), step))
  return ',\n'.join(','.join(str(x) for x in row) for row in rows)


def ToCString(contents):
  """Render every character of `contents` as its ordinal, 20 per line."""
  ordinals = map(ord, contents)
  return ToCArray(ordinals, step=20)


def ReadFile(filename):
file = open(filename, "rt")
try:
Expand Down Expand Up @@ -186,7 +189,7 @@ def ReadMacros(lines):
# Template for the C definitions emitted for one source file: a uint8_t array
# holding the name and a {ctype} array holding the contents.  Doubled braces
# are literal braces, so this is presumably filled in with str.format().
# The data array is declared exactly once, with {ctype} as its element type;
# a second hard-coded uint8_t declaration line would open an array that is
# never closed in the generated C.
SOURCES = """\
static const uint8_t {escaped_id}_name[] = {{
{name}}};
static const {ctype} {escaped_id}_data[] = {{
{data}}};
"""

Expand Down Expand Up @@ -214,7 +217,16 @@ def JS2C(source, target):
lines = ReadFile(str(s))
lines = ExpandConstants(lines, consts)
lines = ExpandMacros(lines, macros)
data = ToCString(lines)

# Treat non-ASCII as UTF-8 and convert it to UTF-16.
if any(ord(c) > 127 for c in lines):
ctype = 'uint16_t'
data = map(ord, lines.decode('utf-8').encode('utf-16be'))
data = [data[i] * 256 + data[i+1] for i in xrange(0, len(data), 2)]
data = ToCArray(data)
else:
ctype = 'char'
data = ToCString(lines)

# On Windows, "./foo.bar" in the .gyp file is passed as "foo.bar"
# so don't assume there is always a slash in the file path.
Expand Down

0 comments on commit d9490e3

Please sign in to comment.