Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bootstrap: make snapshot reproducible #50983

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/aliased_buffer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

namespace node {

typedef size_t AliasedBufferIndex;
typedef uint64_t AliasedBufferIndex;

template <typename NativeT, typename V8T>
AliasedBufferBase<NativeT, V8T>::AliasedBufferBase(
Expand Down
2 changes: 1 addition & 1 deletion src/aliased_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@

namespace node {

typedef size_t AliasedBufferIndex;
typedef uint64_t AliasedBufferIndex;

/**
* Do not use this class directly when creating instances of it - use the
Expand Down
7 changes: 5 additions & 2 deletions src/base_object_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,13 @@ namespace node {
SERIALIZABLE_NON_BINDING_TYPES(V)

#define V(TypeId, NativeType) k_##TypeId,
enum class BindingDataType : uint8_t { BINDING_TYPES(V) kBindingDataTypeCount };
// To avoid padding, the enums are uint64_t.
enum class BindingDataType : uint64_t {
BINDING_TYPES(V) kBindingDataTypeCount
};
// Make sure that we put the bindings first so that we can also use the enums
// for the bindings as index to the binding data store.
enum class EmbedderObjectType : uint8_t {
enum class EmbedderObjectType : uint64_t {
BINDING_TYPES(V) SERIALIZABLE_NON_BINDING_TYPES(V)
// We do not need to know about all the unserializable non-binding types for
// now so we do not list them.
Expand Down
24 changes: 19 additions & 5 deletions src/blob_serializer_deserializer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -238,7 +238,8 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
if (is_debug) {
std::string str = std::is_arithmetic_v<T> ? "" : ToStr(data);
std::string name = GetName<T>();
Debug("\nWriteVector<%s>() (%d-byte), count=%d: %s\n",
Debug("\nAt 0x%x: WriteVector<%s>() (%d-byte), count=%d: %s\n",
sink.size(),
name.c_str(),
sizeof(T),
data.size(),
Expand Down Expand Up @@ -270,7 +271,10 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
template <typename Impl>
size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
StringLogMode mode) {
Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
Debug("At 0x%x: WriteStringView(), length=%zu: %p\n",
sink.size(),
data.size(),
data.data());
size_t written_total = WriteArithmetic<size_t>(data.size());

size_t length = data.size();
Expand All @@ -294,17 +298,27 @@ size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
return WriteStringView(data, StringLogMode::kAddressAndContent);
}

// Maximum number of leading elements printed in the debug preview emitted
// when writing an array of arithmetic values. It is a compile-time constant:
// this definition lives in a header, so a mutable `static` would give every
// including translation unit its own writable copy.
static constexpr size_t kPreviewCount = 16;

// Helper for writing an array of numeric types.
template <typename Impl>
template <typename T>
size_t BlobSerializer<Impl>::WriteArithmetic(const T* data, size_t count) {
static_assert(std::is_arithmetic_v<T>, "Arithmetic type");
DCHECK_GT(count, 0); // Should not write contents for vectors of size 0.
if (is_debug) {
std::string str =
"{ " + std::to_string(data[0]) + (count > 1 ? ", ... }" : " }");
size_t preview_count = count < kPreviewCount ? count : kPreviewCount;
std::string str = "{ ";
for (size_t i = 0; i < preview_count; ++i) {
str += (std::to_string(data[i]) + ",");
}
if (count > preview_count) {
str += "...";
}
str += "}";
std::string name = GetName<T>();
Debug("Write<%s>() (%zu-byte), count=%zu: %s",
Debug("At 0x%x: Write<%s>() (%zu-byte), count=%zu: %s",
sink.size(),
name.c_str(),
sizeof(T),
count,
Expand Down
6 changes: 6 additions & 0 deletions src/encoding_binding.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@ class BindingData : public SnapshotableObject {
AliasedBufferIndex encode_into_results_buffer;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(sizeof(InternalFieldInfo) ==
sizeof(InternalFieldInfoBase) + sizeof(AliasedBufferIndex),
"InternalFieldInfo should have no padding");

BindingData(Realm* realm,
v8::Local<v8::Object> obj,
InternalFieldInfo* info = nullptr);
Expand Down
24 changes: 15 additions & 9 deletions src/node.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1288,18 +1288,24 @@ ExitCode GenerateAndWriteSnapshotData(const SnapshotData** snapshot_data_ptr,
return exit_code;
}
} else {
std::optional<std::string> builder_script_content;
// Otherwise, load and run the specified builder script.
std::unique_ptr<SnapshotData> generated_data =
std::make_unique<SnapshotData>();
std::string builder_script_content;
int r = ReadFileSync(&builder_script_content, builder_script.c_str());
if (r != 0) {
FPrintF(stderr,
"Cannot read builder script %s for building snapshot. %s: %s",
builder_script,
uv_err_name(r),
uv_strerror(r));
return ExitCode::kGenericUserError;
if (builder_script != "node:generate_default_snapshot") {
builder_script_content = std::string();
int r = ReadFileSync(&(builder_script_content.value()),
builder_script.c_str());
if (r != 0) {
FPrintF(stderr,
"Cannot read builder script %s for building snapshot. %s: %s\n",
builder_script,
uv_err_name(r),
uv_strerror(r));
return ExitCode::kGenericUserError;
}
} else {
snapshot_config.builder_script_path = std::nullopt;
}

exit_code = node::SnapshotBuilder::Generate(generated_data.get(),
Expand Down
6 changes: 6 additions & 0 deletions src/node_file.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,12 @@ class BindingData : public SnapshotableObject {
AliasedBufferIndex statfs_field_bigint_array;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(sizeof(InternalFieldInfo) == sizeof(InternalFieldInfoBase) +
sizeof(AliasedBufferIndex) * 4,
"InternalFieldInfo should have no padding");

enum class FilePathIsFileReturnType {
kIsFile = 0,
kIsNotFile,
Expand Down
6 changes: 6 additions & 0 deletions src/node_process.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,12 @@ class BindingData : public SnapshotableObject {
AliasedBufferIndex hrtime_buffer;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(sizeof(InternalFieldInfo) ==
sizeof(InternalFieldInfoBase) + sizeof(AliasedBufferIndex),
"InternalFieldInfo should have no padding");

static void AddMethods(v8::Isolate* isolate,
v8::Local<v8::ObjectTemplate> target);
static void RegisterExternalReferences(ExternalReferenceRegistry* registry);
Expand Down
57 changes: 38 additions & 19 deletions src/node_snapshotable.cc
Original file line number Diff line number Diff line change
Expand Up @@ -282,9 +282,9 @@ size_t SnapshotSerializer::Write(const PropInfo& data) {
}

// Layout of AsyncHooks::SerializeInfo
// [ 4/8 bytes ] snapshot index of async_ids_stack
// [ 4/8 bytes ] snapshot index of fields
// [ 4/8 bytes ] snapshot index of async_id_fields
// [ 8 bytes ] snapshot index of async_ids_stack
// [ 8 bytes ] snapshot index of fields
// [ 8 bytes ] snapshot index of async_id_fields
// [ 4/8 bytes ] snapshot index of js_execution_async_resources
// [ 4/8 bytes ] length of native_execution_async_resources
// [ ... ] snapshot indices of each element in
Expand Down Expand Up @@ -387,9 +387,9 @@ size_t SnapshotSerializer::Write(const ImmediateInfo::SerializeInfo& data) {
}

// Layout of PerformanceState::SerializeInfo
// [ 4/8 bytes ] snapshot index of root
// [ 4/8 bytes ] snapshot index of milestones
// [ 4/8 bytes ] snapshot index of observers
// [ 8 bytes ] snapshot index of root
// [ 8 bytes ] snapshot index of milestones
// [ 8 bytes ] snapshot index of observers
template <>
performance::PerformanceState::SerializeInfo SnapshotDeserializer::Read() {
Debug("Read<PerformanceState::SerializeInfo>()\n");
Expand Down Expand Up @@ -599,16 +599,17 @@ std::vector<char> SnapshotData::ToBlob() const {
size_t written_total = 0;

// Metadata
w.Debug("Write magic %" PRIx32 "\n", kMagic);
w.Debug("0x%x: Write magic %" PRIx32 "\n", w.sink.size(), kMagic);
written_total += w.WriteArithmetic<uint32_t>(kMagic);
w.Debug("Write metadata\n");
w.Debug("0x%x: Write metadata\n", w.sink.size());
written_total += w.Write<SnapshotMetadata>(metadata);

w.Debug("0x%x: Write snapshot blob\n", w.sink.size());
written_total += w.Write<v8::StartupData>(v8_snapshot_blob_data);
w.Debug("Write isolate_data_indices\n");
w.Debug("0x%x: Write IsolateDataSerializeInfo\n", w.sink.size());
written_total += w.Write<IsolateDataSerializeInfo>(isolate_data_info);
w.Debug("0x%x: Write EnvSerializeInfo\n", w.sink.size());
written_total += w.Write<EnvSerializeInfo>(env_info);
w.Debug("Write code_cache\n");
w.Debug("0x%x: Write CodeCacheInfo\n", w.sink.size());
written_total += w.WriteVector<builtins::CodeCacheInfo>(code_cache);
w.Debug("SnapshotData::ToBlob() Wrote %d bytes\n", written_total);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Might be worth checking that DCHECK_EQ(written_total, w.sink.size()) holds.


Expand Down Expand Up @@ -1262,18 +1263,30 @@ void DeserializeNodeContextData(Local<Context> holder,
int index,
StartupData payload,
void* callback_data) {
// This is unreachable for now. We will reset all the pointers in
// Environment::AssignToContext() via the realm constructor.
UNREACHABLE();
// We will reset all the pointers in Environment::AssignToContext()
// via the realm constructor.
switch (index) {
case ContextEmbedderIndex::kEnvironment:
case ContextEmbedderIndex::kContextifyContext:
case ContextEmbedderIndex::kRealm:
case ContextEmbedderIndex::kContextTag: {
uint64_t index_64;
int size = sizeof(index_64);
CHECK_EQ(payload.raw_size, size);
memcpy(&index_64, payload.data, payload.raw_size);
CHECK_EQ(index_64, static_cast<uint64_t>(index));
break;
}
default:
UNREACHABLE();
}
}

StartupData SerializeNodeContextData(Local<Context> holder,
int index,
void* callback_data) {
// For now we just reset all of them in Environment::AssignToContext().
// We return empty data here to make sure that the embedder data serialized
// into the snapshot is reproducible and V8 doesn't have to try to serialize
// the pointer values that won't be useful during deserialization.
// For pointer values, we need to return some non-empty data; otherwise V8
// would serialize them verbatim, which would make the snapshot
// unreproducible.
switch (index) {
case ContextEmbedderIndex::kEnvironment:
case ContextEmbedderIndex::kContextifyContext:
Expand All @@ -1286,7 +1299,13 @@ StartupData SerializeNodeContextData(Local<Context> holder,
static_cast<int>(index),
*holder,
data);
return {nullptr, 0};
// We use uint64_t to avoid padding.
uint64_t index_64 = static_cast<uint64_t>(index);
// It must be allocated with new[] because V8 will call delete[] on it.
size_t size = sizeof(index_64);
char* startup_data = new char[size];
memcpy(startup_data, &index_64, size);
return {startup_data, static_cast<int>(size)};
}
default:
UNREACHABLE();
Expand Down
37 changes: 33 additions & 4 deletions src/node_snapshotable.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

#include <cassert> // For static_assert
#include <cstddef> // For offsetof
#include "aliased_buffer.h"
#include "base_object.h"
#include "util.h"
Expand Down Expand Up @@ -33,13 +35,13 @@ bool WithoutCodeCache(const SnapshotConfig& config);
// and pass it into the V8 callback as the payload of StartupData.
// The memory chunk looks like this:
//
// [ type ] - EmbedderObjectType (a uint8_t)
// [ length ] - a size_t
// [ type ] - EmbedderObjectType (a uint64_t)
// [ length ] - a uint64_t
// [ ... ] - custom bytes of size |length - header size|
struct InternalFieldInfoBase {
public:
EmbedderObjectType type;
size_t length;
uint64_t length;

template <typename T>
static T* New(EmbedderObjectType type) {
Expand Down Expand Up @@ -71,14 +73,35 @@ struct InternalFieldInfoBase {
InternalFieldInfoBase() = default;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(offsetof(InternalFieldInfoBase, type) == 0,
"InternalFieldInfoBase::type should start from offset 0");
static_assert(offsetof(InternalFieldInfoBase, length) ==
sizeof(EmbedderObjectType),
"InternalFieldInfoBase::type should have no padding");

struct EmbedderTypeInfo {
enum class MemoryMode : uint8_t { kBaseObject, kCppGC };
// To avoid padding, the enum is uint64_t.
enum class MemoryMode : uint64_t { kBaseObject = 0, kCppGC };
EmbedderTypeInfo(EmbedderObjectType t, MemoryMode m) : type(t), mode(m) {}
EmbedderTypeInfo() = default;

EmbedderObjectType type;
MemoryMode mode;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(offsetof(EmbedderTypeInfo, type) == 0,
"EmbedderTypeInfo::type should start from offset 0");
static_assert(offsetof(EmbedderTypeInfo, mode) == sizeof(EmbedderObjectType),
"EmbedderTypeInfo::type should have no padding");
static_assert(sizeof(EmbedderTypeInfo) ==
sizeof(EmbedderObjectType) +
sizeof(EmbedderTypeInfo::MemoryMode),
"EmbedderTypeInfo::mode should have no padding");

// An interface for snapshotable native objects to inherit from.
// Use the SERIALIZABLE_OBJECT_METHODS() macro in the class to define
// the following methods to implement:
Expand Down Expand Up @@ -150,6 +173,12 @@ class BindingData : public SnapshotableObject {
AliasedBufferIndex is_building_snapshot_buffer;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(sizeof(InternalFieldInfo) ==
sizeof(InternalFieldInfoBase) + sizeof(AliasedBufferIndex),
"InternalFieldInfo should have no padding");

BindingData(Realm* realm,
v8::Local<v8::Object> obj,
InternalFieldInfo* info = nullptr);
Expand Down
7 changes: 7 additions & 0 deletions src/node_v8.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,13 @@ class BindingData : public SnapshotableObject {
AliasedBufferIndex heap_space_statistics_buffer;
AliasedBufferIndex heap_code_statistics_buffer;
};

// Make sure that there's no padding in the struct since we will memcpy
// them into the snapshot blob and they need to be reproducible.
static_assert(sizeof(InternalFieldInfo) == sizeof(InternalFieldInfoBase) +
sizeof(AliasedBufferIndex) * 3,
"InternalFieldInfo should have no padding");

BindingData(Realm* realm,
v8::Local<v8::Object> obj,
InternalFieldInfo* info = nullptr);
Expand Down
Loading