Skip to content

Commit

Permalink
src: use Blob{Des|S}erializer for SEA blobs
Browse files Browse the repository at this point in the history
PR-URL: #47962
Reviewed-By: Darshan Sen <[email protected]>
  • Loading branch information
joyeecheung authored and targos committed May 30, 2023
1 parent 6c60d90 commit 33aa373
Show file tree
Hide file tree
Showing 8 changed files with 206 additions and 93 deletions.
52 changes: 27 additions & 25 deletions src/blob_serializer_deserializer-inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

#include "debug_utils-inl.h"

// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.

namespace node {

Expand Down Expand Up @@ -130,22 +130,22 @@ std::vector<T> BlobDeserializer<Impl>::ReadVector() {

template <typename Impl>
std::string BlobDeserializer<Impl>::ReadString() {
size_t length = ReadArithmetic<size_t>();

if (is_debug) {
Debug("ReadString(), length=%d: ", length);
}
std::string_view view = ReadStringView(StringLogMode::kAddressAndContent);
return std::string(view);
}

CHECK_GT(length, 0); // There should be no empty strings.
MallocedBuffer<char> buf(length + 1);
memcpy(buf.data, sink.data() + read_total, length + 1);
std::string result(buf.data, length); // This creates a copy of buf.data.
template <typename Impl>
std::string_view BlobDeserializer<Impl>::ReadStringView(StringLogMode mode) {
size_t length = ReadArithmetic<size_t>();
Debug("ReadStringView(), length=%zu: ", length);

if (is_debug) {
Debug("\"%s\", read %zu bytes\n", result.c_str(), length + 1);
std::string_view result(sink.data() + read_total, length);
Debug("%p, read %zu bytes\n", result.data(), result.size());
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", result);
}

read_total += length + 1;
read_total += length;
return result;
}

Expand Down Expand Up @@ -262,26 +262,28 @@ size_t BlobSerializer<Impl>::WriteVector(const std::vector<T>& data) {
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
CHECK_GT(data.size(), 0); // No empty strings should be written.
size_t BlobSerializer<Impl>::WriteStringView(std::string_view data,
StringLogMode mode) {
Debug("WriteStringView(), length=%zu: %p\n", data.size(), data.data());
size_t written_total = WriteArithmetic<size_t>(data.size());
if (is_debug) {
std::string str = ToStr(data);
Debug("WriteString(), length=%zu: \"%s\"\n", data.size(), data.c_str());
}

// Write the null-terminated string.
size_t length = data.size() + 1;
sink.insert(sink.end(), data.c_str(), data.c_str() + length);
size_t length = data.size();
sink.insert(sink.end(), data.data(), data.data() + length);
written_total += length;

if (is_debug) {
Debug("WriteString() wrote %zu bytes\n", written_total);
Debug("WriteStringView() wrote %zu bytes\n", written_total);
if (mode == StringLogMode::kAddressAndContent) {
Debug("%s", data);
}

return written_total;
}

template <typename Impl>
size_t BlobSerializer<Impl>::WriteString(const std::string& data) {
return WriteStringView(data, StringLogMode::kAddressAndContent);
}

// Helper for writing an array of numeric types.
template <typename Impl>
template <typename T>
Expand Down
18 changes: 11 additions & 7 deletions src/blob_serializer_deserializer.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@

#if defined(NODE_WANT_INTERNALS) && NODE_WANT_INTERNALS

// This is related to the blob that is used in snapshots and has nothing to do
// with `node_blob.h`.
// This is related to the blob that is used in snapshots and single executable
// applications and has nothing to do with `node_blob.h`.

namespace node {

Expand All @@ -27,6 +27,11 @@ class BlobSerializerDeserializer {
bool is_debug = false;
};

enum class StringLogMode {
kAddressOnly, // Can be used when the string contains binary content.
kAddressAndContent,
};

// Child classes are expected to implement T Read<T>() where
// !std::is_arithmetic_v<T> && !std::is_same_v<T, std::string>
template <typename Impl>
Expand All @@ -52,7 +57,9 @@ class BlobDeserializer : public BlobSerializerDeserializer {
template <typename T>
std::vector<T> ReadVector();

// ReadString() creates a copy of the data. ReadStringView() doesn't.
std::string ReadString();
std::string_view ReadStringView(StringLogMode mode);

// Helper for reading an array of numeric types.
template <typename T>
Expand All @@ -77,11 +84,7 @@ template <typename Impl>
class BlobSerializer : public BlobSerializerDeserializer {
public:
explicit BlobSerializer(bool is_debug_v)
: BlobSerializerDeserializer(is_debug_v) {
// Currently the snapshot blob built with an empty script is around 4MB.
// So use that as the default sink size.
sink.reserve(4 * 1024 * 1024);
}
: BlobSerializerDeserializer(is_debug_v) {}
~BlobSerializer() {}

Impl* impl() { return static_cast<Impl*>(this); }
Expand All @@ -102,6 +105,7 @@ class BlobSerializer : public BlobSerializerDeserializer {
// The layout of a written string:
// [ 4/8 bytes ] length
// [ |length| bytes ] contents
size_t WriteStringView(std::string_view data, StringLogMode mode);
size_t WriteString(const std::string& data);

// Helper for writing an array of numeric types.
Expand Down
1 change: 1 addition & 0 deletions src/debug_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ void NODE_EXTERN_PRIVATE FWrite(FILE* file, const std::string& str);
V(INSPECTOR_PROFILER) \
V(CODE_CACHE) \
V(NGTCP2_DEBUG) \
V(SEA) \
V(WASI) \
V(MKSNAPSHOT)

Expand Down
10 changes: 6 additions & 4 deletions src/node_main_instance.cc
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,16 @@ ExitCode NodeMainInstance::Run() {

void NodeMainInstance::Run(ExitCode* exit_code, Environment* env) {
if (*exit_code == ExitCode::kNoFailure) {
bool is_sea = false;
bool runs_sea_code = false;
#ifndef DISABLE_SINGLE_EXECUTABLE_APPLICATION
if (sea::IsSingleExecutable()) {
is_sea = true;
LoadEnvironment(env, sea::FindSingleExecutableCode());
runs_sea_code = true;
sea::SeaResource sea = sea::FindSingleExecutableResource();
std::string_view code = sea.code;
LoadEnvironment(env, code);
}
#endif
if (!is_sea) {
if (!runs_sea_code) {
LoadEnvironment(env, StartExecutionCallback{});
}

Expand Down
147 changes: 93 additions & 54 deletions src/node_sea.cc
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "node_sea.h"

#include "blob_serializer_deserializer-inl.h"
#include "debug_utils-inl.h"
#include "env-inl.h"
#include "json_parser.h"
Expand Down Expand Up @@ -34,16 +35,6 @@ namespace node {
namespace sea {

namespace {
// A special number that will appear at the beginning of the single executable
// preparation blobs ready to be injected into the binary. We use this to check
// that the data given to us are intended for building single executable
// applications.
const uint32_t kMagic = 0x143da20;

enum class SeaFlags : uint32_t {
kDefault = 0,
kDisableExperimentalSeaWarning = 1 << 0,
};

SeaFlags operator|(SeaFlags x, SeaFlags y) {
return static_cast<SeaFlags>(static_cast<uint32_t>(x) |
Expand All @@ -59,47 +50,100 @@ SeaFlags operator|=(/* NOLINT (runtime/references) */ SeaFlags& x, SeaFlags y) {
return x = x | y;
}

struct SeaResource {
SeaFlags flags = SeaFlags::kDefault;
std::string_view code;
static constexpr size_t kHeaderSize = sizeof(kMagic) + sizeof(SeaFlags);
class SeaSerializer : public BlobSerializer<SeaSerializer> {
public:
SeaSerializer()
: BlobSerializer<SeaSerializer>(
per_process::enabled_debug_list.enabled(DebugCategory::SEA)) {}

template <typename T,
std::enable_if_t<!std::is_same<T, std::string>::value>* = nullptr,
std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
size_t Write(const T& data);
};

SeaResource FindSingleExecutableResource() {
template <>
size_t SeaSerializer::Write(const SeaResource& sea) {
sink.reserve(SeaResource::kHeaderSize + sea.code.size());

Debug("Write SEA magic %x\n", kMagic);
size_t written_total = WriteArithmetic<uint32_t>(kMagic);

uint32_t flags = static_cast<uint32_t>(sea.flags);
Debug("Write SEA flags %x\n", flags);
written_total += WriteArithmetic<uint32_t>(flags);
DCHECK_EQ(written_total, SeaResource::kHeaderSize);

Debug("Write SEA resource code %p, size=%zu\n",
sea.code.data(),
sea.code.size());
written_total += WriteStringView(sea.code, StringLogMode::kAddressAndContent);
return written_total;
}

class SeaDeserializer : public BlobDeserializer<SeaDeserializer> {
public:
explicit SeaDeserializer(std::string_view v)
: BlobDeserializer<SeaDeserializer>(
per_process::enabled_debug_list.enabled(DebugCategory::SEA), v) {}

template <typename T,
std::enable_if_t<!std::is_same<T, std::string>::value>* = nullptr,
std::enable_if_t<!std::is_arithmetic<T>::value>* = nullptr>
T Read();
};

template <>
SeaResource SeaDeserializer::Read() {
uint32_t magic = ReadArithmetic<uint32_t>();
Debug("Read SEA magic %x\n", magic);

CHECK_EQ(magic, kMagic);
SeaFlags flags(static_cast<SeaFlags>(ReadArithmetic<uint32_t>()));
Debug("Read SEA flags %x\n", static_cast<uint32_t>(flags));
CHECK_EQ(read_total, SeaResource::kHeaderSize);

std::string_view code = ReadStringView(StringLogMode::kAddressAndContent);
Debug("Read SEA resource code %p, size=%zu\n", code.data(), code.size());
return {flags, code};
}

std::string_view FindSingleExecutableBlob() {
CHECK(IsSingleExecutable());
static const SeaResource sea_resource = []() -> SeaResource {
static const std::string_view result = []() -> std::string_view {
size_t size;
#ifdef __APPLE__
postject_options options;
postject_options_init(&options);
options.macho_segment_name = "NODE_SEA";
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, &options));
#else
const char* code = static_cast<const char*>(
const char* blob = static_cast<const char*>(
postject_find_resource("NODE_SEA_BLOB", &size, nullptr));
#endif
uint32_t first_word = reinterpret_cast<const uint32_t*>(code)[0];
CHECK_EQ(first_word, kMagic);
SeaFlags flags{
reinterpret_cast<const SeaFlags*>(code + sizeof(first_word))[0]};
// TODO(joyeecheung): do more checks here e.g. matching the versions.
return {
flags,
{
code + SeaResource::kHeaderSize,
size - SeaResource::kHeaderSize,
},
};
return {blob, size};
}();
return sea_resource;
per_process::Debug(DebugCategory::SEA,
"Found SEA blob %p, size=%zu\n",
result.data(),
result.size());
return result;
}

} // namespace
} // anonymous namespace

std::string_view FindSingleExecutableCode() {
SeaResource sea_resource = FindSingleExecutableResource();
return sea_resource.code;
SeaResource FindSingleExecutableResource() {
static const SeaResource sea_resource = []() -> SeaResource {
std::string_view blob = FindSingleExecutableBlob();
per_process::Debug(DebugCategory::SEA,
"Found SEA resource %p, size=%zu\n",
blob.data(),
blob.size());
SeaDeserializer deserializer(blob);
return deserializer.Read<SeaResource>();
}();
return sea_resource;
}

bool IsSingleExecutable() {
Expand Down Expand Up @@ -194,51 +238,46 @@ std::optional<SeaConfig> ParseSingleExecutableConfig(
return result;
}

bool GenerateSingleExecutableBlob(const SeaConfig& config) {
ExitCode GenerateSingleExecutableBlob(const SeaConfig& config) {
std::string main_script;
// TODO(joyeecheung): unify the file utils.
int r = ReadFileSync(&main_script, config.main_path.c_str());
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot read main script %s:%s\n", config.main_path, err);
return false;
return ExitCode::kGenericUserError;
}

std::vector<char> sink;
// TODO(joyeecheung): reuse the SnapshotSerializerDeserializer for this.
sink.reserve(SeaResource::kHeaderSize + main_script.size());
const char* pos = reinterpret_cast<const char*>(&kMagic);
sink.insert(sink.end(), pos, pos + sizeof(kMagic));
pos = reinterpret_cast<const char*>(&(config.flags));
sink.insert(sink.end(), pos, pos + sizeof(SeaFlags));
sink.insert(
sink.end(), main_script.data(), main_script.data() + main_script.size());

uv_buf_t buf = uv_buf_init(sink.data(), sink.size());
SeaResource sea{config.flags, main_script};

SeaSerializer serializer;
serializer.Write(sea);

uv_buf_t buf = uv_buf_init(serializer.sink.data(), serializer.sink.size());
r = WriteFileSync(config.output_path.c_str(), buf);
if (r != 0) {
const char* err = uv_strerror(r);
FPrintF(stderr, "Cannot write output to %s:%s\n", config.output_path, err);
return false;
return ExitCode::kGenericUserError;
}

FPrintF(stderr,
"Wrote single executable preparation blob to %s\n",
config.output_path);
return true;
return ExitCode::kNoFailure;
}

} // anonymous namespace

ExitCode BuildSingleExecutableBlob(const std::string& config_path) {
std::optional<SeaConfig> config_opt =
ParseSingleExecutableConfig(config_path);
if (!config_opt.has_value() ||
!GenerateSingleExecutableBlob(config_opt.value())) {
return ExitCode::kGenericUserError;
if (config_opt.has_value()) {
ExitCode code = GenerateSingleExecutableBlob(config_opt.value());
return code;
}

return ExitCode::kNoFailure;
return ExitCode::kGenericUserError;
}

void Initialize(Local<Object> target,
Expand Down
Loading

0 comments on commit 33aa373

Please sign in to comment.