From 3d9d5dc18977d21a04299f4a37b366f9a1d32051 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Fri, 3 Nov 2023 00:57:19 -0400 Subject: [PATCH 1/3] Create `MemorySink` This is for writing to a `MemorySourceAccessor`. --- src/libutil/memory-source-accessor.cc | 56 +++++++++++++++++++++++++++ src/libutil/memory-source-accessor.hh | 25 ++++++++++++ 2 files changed, 81 insertions(+) diff --git a/src/libutil/memory-source-accessor.cc b/src/libutil/memory-source-accessor.cc index f34f6c09171..78a4dd29815 100644 --- a/src/libutil/memory-source-accessor.cc +++ b/src/libutil/memory-source-accessor.cc @@ -121,4 +121,60 @@ CanonPath MemorySourceAccessor::addFile(CanonPath path, std::string && contents) return path; } + +using File = MemorySourceAccessor::File; + +void MemorySink::createDirectory(const Path & path) +{ + auto * f = dst.open(CanonPath{path}, File { File::Directory { } }); + if (!f) + throw Error("file '%s' cannot be made because some parent file is not a directory", path); + + if (!std::holds_alternative(f->raw)) + throw Error("file '%s' is not a directory", path); +}; + +void MemorySink::createRegularFile(const Path & path) +{ + auto * f = dst.open(CanonPath{path}, File { File::Regular {} }); + if (!f) + throw Error("file '%s' cannot be made because some parent file is not a directory", path); + if (!(r = std::get_if(&f->raw))) + throw Error("file '%s' is not a regular file", path); +} + +void MemorySink::closeRegularFile() +{ + r = nullptr; +} + +void MemorySink::isExecutable() +{ + assert(r); + r->executable = true; +} + +void MemorySink::preallocateContents(uint64_t len) +{ + assert(r); + r->contents.reserve(len); +} + +void MemorySink::receiveContents(std::string_view data) +{ + assert(r); + r->contents += data; +} + +void MemorySink::createSymlink(const Path & path, const std::string & target) +{ + auto * f = dst.open(CanonPath{path}, File { File::Symlink { } }); + if (!f) + throw Error("file '%s' cannot be made because some parent file is not a directory", path); + if (auto * s = std::get_if(&f->raw)) + s->target = target; + else + throw Error("file '%s' is not a symbolic link", path); +} + } diff --git a/src/libutil/memory-source-accessor.hh b/src/libutil/memory-source-accessor.hh index 014fa8098c9..b908f3713c0 100644 --- a/src/libutil/memory-source-accessor.hh +++ b/src/libutil/memory-source-accessor.hh @@ -1,4 +1,5 @@ #include "source-accessor.hh" +#include "fs-sink.hh" #include "variant-wrapper.hh" namespace nix { @@ -71,4 +72,28 @@ struct MemorySourceAccessor : virtual SourceAccessor CanonPath addFile(CanonPath path, std::string && contents); }; +/** + * Write to a `MemorySourceAccessor` at the given path + */ +struct MemorySink : ParseSink +{ + MemorySourceAccessor & dst; + + MemorySink(MemorySourceAccessor & dst) : dst(dst) { } + + void createDirectory(const Path & path) override; + + void createRegularFile(const Path & path) override; + void receiveContents(std::string_view data) override; + void isExecutable() override; + void closeRegularFile() override; + + void createSymlink(const Path & path, const std::string & target) override; + + void preallocateContents(uint64_t size) override; + +private: + MemorySourceAccessor::File::Regular * r; +}; + } From 9afa697ab61ea6bbbb0d88e629b62606681cc744 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Wed, 8 Nov 2023 00:30:55 -0500 Subject: [PATCH 2/3] Refactor bash test build system a bit The basic idea here is to separate a few intertwined notions: 1. Not all "run bash tests" are "install tests" 2. Not all "run bash tests" use `tests/functional/init.sh`, or any pre-test initialization at all. This will used in the next commit when we have a test that check unit test golden master data. Also, move our custom `PS4` from the test to the test runner, as it is part of how we want to display the tests, not the test themselves. Co-authored-by: Robert Hensing --- doc/manual/src/contributing/testing.md | 10 ++++---- mk/common-test.sh | 24 ++++++++++++++----- mk/debug-test.sh | 5 +++- mk/lib.mk | 7 +++--- mk/run-test.sh | 5 +++- mk/tests.mk | 19 ++++++++------- .../common/vars-and-functions.sh.in | 2 +- 7 files changed, 47 insertions(+), 25 deletions(-) diff --git a/doc/manual/src/contributing/testing.md b/doc/manual/src/contributing/testing.md index 3d75ebe7b56..0b45b88a323 100644 --- a/doc/manual/src/contributing/testing.md +++ b/doc/manual/src/contributing/testing.md @@ -133,17 +133,17 @@ ran test tests/functional/${testName}.sh... [PASS] or without `make`: ```shell-session -$ ./mk/run-test.sh tests/functional/${testName}.sh +$ ./mk/run-test.sh tests/functional/${testName}.sh tests/functional/init.sh ran test tests/functional/${testName}.sh... [PASS] ``` To see the complete output, one can also run: ```shell-session -$ ./mk/debug-test.sh tests/functional/${testName}.sh -+ foo +$ ./mk/debug-test.sh tests/functional/${testName}.sh tests/functional/init.sh ++(${testName}.sh:1) foo output from foo -+ bar ++(${testName}.sh:2) bar output from bar ... ``` @@ -175,7 +175,7 @@ edit it like so: Then, running the test with `./mk/debug-test.sh` will drop you into GDB once the script reaches that point: ```shell-session -$ ./mk/debug-test.sh tests/functional/${testName}.sh +$ ./mk/debug-test.sh tests/functional/${testName}.sh tests/functional/init.sh ... + gdb blash blub GNU gdb (GDB) 12.1 diff --git a/mk/common-test.sh b/mk/common-test.sh index 7ab25febf52..00ccd158423 100644 --- a/mk/common-test.sh +++ b/mk/common-test.sh @@ -1,15 +1,27 @@ -test_dir=tests/functional +# Remove overall test dir (at most one of the two should match) and +# remove file extension. +test_name=$(echo -n "$test" | sed \ + -e "s|^unit-test-data/||" \ + -e "s|^tests/functional/||" \ + -e "s|\.sh$||" \ + ) -test=$(echo -n "$test" | sed -e "s|^$test_dir/||") - -TESTS_ENVIRONMENT=("TEST_NAME=${test%.*}" 'NIX_REMOTE=') +TESTS_ENVIRONMENT=( + "TEST_NAME=$test_name" + 'NIX_REMOTE=' + 'PS4=+(${BASH_SOURCE[0]-$0}:$LINENO) ' +) : ${BASH:=/usr/bin/env bash} +run () { + cd "$(dirname $1)" && env "${TESTS_ENVIRONMENT[@]}" $BASH -x -e -u -o pipefail $(basename $1) +} + init_test () { - cd "$test_dir" && env "${TESTS_ENVIRONMENT[@]}" $BASH -e init.sh 2>/dev/null > /dev/null + run "$init" 2>/dev/null > /dev/null } run_test_proper () { - cd "$test_dir/$(dirname $test)" && env "${TESTS_ENVIRONMENT[@]}" $BASH -e $(basename $test) + run "$test" } diff --git a/mk/debug-test.sh b/mk/debug-test.sh index b5b628ecd68..52482c01e47 100755 --- a/mk/debug-test.sh +++ b/mk/debug-test.sh @@ -3,9 +3,12 @@ set -eu -o pipefail test=$1 +init=${2-} dir="$(dirname "${BASH_SOURCE[0]}")" source "$dir/common-test.sh" -(init_test) +if [ -n "$init" ]; then + (init_test) +fi run_test_proper diff --git a/mk/lib.mk b/mk/lib.mk index e86a7f1a448..49abe986264 100644 --- a/mk/lib.mk +++ b/mk/lib.mk @@ -122,14 +122,15 @@ $(foreach script, $(bin-scripts), $(eval $(call install-program-in,$(script),$(b $(foreach script, $(bin-scripts), $(eval programs-list += $(script))) $(foreach script, $(noinst-scripts), $(eval programs-list += $(script))) $(foreach template, $(template-files), $(eval $(call instantiate-template,$(template)))) +install_test_init=tests/functional/init.sh $(foreach test, $(install-tests), \ - $(eval $(call run-install-test,$(test))) \ + $(eval $(call run-test,$(test),$(install_test_init))) \ $(eval installcheck: $(test).test)) $(foreach test-group, $(install-tests-groups), \ - $(eval $(call run-install-test-group,$(test-group))) \ + $(eval $(call run-test-group,$(test-group),$(install_test_init))) \ $(eval installcheck: $(test-group).test-group) \ $(foreach test, $($(test-group)-tests), \ - $(eval $(call run-install-test,$(test))) \ + $(eval $(call run-test,$(test),$(install_test_init))) \ $(eval $(test-group).test-group: $(test).test))) $(foreach file, $(man-pages), $(eval $(call install-data-in, $(file), $(mandir)/man$(patsubst .%,%,$(suffix $(file)))))) diff --git a/mk/run-test.sh b/mk/run-test.sh index 1a1d659304e..da9c5a473b4 100755 --- a/mk/run-test.sh +++ b/mk/run-test.sh @@ -8,6 +8,7 @@ yellow="" normal="" test=$1 +init=${2-} dir="$(dirname "${BASH_SOURCE[0]}")" source "$dir/common-test.sh" @@ -21,7 +22,9 @@ if [ -t 1 ]; then fi run_test () { - (init_test 2>/dev/null > /dev/null) + if [ -n "$init" ]; then + (init_test 2>/dev/null > /dev/null) + fi log="$(run_test_proper 2>&1)" && status=0 || status=$? } diff --git a/mk/tests.mk b/mk/tests.mk index ec8128bdf87..bac9b704ad1 100644 --- a/mk/tests.mk +++ b/mk/tests.mk @@ -2,19 +2,22 @@ test-deps = -define run-install-test +define run-bash - .PHONY: $1.test - $1.test: $1 $(test-deps) - @env BASH=$(bash) $(bash) mk/run-test.sh $1 < /dev/null + .PHONY: $1 + $1: $2 + @env BASH=$(bash) $(bash) $3 < /dev/null - .PHONY: $1.test-debug - $1.test-debug: $1 $(test-deps) - @env BASH=$(bash) $(bash) mk/debug-test.sh $1 < /dev/null +endef + +define run-test + + $(eval $(call run-bash,$1.test,$1 $(test-deps),mk/run-test.sh $1 $2)) + $(eval $(call run-bash,$1.test-debug,$1 $(test-deps),mk/debug-test.sh $1 $2)) endef -define run-install-test-group +define run-test-group .PHONY: $1.test-group diff --git a/tests/functional/common/vars-and-functions.sh.in b/tests/functional/common/vars-and-functions.sh.in index 967d6be5477..848988af996 100644 --- a/tests/functional/common/vars-and-functions.sh.in +++ b/tests/functional/common/vars-and-functions.sh.in @@ -4,7 +4,7 @@ if [[ -z "${COMMON_VARS_AND_FUNCTIONS_SH_SOURCED-}" ]]; then COMMON_VARS_AND_FUNCTIONS_SH_SOURCED=1 -export PS4='+(${BASH_SOURCE[0]-$0}:$LINENO) ' +set +x export TEST_ROOT=$(realpath ${TMPDIR:-/tmp}/nix-test)/${TEST_NAME:-default/tests\/functional//} export NIX_STORE_DIR From 20b95d622336cf982082d7daf3075339f6edce70 Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sat, 4 Nov 2023 15:35:38 -0400 Subject: [PATCH 3/3] Git object hashing in libutil This is the core functionality but just unit-tested and not yet made part of the store layer. This is because there is some tech debt around (a) repeated boilerplate hashing objects (b) better integration of the new `SourceAccessor` type that needs to be cleaned up first. Part of RFC 133 Co-Authored-By: Matthew Bauer Co-Authored-By: Carlo Nucera Co-authored-by: Robert Hensing Co-authored-by: Florian Klink --- src/libutil/experimental-features.cc | 8 + src/libutil/experimental-features.hh | 1 + src/libutil/git.cc | 261 +++++++++++++++++- src/libutil/git.hh | 141 +++++++++- src/libutil/serialise.cc | 4 + src/libutil/serialise.hh | 1 + src/libutil/tests/git.cc | 249 +++++++++++++++-- src/libutil/tests/local.mk | 4 + unit-test-data/libutil/git/check-data.sh | 31 +++ .../libutil/git/hello-world-blob.bin | Bin 0 -> 24 bytes unit-test-data/libutil/git/hello-world.bin | Bin 0 -> 16 bytes unit-test-data/libutil/git/tree.bin | Bin 0 -> 100 bytes unit-test-data/libutil/git/tree.txt | 3 + 13 files changed, 666 insertions(+), 37 deletions(-) create mode 100644 unit-test-data/libutil/git/check-data.sh create mode 100644 unit-test-data/libutil/git/hello-world-blob.bin create mode 100644 unit-test-data/libutil/git/hello-world.bin create mode 100644 unit-test-data/libutil/git/tree.bin create mode 100644 unit-test-data/libutil/git/tree.txt diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index 88fb557135f..ac4d189e178 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -96,6 +96,14 @@ constexpr std::array xpFeatureDetails [`nix`](@docroot@/command-ref/new-cli/nix.md) for details. )", }, + { + .tag = Xp::GitHashing, + .name = "git-hashing", + .description = R"( + Allow creating (content-addressed) store objects which are hashed via Git's hashing algorithm. + These store objects will not be understandable by older versions of Nix. + )", + }, { .tag = Xp::RecursiveNix, .name = "recursive-nix", diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index f580fd0304c..c355b8081e6 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -22,6 +22,7 @@ enum struct ExperimentalFeature Flakes, FetchTree, NixCommand, + GitHashing, RecursiveNix, NoUrlLiterals, FetchClosure, diff --git a/src/libutil/git.cc b/src/libutil/git.cc index f35c2fdb75c..a4bd6009646 100644 --- a/src/libutil/git.cc +++ b/src/libutil/git.cc @@ -1,9 +1,263 @@ +#include +#include +#include +#include +#include +#include // for strcasecmp + +#include "signals.hh" +#include "config.hh" +#include "hash.hh" +#include "posix-source-accessor.hh" + #include "git.hh" +#include "serialise.hh" -#include +namespace nix::git { + +using namespace nix; +using namespace std::string_literals; + +std::optional decodeMode(RawMode m) { + switch (m) { + case (RawMode) Mode::Directory: + case (RawMode) Mode::Executable: + case (RawMode) Mode::Regular: + case (RawMode) Mode::Symlink: + return (Mode) m; + default: + return std::nullopt; + } +} + + +static std::string getStringUntil(Source & source, char byte) +{ + std::string s; + char n[1]; + source(std::string_view { n, 1 }); + while (*n != byte) { + s += *n; + source(std::string_view { n, 1 }); + } + return s; +} + + +static std::string getString(Source & source, int n) +{ + std::string v; + v.resize(n); + source(v); + return v; +} + + +void parse( + ParseSink & sink, + const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + auto type = getString(source, 5); + + if (type == "blob ") { + sink.createRegularFile(sinkPath); + + unsigned long long size = std::stoi(getStringUntil(source, 0)); + + sink.preallocateContents(size); + + unsigned long long left = size; + std::string buf; + buf.reserve(65536); + + while (left) { + checkInterrupt(); + buf.resize(std::min((unsigned long long)buf.capacity(), left)); + source(buf); + sink.receiveContents(buf); + left -= buf.size(); + } + } else if (type == "tree ") { + unsigned long long size = std::stoi(getStringUntil(source, 0)); + unsigned long long left = size; + + sink.createDirectory(sinkPath); + + while (left) { + std::string perms = getStringUntil(source, ' '); + left -= perms.size(); + left -= 1; + + RawMode rawMode = std::stoi(perms, 0, 8); + auto modeOpt = decodeMode(rawMode); + if (!modeOpt) + throw Error("Unknown Git permission: %o", perms); + auto mode = std::move(*modeOpt); + + std::string name = getStringUntil(source, '\0'); + left -= name.size(); + left -= 1; + + std::string hashs = getString(source, 20); + left -= 20; + + Hash hash(htSHA1); + std::copy(hashs.begin(), hashs.end(), hash.hash); + + hook(name, TreeEntry { + .mode = mode, + .hash = hash, + }); + + if (mode == Mode::Executable) + sink.isExecutable(); + } + } else throw Error("input doesn't look like a Git object"); +} + + +std::optional convertMode(SourceAccessor::Type type) +{ + switch (type) { + case SourceAccessor::tSymlink: return Mode::Symlink; + case SourceAccessor::tRegular: return Mode::Regular; + case SourceAccessor::tDirectory: return Mode::Directory; + case SourceAccessor::tMisc: return std::nullopt; + default: abort(); + } +} + + +void restore(ParseSink & sink, Source & source, std::function hook) +{ + parse(sink, "", source, [&](Path name, TreeEntry entry) { + auto [accessor, from] = hook(entry.hash); + auto stat = accessor->lstat(from); + auto gotOpt = convertMode(stat.type); + if (!gotOpt) + throw Error("file '%s' (git hash %s) has an unsupported type", + from, + entry.hash.to_string(HashFormat::Base16, false)); + auto & got = *gotOpt; + if (got != entry.mode) + throw Error("git mode of file '%s' (git hash %s) is %o but expected %o", + from, + entry.hash.to_string(HashFormat::Base16, false), + (RawMode) got, + (RawMode) entry.mode); + copyRecursive( + *accessor, from, + sink, name); + }); +} + + +void dumpBlobPrefix( + uint64_t size, Sink & sink, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + auto s = fmt("blob %d\0"s, std::to_string(size)); + sink(s); +} + + +void dumpTree(const Tree & entries, Sink & sink, + const ExperimentalFeatureSettings & xpSettings) +{ + xpSettings.require(Xp::GitHashing); + + std::string v1; + + for (auto & [name, entry] : entries) { + auto name2 = name; + if (entry.mode == Mode::Directory) { + assert(name2.back() == '/'); + name2.pop_back(); + } + v1 += fmt("%o %s\0"s, static_cast(entry.mode), name2); + std::copy(entry.hash.hash, entry.hash.hash + entry.hash.hashSize, std::back_inserter(v1)); + } + + { + auto s = fmt("tree %d\0"s, v1.size()); + sink(s); + } + + sink(v1); +} + + +Mode dump( + SourceAccessor & accessor, const CanonPath & path, + Sink & sink, + std::function hook, + PathFilter & filter, + const ExperimentalFeatureSettings & xpSettings) +{ + auto st = accessor.lstat(path); + + switch (st.type) { + case SourceAccessor::tRegular: + { + accessor.readFile(path, sink, [&](uint64_t size) { + dumpBlobPrefix(size, sink, xpSettings); + }); + return st.isExecutable + ? Mode::Executable + : Mode::Regular; + } + + case SourceAccessor::tDirectory: + { + Tree entries; + for (auto & [name, _] : accessor.readDirectory(path)) { + auto child = path + name; + if (!filter(child.abs())) continue; + + auto entry = hook(child); + + auto name2 = name; + if (entry.mode == Mode::Directory) + name2 += "/"; + + entries.insert_or_assign(std::move(name2), std::move(entry)); + } + dumpTree(entries, sink, xpSettings); + return Mode::Directory; + } + + case SourceAccessor::tSymlink: + case SourceAccessor::tMisc: + default: + throw Error("file '%1%' has an unsupported type", path); + } +} + + +TreeEntry dumpHash( + HashType ht, + SourceAccessor & accessor, const CanonPath & path, PathFilter & filter) +{ + std::function hook; + hook = [&](const CanonPath & path) -> TreeEntry { + auto hashSink = HashSink(ht); + auto mode = dump(accessor, path, hashSink, hook, filter); + auto hash = hashSink.finish().first; + return { + .mode = mode, + .hash = hash, + }; + }; + + return hook(path); +} -namespace nix { -namespace git { std::optional parseLsRemoteLine(std::string_view line) { @@ -22,4 +276,3 @@ std::optional parseLsRemoteLine(std::string_view line) } } -} diff --git a/src/libutil/git.hh b/src/libutil/git.hh index bf2b9a2869a..30346007280 100644 --- a/src/libutil/git.hh +++ b/src/libutil/git.hh @@ -5,9 +5,127 @@ #include #include -namespace nix { +#include "types.hh" +#include "serialise.hh" +#include "hash.hh" +#include "source-accessor.hh" +#include "fs-sink.hh" -namespace git { +namespace nix::git { + +using RawMode = uint32_t; + +enum struct Mode : RawMode { + Directory = 0040000, + Executable = 0100755, + Regular = 0100644, + Symlink = 0120000, +}; + +std::optional decodeMode(RawMode m); + +/** + * An anonymous Git tree object entry (no name part). + */ +struct TreeEntry +{ + Mode mode; + Hash hash; + + GENERATE_CMP(TreeEntry, me->mode, me->hash); +}; + +/** + * A Git tree object, fully decoded and stored in memory. + * + * Directory names must end in a `/` for sake of sorting. See + * https://github.com/mirage/irmin/issues/352 + */ +using Tree = std::map; + +/** + * Callback for processing a child hash with `parse` + * + * The function should + * + * 1. Obtain the file system objects denoted by `gitHash` + * + * 2. Ensure they match `mode` + * + * 3. Feed them into the same sink `parse` was called with + * + * Implementations may seek to memoize resources (bandwidth, storage, + * etc.) for the same Git hash. + */ +using SinkHook = void(const Path & name, TreeEntry entry); + +void parse( + ParseSink & sink, const Path & sinkPath, + Source & source, + std::function hook, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Assists with writing a `SinkHook` step (2). + */ +std::optional convertMode(SourceAccessor::Type type); + +/** + * Simplified version of `SinkHook` for `restore`. + * + * Given a `Hash`, return a `SourceAccessor` and `CanonPath` pointing to + * the file system object with that path. + */ +using RestoreHook = std::pair(Hash); + +/** + * Wrapper around `parse` and `RestoreSink` + */ +void restore(ParseSink & sink, Source & source, std::function hook); + +/** + * Dumps a single file to a sink + * + * @param xpSettings for testing purposes + */ +void dumpBlobPrefix( + uint64_t size, Sink & sink, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Dumps a representation of a git tree to a sink + */ +void dumpTree( + const Tree & entries, Sink & sink, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Callback for processing a child with `dump` + * + * The function should return the Git hash and mode of the file at the + * given path in the accessor passed to `dump`. + * + * Note that if the child is a directory, its child in must also be so + * processed in order to compute this information. + */ +using DumpHook = TreeEntry(const CanonPath & path); + +Mode dump( + SourceAccessor & accessor, const CanonPath & path, + Sink & sink, + std::function hook, + PathFilter & filter = defaultPathFilter, + const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); + +/** + * Recursively dumps path, hashing as we go. + * + * A smaller wrapper around `dump`. + */ +TreeEntry dumpHash( + HashType ht, + SourceAccessor & accessor, const CanonPath & path, + PathFilter & filter = defaultPathFilter); /** * A line from the output of `git ls-remote --symref`. @@ -16,15 +134,17 @@ namespace git { * * - Symbolic references of the form * - * ref: {target} {reference} - * - * where {target} is itself a reference and {reference} is optional + * ``` + * ref: {target} {reference} + * ``` + * where {target} is itself a reference and {reference} is optional * * - Object references of the form * - * {target} {reference} - * - * where {target} is a commit id and {reference} is mandatory + * ``` + * {target} {reference} + * ``` + * where {target} is a commit id and {reference} is mandatory */ struct LsRemoteRefLine { enum struct Kind { @@ -36,8 +156,9 @@ struct LsRemoteRefLine { std::optional reference; }; +/** + * Parse an `LsRemoteRefLine` + */ std::optional parseLsRemoteLine(std::string_view line); } - -} diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 725ddbb8d6b..d7950b11bf6 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -74,6 +74,10 @@ void Source::operator () (char * data, size_t len) } } +void Source::operator () (std::string_view data) +{ + (*this)((char *)data.data(), data.size()); +} void Source::drainInto(Sink & sink) { diff --git a/src/libutil/serialise.hh b/src/libutil/serialise.hh index 9e07226bf81..3f57ce88ba7 100644 --- a/src/libutil/serialise.hh +++ b/src/libutil/serialise.hh @@ -73,6 +73,7 @@ struct Source * an error if it is not going to be available. */ void operator () (char * data, size_t len); + void operator () (std::string_view data); /** * Store up to ‘len’ in the buffer pointed to by ‘data’, and diff --git a/src/libutil/tests/git.cc b/src/libutil/tests/git.cc index 5b5715fc26b..2842ea4d03f 100644 --- a/src/libutil/tests/git.cc +++ b/src/libutil/tests/git.cc @@ -1,33 +1,236 @@ -#include "git.hh" #include +#include "git.hh" +#include "memory-source-accessor.hh" + +#include "tests/characterization.hh" + namespace nix { - TEST(GitLsRemote, parseSymrefLineWithReference) { - auto line = "ref: refs/head/main HEAD"; - auto res = git::parseLsRemoteLine(line); - ASSERT_TRUE(res.has_value()); - ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic); - ASSERT_EQ(res->target, "refs/head/main"); - ASSERT_EQ(res->reference, "HEAD"); - } +using namespace git; + +class GitTest : public CharacterizationTest +{ + Path unitTestData = getUnitTestData() + "/libutil/git"; - TEST(GitLsRemote, parseSymrefLineWithNoReference) { - auto line = "ref: refs/head/main"; - auto res = git::parseLsRemoteLine(line); - ASSERT_TRUE(res.has_value()); - ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Symbolic); - ASSERT_EQ(res->target, "refs/head/main"); - ASSERT_EQ(res->reference, std::nullopt); +public: + + Path goldenMaster(std::string_view testStem) const override { + return unitTestData + "/" + testStem; } - TEST(GitLsRemote, parseObjectRefLine) { - auto line = "abc123 refs/head/main"; - auto res = git::parseLsRemoteLine(line); - ASSERT_TRUE(res.has_value()); - ASSERT_EQ(res->kind, git::LsRemoteRefLine::Kind::Object); - ASSERT_EQ(res->target, "abc123"); - ASSERT_EQ(res->reference, "refs/head/main"); + /** + * We set these in tests rather than the regular globals so we don't have + * to worry about race conditions if the tests run concurrently. + */ + ExperimentalFeatureSettings mockXpSettings; + +private: + + void SetUp() override + { + mockXpSettings.set("experimental-features", "git-hashing"); } +}; + +TEST(GitMode, gitMode_directory) { + Mode m = Mode::Directory; + RawMode r = 0040000; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_executable) { + Mode m = Mode::Executable; + RawMode r = 0100755; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_regular) { + Mode m = Mode::Regular; + RawMode r = 0100644; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST(GitMode, gitMode_symlink) { + Mode m = Mode::Symlink; + RawMode r = 0120000; + ASSERT_EQ(static_cast(m), r); + ASSERT_EQ(decodeMode(r), std::optional { m }); +}; + +TEST_F(GitTest, blob_read) { + readTest("hello-world-blob.bin", [&](const auto & encoded) { + StringSource in { encoded }; + StringSink out; + RegularFileSink out2 { out }; + parse(out2, "", in, [](auto &, auto) {}, mockXpSettings); + + auto expected = readFile(goldenMaster("hello-world.bin")); + + ASSERT_EQ(out.s, expected); + }); +} + +TEST_F(GitTest, blob_write) { + writeTest("hello-world-blob.bin", [&]() { + auto decoded = readFile(goldenMaster("hello-world.bin")); + StringSink s; + dumpBlobPrefix(decoded.size(), s, mockXpSettings); + s(decoded); + return s.s; + }); +} + +/** + * This data is for "shallow" tree tests. However, we use "real" hashes + * so that we can check our test data in the corresponding functional + * test (`git-hashing/unit-test-data`). + */ +const static Tree tree = { + { + "Foo", + { + .mode = Mode::Regular, + // hello world with special chars from above + .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", htSHA1), + }, + }, + { + "bAr", + { + .mode = Mode::Executable, + // ditto + .hash = Hash::parseAny("63ddb340119baf8492d2da53af47e8c7cfcd5eb2", htSHA1), + }, + }, + { + "baZ/", + { + .mode = Mode::Directory, + // Empty directory hash + .hash = Hash::parseAny("4b825dc642cb6eb9a060e54bf8d69288fbee4904", htSHA1), + }, + }, +}; + +TEST_F(GitTest, tree_read) { + readTest("tree.bin", [&](const auto & encoded) { + StringSource in { encoded }; + NullParseSink out; + Tree got; + parse(out, "", in, [&](auto & name, auto entry) { + auto name2 = name; + if (entry.mode == Mode::Directory) + name2 += '/'; + got.insert_or_assign(name2, std::move(entry)); + }, mockXpSettings); + + ASSERT_EQ(got, tree); + }); +} + +TEST_F(GitTest, tree_write) { + writeTest("tree.bin", [&]() { + StringSink s; + dumpTree(tree, s, mockXpSettings); + return s.s; + }); } +TEST_F(GitTest, both_roundrip) { + using File = MemorySourceAccessor::File; + + MemorySourceAccessor files; + files.root = File::Directory { + .contents { + { + "foo", + File::Regular { + .contents = "hello\n\0\n\tworld!", + }, + }, + { + "bar", + File::Directory { + .contents = { + { + "baz", + File::Regular { + .executable = true, + .contents = "good day,\n\0\n\tworld!", + }, + }, + }, + }, + }, + }, + }; + + std::map cas; + + std::function dumpHook; + dumpHook = [&](const CanonPath & path) { + StringSink s; + HashSink hashSink { htSHA1 }; + TeeSink s2 { s, hashSink }; + auto mode = dump( + files, path, s2, dumpHook, + defaultPathFilter, mockXpSettings); + auto hash = hashSink.finish().first; + cas.insert_or_assign(hash, std::move(s.s)); + return TreeEntry { + .mode = mode, + .hash = hash, + }; + }; + + auto root = dumpHook(CanonPath::root); + + MemorySourceAccessor files2; + + MemorySink sinkFiles2 { files2 }; + + std::function mkSinkHook; + mkSinkHook = [&](const Path prefix, const Hash & hash) { + StringSource in { cas[hash] }; + parse(sinkFiles2, prefix, in, [&](const Path & name, const auto & entry) { + mkSinkHook(prefix + "/" + name, entry.hash); + }, mockXpSettings); + }; + + mkSinkHook("", root.hash); + + ASSERT_EQ(files, files2); +} + +TEST(GitLsRemote, parseSymrefLineWithReference) { + auto line = "ref: refs/head/main HEAD"; + auto res = parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, LsRemoteRefLine::Kind::Symbolic); + ASSERT_EQ(res->target, "refs/head/main"); + ASSERT_EQ(res->reference, "HEAD"); +} + +TEST(GitLsRemote, parseSymrefLineWithNoReference) { + auto line = "ref: refs/head/main"; + auto res = parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, LsRemoteRefLine::Kind::Symbolic); + ASSERT_EQ(res->target, "refs/head/main"); + ASSERT_EQ(res->reference, std::nullopt); +} + +TEST(GitLsRemote, parseObjectRefLine) { + auto line = "abc123 refs/head/main"; + auto res = parseLsRemoteLine(line); + ASSERT_TRUE(res.has_value()); + ASSERT_EQ(res->kind, LsRemoteRefLine::Kind::Object); + ASSERT_EQ(res->target, "abc123"); + ASSERT_EQ(res->reference, "refs/head/main"); +} + +} diff --git a/src/libutil/tests/local.mk b/src/libutil/tests/local.mk index 167915439fd..5a970c0f20b 100644 --- a/src/libutil/tests/local.mk +++ b/src/libutil/tests/local.mk @@ -27,3 +27,7 @@ libutil-tests_CXXFLAGS += -I src/libutil libutil-tests_LIBS = libutil libutil-tests_LDFLAGS := -lrapidcheck $(GTEST_LIBS) + +check: unit-test-data/libutil/git/check-data.sh.test + +$(eval $(call run-test,unit-test-data/libutil/git/check-data.sh)) diff --git a/unit-test-data/libutil/git/check-data.sh b/unit-test-data/libutil/git/check-data.sh new file mode 100644 index 00000000000..68b705c95c6 --- /dev/null +++ b/unit-test-data/libutil/git/check-data.sh @@ -0,0 +1,31 @@ +#!/usr/bin/env bash + +set -eu -o pipefail + +export TEST_ROOT=$(realpath ${TMPDIR:-/tmp}/nix-test)/git-hashing/unit-test-data +mkdir -p $TEST_ROOT + +repo="$TEST_ROOT/scratch" +git init "$repo" + +git -C "$repo" config user.email "you@example.com" +git -C "$repo" config user.name "Your Name" + +# `-w` to write for tree test +freshlyAddedHash=$(git -C "$repo" hash-object -w -t blob --stdin < "./hello-world.bin") +encodingHash=$(sha1sum -b < "./hello-world-blob.bin" | head -c 40) + +# If the hashes match, then `hello-world-blob.bin` must be the encoding +# of `hello-world.bin`. +[[ "$encodingHash" == "$freshlyAddedHash" ]] + +# Create empty directory object for tree test +echo -n | git -C "$repo" hash-object -w -t tree --stdin + +# Relies on both child hashes already existing in the git store +freshlyAddedHash=$(git -C "$repo" mktree < "./tree.txt") +encodingHash=$(sha1sum -b < "./tree.bin" | head -c 40) + +# If the hashes match, then `tree.bin` must be the encoding of the +# directory denoted by `tree.txt` interpreted as git directory listing. +[[ "$encodingHash" == "$freshlyAddedHash" ]] diff --git a/unit-test-data/libutil/git/hello-world-blob.bin b/unit-test-data/libutil/git/hello-world-blob.bin new file mode 100644 index 0000000000000000000000000000000000000000..255f5df55ccedb2dae5f541d516896ffffcdb526 GIT binary patch literal 24 fcmYew$xl)+G-L2c&B@7E;9}t7R0z*6%1HqLQkDjQ literal 0 HcmV?d00001 diff --git a/unit-test-data/libutil/git/hello-world.bin b/unit-test-data/libutil/git/hello-world.bin new file mode 100644 index 0000000000000000000000000000000000000000..63ddb340119baf8492d2da53af47e8c7cfcd5eb2 GIT binary patch literal 16 XcmeZB&B@7E;9}t7R0z*6%1HqLBqsz~ literal 0 HcmV?d00001 diff --git a/unit-test-data/libutil/git/tree.bin b/unit-test-data/libutil/git/tree.bin new file mode 100644 index 0000000000000000000000000000000000000000..5256ec140702fef5f88bd5750caf7cd57c03e5ac GIT binary patch literal 100 zcmXRZN=;R;G-5C`FfcPQQE(Ikg(Sx! ktkNb1K%kJ67{%b-6no6+bl%Pd2~WL$T$|MK`<*8X01f;sp#T5? literal 0 HcmV?d00001 diff --git a/unit-test-data/libutil/git/tree.txt b/unit-test-data/libutil/git/tree.txt new file mode 100644 index 00000000000..be3d02920c9 --- /dev/null +++ b/unit-test-data/libutil/git/tree.txt @@ -0,0 +1,3 @@ +100644 blob 63ddb340119baf8492d2da53af47e8c7cfcd5eb2 Foo +100755 blob 63ddb340119baf8492d2da53af47e8c7cfcd5eb2 bAr +040000 tree 4b825dc642cb6eb9a060e54bf8d69288fbee4904 baZ