1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-05 20:41:47 +02:00

Add Git object hashing to the store layer

Part of RFC 133

Extracted from our old IPFS branches.

Co-Authored-By: Matthew Bauer <mjbauer95@gmail.com>
Co-Authored-By: Carlo Nucera <carlo.nucera@protonmail.com>
Co-authored-by: Robert Hensing <roberth@users.noreply.github.com>
Co-authored-by: Florian Klink <flokli@flokli.de>
This commit is contained in:
John Ericson 2023-09-04 09:51:23 -04:00
parent 04836c73e5
commit 201551c937
27 changed files with 484 additions and 86 deletions

View file

@ -324,6 +324,7 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = dump2.s;
break;
case FileIngestionMethod::Flat:
{
// The dump is Flat, so we need to convert it to NAR with a
// single file.
StringSink s;
@ -331,6 +332,10 @@ StorePath BinaryCacheStore::addToStoreFromDump(
nar = std::move(s.s);
break;
}
case FileIngestionMethod::Git:
unsupported("addToStoreFromDump");
break;
}
} else {
// Otherwise, we have to do th same hashing as NAR so our single
// hash will suffice for both purposes.
@ -450,7 +455,7 @@ StorePath BinaryCacheStore::addToStore(
non-recursive+sha256 so we can just use the default
implementation of this method in terms of addToStoreFromDump. */
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
auto source = sinkToSource([&](Sink & sink) {
accessor.dumpPath(path, sink, filter);

View file

@ -147,7 +147,7 @@ public:
void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
void addSignatures(const StorePath & storePath, const StringSet & sigs) override;

View file

@ -8,6 +8,7 @@
#include "finally.hh"
#include "util.hh"
#include "archive.hh"
#include "git.hh"
#include "compression.hh"
#include "daemon.hh"
#include "topo-sort.hh"
@ -2457,15 +2458,28 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
rewriteOutput(outputRewrites);
/* FIXME optimize and deduplicate with addToStore */
std::string oldHashPart { scratchPath->hashPart() };
auto got = ({
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto got = [&]{
PosixSourceAccessor accessor;
dumpPath(
accessor, CanonPath { actualPath },
caSink,
outputHash.method.getFileIngestionMethod());
caSink.finish().first;
});
auto fim = outputHash.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink { outputHash.hashAlgo, oldHashPart };
auto fim = outputHash.method.getFileIngestionMethod();
dumpPath(
accessor, CanonPath { actualPath },
caSink,
(FileSerialisationMethod) fim);
return caSink.finish().first;
}
case FileIngestionMethod::Git: {
return git::dumpHash(
outputHash.hashAlgo, accessor,
CanonPath { tmpDir + "/tmp" }).hash;
}
}
}();
ValidPathInfo newInfo0 {
worker.store,
@ -2491,7 +2505,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
newInfo0.narHash = narHashAndSize.first;
newInfo0.narSize = narHashAndSize.second;
}
@ -2515,7 +2529,7 @@ SingleDrvOutputs LocalDerivationGoal::registerOutputs()
PosixSourceAccessor accessor;
HashResult narHashAndSize = hashPath(
accessor, CanonPath { actualPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256);
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256);
ValidPathInfo newInfo0 { requiredFinalPath, narHashAndSize.first };
newInfo0.narSize = narHashAndSize.second;
auto refs = rewriteRefs();

View file

@ -529,11 +529,11 @@ bool Worker::pathContentsGood(const StorePath & path)
if (!pathExists(store.printStorePath(path)))
res = false;
else {
HashResult current = hashPath(
Hash current = hashPath(
*store.getFSAccessor(), CanonPath { store.printStorePath(path) },
FileIngestionMethod::Recursive, info->narHash.algo);
Hash nullHash(HashAlgorithm::SHA256);
res = info->narHash == nullHash || info->narHash == current.first;
res = info->narHash == nullHash || info->narHash == current;
}
pathContentsGoodCache.insert_or_assign(path, res);
if (!res)

View file

@ -11,6 +11,9 @@ std::string_view makeFileIngestionPrefix(FileIngestionMethod m)
return "";
case FileIngestionMethod::Recursive:
return "r:";
case FileIngestionMethod::Git:
experimentalFeatureSettings.require(Xp::GitHashing);
return "git:";
default:
throw Error("impossible, caught both cases");
}
@ -51,6 +54,10 @@ ContentAddressMethod ContentAddressMethod::parsePrefix(std::string_view & m)
if (splitPrefix(m, "r:")) {
return FileIngestionMethod::Recursive;
}
else if (splitPrefix(m, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
return FileIngestionMethod::Git;
}
else if (splitPrefix(m, "text:")) {
return TextIngestionMethod {};
}
@ -131,6 +138,10 @@ static std::pair<ContentAddressMethod, HashAlgorithm> parseContentAddressMethodP
auto method = FileIngestionMethod::Flat;
if (splitPrefix(rest, "r:"))
method = FileIngestionMethod::Recursive;
else if (splitPrefix(rest, "git:")) {
experimentalFeatureSettings.require(Xp::GitHashing);
method = FileIngestionMethod::Git;
}
HashAlgorithm hashAlgo = parseHashAlgorithm_();
return {
std::move(method),

View file

@ -13,6 +13,7 @@
#include "archive.hh"
#include "derivations.hh"
#include "args.hh"
#include "git.hh"
namespace nix::daemon {
@ -443,13 +444,17 @@ static void performOp(TunnelLogger * logger, ref<Store> store,
TeeSource savedNARSource(from, saved);
NullFileSystemObjectSink sink; /* just parse the NAR */
parseDump(sink, savedNARSource);
} else {
} else if (method == FileIngestionMethod::Flat) {
/* Incrementally parse the NAR file, stripping the
metadata, and streaming the sole file we expect into
`saved`. */
RegularFileSink savedRegular { saved };
parseDump(savedRegular, from);
if (!savedRegular.regular) throw Error("regular file expected");
} else {
/* Should have validated above that no other file ingestion
method was used. */
assert(false);
}
});
logger->startWork();

View file

@ -43,7 +43,7 @@ public:
LocalFSStore(const Params & params);
void narFromPath(const StorePath & path, Sink & sink) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
/**
* Creates symlink from the `gcRoot` to the `storePath` and

View file

@ -1,5 +1,6 @@
#include "local-store.hh"
#include "globals.hh"
#include "git.hh"
#include "archive.hh"
#include "pathlocks.hh"
#include "worker-protocol.hh"
@ -1097,19 +1098,29 @@ void LocalStore::addToStore(const ValidPathInfo & info, Source & source,
if (info.ca) {
auto & specified = *info.ca;
auto actualHash = ({
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
PosixSourceAccessor accessor;
dumpPath(
*getFSAccessor(false),
CanonPath { printStorePath(info.path) },
caSink,
specified.method.getFileIngestionMethod());
auto accessor = getFSAccessor(false);
CanonPath path { printStorePath(info.path) };
Hash h { HashAlgorithm::SHA256 }; // throwaway def to appease C++
auto fim = specified.method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
HashModuloSink caSink {
specified.hash.algo,
std::string { info.path.hashPart() },
};
dumpPath(*accessor, path, caSink, (FileSerialisationMethod) fim);
h = caSink.finish().first;
break;
}
case FileIngestionMethod::Git:
h = git::dumpHash(specified.hash.algo, *accessor, path).hash;
break;
}
ContentAddress {
.method = specified.method,
.hash = caSink.finish().first,
.hash = std::move(h),
};
});
if (specified.hash != actualHash.hash) {
@ -1199,7 +1210,30 @@ StorePath LocalStore::addToStoreFromDump(
delTempDir = std::make_unique<AutoDelete>(tempDir);
tempPath = tempDir + "/x";
restorePath(tempPath, bothSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(tempPath, bothSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = tempPath;
auto accessor = getFSAccessor();
git::restore(sink, bothSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
dumpBuffer.reset();
dump = {};
@ -1238,7 +1272,30 @@ StorePath LocalStore::addToStoreFromDump(
if (inMemory) {
StringSource dumpSource { dump };
/* Restore from the buffer in memory. */
restorePath(realPath, dumpSource, method.getFileIngestionMethod());
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
restorePath(realPath, dumpSource, (FileSerialisationMethod) fim);
break;
case FileIngestionMethod::Git: {
RestoreSink sink;
sink.dstPath = realPath;
auto accessor = getFSAccessor();
git::restore(sink, dumpSource, [&](Hash childHash) {
return std::pair<SourceAccessor *, CanonPath> {
&*accessor,
CanonPath {
printStorePath(this->makeFixedOutputPath("git", FixedOutputInfo {
.method = FileIngestionMethod::Git,
.hash = childHash,
}))
},
};
});
break;
}
}
} else {
/* Move the temporary path we restored above. */
moveFile(tempPath, realPath);
@ -1367,7 +1424,7 @@ bool LocalStore::verifyStore(bool checkContents, RepairFlag repair)
PosixSourceAccessor accessor;
std::string hash = hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first.to_string(HashFormat::Nix32, false);
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).to_string(HashFormat::Nix32, false);
if (hash != link.name) {
printError("link '%s' was modified! expected hash '%s', got '%s'",
linkPath, link.name, hash);

View file

@ -151,7 +151,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { path },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
});
debug("'%1%' has hash '%2%'", path, hash.to_string(HashFormat::Nix32, true));
@ -166,7 +166,7 @@ void LocalStore::optimisePath_(Activity * act, OptimiseStats & stats,
PosixSourceAccessor accessor;
hashPath(
accessor, CanonPath { linkPath },
FileIngestionMethod::Recursive, HashAlgorithm::SHA256).first;
FileSerialisationMethod::Recursive, HashAlgorithm::SHA256).first;
})))
{
// XXX: Consider overwriting linkPath with our valid version.

View file

@ -13,6 +13,7 @@
#include "derivations.hh"
#include "pool.hh"
#include "finally.hh"
#include "git.hh"
#include "logging.hh"
#include "callback.hh"
#include "filetransfer.hh"

View file

@ -184,7 +184,7 @@ protected:
friend struct ConnectionHandle;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath) override;
virtual ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override;
virtual void narFromPath(const StorePath & path, Sink & sink) override;

View file

@ -12,7 +12,9 @@
#include "references.hh"
#include "archive.hh"
#include "callback.hh"
#include "git.hh"
#include "remote-store.hh"
#include "posix-source-accessor.hh"
// FIXME this should not be here, see TODO below on
// `addMultipleToStore`.
#include "worker-protocol.hh"
@ -119,6 +121,9 @@ static std::string makeType(
StorePath StoreDirConfig::makeFixedOutputPath(std::string_view name, const FixedOutputInfo & info) const
{
if (info.method == FileIngestionMethod::Git && info.hash.algo != HashAlgorithm::SHA1)
throw Error("Git file ingestion must use SHA-1 hash");
if (info.hash.algo == HashAlgorithm::SHA256 && info.method == FileIngestionMethod::Recursive) {
return makeStorePath(makeType(*this, "source", info.references), info.hash, name);
} else {
@ -166,7 +171,7 @@ std::pair<StorePath, Hash> StoreDirConfig::computeStorePath(
const StorePathSet & references,
PathFilter & filter) const
{
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter).first;
auto h = hashPath(accessor, path, method.getFileIngestionMethod(), hashAlgo, filter);
return {
makeFixedOutputPathFromCA(
name,
@ -193,7 +198,37 @@ StorePath Store::addToStore(
RepairFlag repair)
{
auto source = sinkToSource([&](Sink & sink) {
dumpPath(accessor, path, sink, method.getFileIngestionMethod(), filter);
auto fim = method.getFileIngestionMethod();
switch (fim) {
case FileIngestionMethod::Flat:
case FileIngestionMethod::Recursive:
{
dumpPath(accessor, path, sink, (FileSerialisationMethod) fim, filter);
break;
}
case FileIngestionMethod::Git:
{
git::dump(
accessor, path,
sink,
// recursively add to store if path is a directory
[&](const CanonPath & path) -> git::TreeEntry {
auto storePath = addToStore("git", accessor, path, method, hashAlgo, references, filter, repair);
auto info = queryPathInfo(storePath);
assert(info->ca);
assert(info->ca->method == FileIngestionMethod::Git);
auto stat = getFSAccessor()->lstat(CanonPath(printStorePath(storePath)));
auto gitModeOpt = git::convertMode(stat.type);
assert(gitModeOpt);
return {
.mode = *gitModeOpt,
.hash = info->ca->hash,
};
},
filter);
break;
}
}
});
return addToStoreFromDump(*source, name, method, hashAlgo, references, repair);
}
@ -355,9 +390,7 @@ ValidPathInfo Store::addToStoreSlow(
NullFileSystemObjectSink blank;
auto & parseSink = method.getFileIngestionMethod() == FileIngestionMethod::Flat
? (FileSystemObjectSink &) fileSink
: method.getFileIngestionMethod() == FileIngestionMethod::Recursive
? (FileSystemObjectSink &) blank
: (abort(), (FileSystemObjectSink &)*(FileSystemObjectSink *)nullptr); // handled both cases
: (FileSystemObjectSink &) blank; // for recursive or git we do recursive
/* The information that flows from tapped (besides being replicated in
narSink), is now put in parseSink. */
@ -369,6 +402,8 @@ ValidPathInfo Store::addToStoreSlow(
auto hash = method == FileIngestionMethod::Recursive && hashAlgo == HashAlgorithm::SHA256
? narHash
: method == FileIngestionMethod::Git
? git::dumpHash(hashAlgo, accessor, srcPath).hash
: caHashSink.finish().first;
if (expectedCAHash && expectedCAHash != hash)

View file

@ -35,7 +35,7 @@ public:
static std::set<std::string> uriSchemes()
{ return {"unix"}; }
ref<SourceAccessor> getFSAccessor(bool requireValidPath) override
ref<SourceAccessor> getFSAccessor(bool requireValidPath = true) override
{ return LocalFSStore::getFSAccessor(requireValidPath); }
void narFromPath(const StorePath & path, Sink & sink) override