mirror of
https://github.com/NixOS/nix
synced 2025-06-26 07:31:15 +02:00
Add a Git-based content-addressed tarball cache
GitArchiveInputScheme now streams tarballs into a Git repository. This deduplicates data a lot, e.g. when you're fetching different revisions of the Nixpkgs repo. It also warns if the tree hash returned by GitHub doesn't match the tree hash of the imported tarball.
This commit is contained in:
parent
a8fea5a54f
commit
b36857ac8d
5 changed files with 272 additions and 37 deletions
|
@ -4,6 +4,7 @@
|
|||
#include "finally.hh"
|
||||
#include "processes.hh"
|
||||
#include "signals.hh"
|
||||
#include "users.hh"
|
||||
|
||||
#include <boost/core/span.hpp>
|
||||
|
||||
|
@ -21,6 +22,9 @@
|
|||
#include <git2/submodule.h>
|
||||
#include <git2/tree.h>
|
||||
|
||||
#include "tarfile.hh"
|
||||
#include <archive_entry.h>
|
||||
|
||||
#include <unordered_set>
|
||||
#include <queue>
|
||||
#include <regex>
|
||||
|
@ -307,6 +311,158 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
return std::nullopt;
|
||||
}
|
||||
|
||||
TarballInfo importTarball(Source & source) override
|
||||
{
|
||||
TarArchive archive(source);
|
||||
|
||||
struct PendingDir
|
||||
{
|
||||
std::string name;
|
||||
TreeBuilder builder;
|
||||
};
|
||||
|
||||
std::vector<PendingDir> pendingDirs;
|
||||
|
||||
auto pushBuilder = [&](std::string name)
|
||||
{
|
||||
git_treebuilder * b;
|
||||
if (git_treebuilder_new(&b, *this, nullptr))
|
||||
throw Error("creating a tree builder: %s", git_error_last()->message);
|
||||
pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
|
||||
};
|
||||
|
||||
auto popBuilder = [&]() -> std::pair<git_oid, std::string>
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto pending = std::move(pendingDirs.back());
|
||||
git_oid oid;
|
||||
if (git_treebuilder_write(&oid, pending.builder.get()))
|
||||
throw Error("creating a tree object: %s", git_error_last()->message);
|
||||
pendingDirs.pop_back();
|
||||
return {oid, pending.name};
|
||||
};
|
||||
|
||||
auto addToTree = [&](const std::string & name, const git_oid & oid, git_filemode_t mode)
|
||||
{
|
||||
assert(!pendingDirs.empty());
|
||||
auto & pending = pendingDirs.back();
|
||||
if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
|
||||
throw Error("adding a file to a tree builder: %s", git_error_last()->message);
|
||||
};
|
||||
|
||||
auto updateBuilders = [&](boost::span<const std::string> names)
|
||||
{
|
||||
// Find the common prefix of pendingDirs and names.
|
||||
size_t prefixLen = 0;
|
||||
for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
|
||||
if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
|
||||
break;
|
||||
|
||||
// Finish the builders that are not part of the common prefix.
|
||||
for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
|
||||
auto [oid, name] = popBuilder();
|
||||
addToTree(name, oid, GIT_FILEMODE_TREE);
|
||||
}
|
||||
|
||||
// Create builders for the new directories.
|
||||
for (auto n = prefixLen; n < names.size(); ++n)
|
||||
pushBuilder(names[n]);
|
||||
};
|
||||
|
||||
pushBuilder("");
|
||||
|
||||
size_t componentsToStrip = 1;
|
||||
|
||||
time_t lastModified = 0;
|
||||
|
||||
for (;;) {
|
||||
// FIXME: merge with extract_archive
|
||||
struct archive_entry * entry;
|
||||
int r = archive_read_next_header(archive.archive, &entry);
|
||||
if (r == ARCHIVE_EOF) break;
|
||||
auto path = archive_entry_pathname(entry);
|
||||
if (!path)
|
||||
throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
|
||||
if (r == ARCHIVE_WARN)
|
||||
warn(archive_error_string(archive.archive));
|
||||
else
|
||||
archive.check(r);
|
||||
|
||||
lastModified = std::max(lastModified, archive_entry_mtime(entry));
|
||||
|
||||
auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
|
||||
|
||||
boost::span<const std::string> pathComponents2{pathComponents};
|
||||
|
||||
if (pathComponents2.size() <= componentsToStrip) continue;
|
||||
pathComponents2 = pathComponents2.subspan(componentsToStrip);
|
||||
|
||||
updateBuilders(
|
||||
archive_entry_filetype(entry) == AE_IFDIR
|
||||
? pathComponents2
|
||||
: pathComponents2.first(pathComponents2.size() - 1));
|
||||
|
||||
switch (archive_entry_filetype(entry)) {
|
||||
|
||||
case AE_IFDIR:
|
||||
// Nothing to do right now.
|
||||
break;
|
||||
|
||||
case AE_IFREG: {
|
||||
|
||||
git_writestream * stream = nullptr;
|
||||
if (git_blob_create_from_stream(&stream, *this, nullptr))
|
||||
throw Error("creating a blob stream object: %s", git_error_last()->message);
|
||||
|
||||
while (true) {
|
||||
std::vector<unsigned char> buf(128 * 1024);
|
||||
auto n = archive_read_data(archive.archive, buf.data(), buf.size());
|
||||
if (n < 0)
|
||||
throw Error("cannot read file '%s' from tarball", path);
|
||||
if (n == 0) break;
|
||||
if (stream->write(stream, (const char *) buf.data(), n))
|
||||
throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
|
||||
}
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_stream_commit(&oid, stream))
|
||||
throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid,
|
||||
archive_entry_mode(entry) & S_IXUSR
|
||||
? GIT_FILEMODE_BLOB_EXECUTABLE
|
||||
: GIT_FILEMODE_BLOB);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
case AE_IFLNK: {
|
||||
auto target = archive_entry_symlink(entry);
|
||||
|
||||
git_oid oid;
|
||||
if (git_blob_create_from_buffer(&oid, *this, target, strlen(target)))
|
||||
throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);
|
||||
|
||||
addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
throw Error("file '%s' in tarball has unsupported file type", path);
|
||||
}
|
||||
}
|
||||
|
||||
updateBuilders({});
|
||||
|
||||
auto [oid, _name] = popBuilder();
|
||||
|
||||
return TarballInfo {
|
||||
.treeHash = toHash(oid),
|
||||
.lastModified = lastModified
|
||||
};
|
||||
}
|
||||
|
||||
std::vector<std::tuple<Submodule, Hash>> getSubmodules(const Hash & rev) override;
|
||||
|
||||
std::string resolveSubmoduleUrl(
|
||||
|
@ -449,6 +605,22 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this<GitRepoImpl>
|
|||
else
|
||||
throw Error("Commit signature verification on commit %s failed: %s", rev.gitRev(), output);
|
||||
}
|
||||
|
||||
Hash treeHashToNarHash(const Hash & treeHash) override
|
||||
{
|
||||
auto accessor = getAccessor(treeHash);
|
||||
|
||||
fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}});
|
||||
|
||||
if (auto res = fetchers::getCache()->lookup(cacheKey))
|
||||
return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), htSHA256);
|
||||
|
||||
auto narHash = accessor->hashPath(CanonPath::root);
|
||||
|
||||
fetchers::getCache()->upsert(cacheKey, fetchers::Attrs({{"narHash", narHash.to_string(HashFormat::SRI, true)}}));
|
||||
|
||||
return narHash;
|
||||
}
|
||||
};
|
||||
|
||||
ref<GitRepo> GitRepo::openRepo(const CanonPath & path, bool create, bool bare)
|
||||
|
@ -673,5 +845,11 @@ std::vector<std::tuple<GitRepoImpl::Submodule, Hash>> GitRepoImpl::getSubmodules
|
|||
return result;
|
||||
}
|
||||
|
||||
ref<GitRepo> getTarballCache()
|
||||
{
|
||||
static CanonPath repoDir(getCacheDir() + "/nix/tarball-cache");
|
||||
|
||||
return make_ref<GitRepoImpl>(repoDir, true, true);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue