GitArchiveInputScheme: Revert to downloading tarballs
Tarballs are now unpacked into a content-addressed cache, specifically a Git repository in ~/.cache/nix/tarball-cache, so that we can use GitAccessor to provide random access.
parent b14830b23a
commit 5c29abc5bd

4 changed files with 243 additions and 69 deletions
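For orientation, a minimal sketch (not part of the commit) of how the pieces added below fit together: the tarball is streamed straight into the Git-based cache with importTarball(), and the resulting tree hash is opened through makeTarballCacheAccessor() for random access. The wrapper function and the exact set of includes are assumptions for illustration; the helper names and the download pattern are taken from the diff.

    // Sketch only (assumed wrapper, not in the commit): download a tarball,
    // unpack it into ~/.cache/nix/tarball-cache, and get random access to it.
    #include "input-accessor.hh"   // importTarball(), makeTarballCacheAccessor()
    #include "filetransfer.hh"     // FileTransferRequest, getFileTransfer() (assumed header)

    namespace nix {

    ref<InputAccessor> fetchTarballToCache(const std::string & url)
    {
        /* Stream the HTTP response body lazily as a Source. */
        auto source = sinkToSource([&](Sink & sink) {
            FileTransferRequest req(url);
            getFileTransfer()->download(std::move(req), sink);
        });

        /* Unpack into the content-addressed Git repository and obtain the
           SHA-1 hash of the resulting Git tree. */
        auto treeHash = importTarball(*source);

        /* Random access to the unpacked contents via a GitInputAccessor. */
        return makeTarballCacheAccessor(treeHash);
    }

    }

Because the cache is itself a bare Git repository, refetching the same rev reduces to a tree-hash lookup (see the "git-rev-to-tree-hash" fact added in the GitArchiveInputScheme hunk below).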
@@ -1,5 +1,7 @@
 #include "input-accessor.hh"
 
+#include <span>
+
 #include <git2/blob.h>
 #include <git2/commit.h>
 #include <git2/errors.h>
@@ -8,6 +10,9 @@
 #include <git2/repository.h>
 #include <git2/tree.h>
 
+#include "tarfile.hh"
+#include <archive_entry.h>
+
 namespace nix {
 
 template<auto del>
@@ -17,26 +22,35 @@ struct Deleter
     void operator()(T * p) const { del(p); };
 };
 
-struct GitInputAccessor : InputAccessor
-{
-    typedef std::unique_ptr<git_repository, Deleter<git_repository_free>> Repository;
-    typedef std::unique_ptr<git_tree_entry, Deleter<git_tree_entry_free>> TreeEntry;
-    typedef std::unique_ptr<git_tree, Deleter<git_tree_free>> Tree;
-    typedef std::unique_ptr<git_blob, Deleter<git_blob_free>> Blob;
-
-    Repository repo;
-    Tree root;
-
-    GitInputAccessor(const CanonPath & path, const Hash & rev)
-    {
-        if (git_libgit2_init() < 0)
-            throw Error("initialising libgit2': %s", path, git_error_last()->message);
-
-        git_repository * _repo;
-        if (git_repository_open(&_repo, path.c_str()))
-            throw Error("opening Git repository '%s': %s", path, git_error_last()->message);
-        repo = Repository(_repo);
-
+typedef std::unique_ptr<git_repository, Deleter<git_repository_free>> Repository;
+typedef std::unique_ptr<git_tree_entry, Deleter<git_tree_entry_free>> TreeEntry;
+typedef std::unique_ptr<git_tree, Deleter<git_tree_free>> Tree;
+typedef std::unique_ptr<git_treebuilder, Deleter<git_treebuilder_free>> TreeBuilder;
+typedef std::unique_ptr<git_blob, Deleter<git_blob_free>> Blob;
+
+static void initLibGit2()
+{
+    if (git_libgit2_init() < 0)
+        throw Error("initialising libgit2: %s", git_error_last()->message);
+}
+
+static Repository openRepo(const CanonPath & path)
+{
+    initLibGit2();
+    git_repository * _repo;
+    if (git_repository_open(&_repo, path.c_str()))
+        throw Error("opening Git repository '%s': %s", path, git_error_last()->message);
+    return Repository(_repo);
+}
+
+struct GitInputAccessor : InputAccessor
+{
+    Repository repo;
+    Tree root;
+
+    GitInputAccessor(Repository && repo_, const Hash & rev)
+        : repo(std::move(repo_))
+    {
        git_oid oid;
        if (git_oid_fromstr(&oid, rev.gitRev().c_str()))
            throw Error("cannot convert '%s' to a Git OID", rev.gitRev());
@@ -203,8 +217,176 @@ struct GitInputAccessor : InputAccessor
 
 ref<InputAccessor> makeGitInputAccessor(const CanonPath & path, const Hash & rev)
 {
-    return make_ref<GitInputAccessor>(path, rev);
+    return make_ref<GitInputAccessor>(openRepo(path), rev);
 }
 
+static Repository openTarballCache()
+{
+    static CanonPath repoDir(getCacheDir() + "/nix/tarball-cache");
+
+    initLibGit2();
+
+    if (pathExists(repoDir.abs()))
+        return openRepo(repoDir);
+    else {
+        git_repository * _repo;
+        if (git_repository_init(&_repo, repoDir.c_str(), true))
+            throw Error("creating Git repository '%s': %s", repoDir, git_error_last()->message);
+        return Repository(_repo);
+    }
+}
+
+Hash importTarball(Source & source)
+{
+    auto repo = openTarballCache();
+
+    TarArchive archive(source);
+
+    struct PendingDir
+    {
+        std::string name;
+        TreeBuilder builder;
+    };
+
+    std::vector<PendingDir> pendingDirs;
+
+    auto pushBuilder = [&](std::string name)
+    {
+        git_treebuilder * b;
+        if (git_treebuilder_new(&b, repo.get(), nullptr))
+            throw Error("creating a tree builder: %s", git_error_last()->message);
+        pendingDirs.push_back({ .name = std::move(name), .builder = TreeBuilder(b) });
+    };
+
+    auto popBuilder = [&]() -> std::pair<git_oid, std::string>
+    {
+        assert(!pendingDirs.empty());
+        auto pending = std::move(pendingDirs.back());
+        git_oid oid;
+        if (git_treebuilder_write(&oid, pending.builder.get()))
+            throw Error("creating a tree object: %s", git_error_last()->message);
+        pendingDirs.pop_back();
+        return {oid, pending.name};
+    };
+
+    auto addToTree = [&](const std::string & name, const git_oid & oid, git_filemode_t mode)
+    {
+        assert(!pendingDirs.empty());
+        auto & pending = pendingDirs.back();
+        if (git_treebuilder_insert(nullptr, pending.builder.get(), name.c_str(), &oid, mode))
+            throw Error("adding a file to a tree builder: %s", git_error_last()->message);
+    };
+
+    auto updateBuilders = [&](std::span<const std::string> names)
+    {
+        // Find the common prefix of pendingDirs and names.
+        size_t prefixLen = 0;
+        for (; prefixLen < names.size() && prefixLen + 1 < pendingDirs.size(); ++prefixLen)
+            if (names[prefixLen] != pendingDirs[prefixLen + 1].name)
+                break;
+
+        // Finish the builders that are not part of the common prefix.
+        for (auto n = pendingDirs.size(); n > prefixLen + 1; --n) {
+            auto [oid, name] = popBuilder();
+            addToTree(name, oid, GIT_FILEMODE_TREE);
+        }
+
+        // Create builders for the new directories.
+        for (auto n = prefixLen; n < names.size(); ++n)
+            pushBuilder(names[n]);
+
+    };
+
+    pushBuilder("");
+
+    size_t componentsToStrip = 1;
+
+    for (;;) {
+        // FIXME: merge with extract_archive
+        struct archive_entry * entry;
+        int r = archive_read_next_header(archive.archive, &entry);
+        if (r == ARCHIVE_EOF) break;
+        auto path = archive_entry_pathname(entry);
+        if (!path)
+            throw Error("cannot get archive member name: %s", archive_error_string(archive.archive));
+        if (r == ARCHIVE_WARN)
+            warn(archive_error_string(archive.archive));
+        else
+            archive.check(r);
+
+        auto pathComponents = tokenizeString<std::vector<std::string>>(path, "/");
+
+        std::span<const std::string> pathComponents2{pathComponents};
+
+        if (pathComponents2.size() <= componentsToStrip) continue;
+        pathComponents2 = pathComponents2.subspan(componentsToStrip);
+
+        updateBuilders(
+            archive_entry_filetype(entry) == AE_IFDIR
+            ? pathComponents2
+            : pathComponents2.first(pathComponents2.size() - 1));
+
+        switch (archive_entry_filetype(entry)) {
+
+        case AE_IFDIR:
+            // Nothing to do right now.
+            break;
+
+        case AE_IFREG: {
+
+            git_writestream * stream = nullptr;
+            if (git_blob_create_from_stream(&stream, repo.get(), nullptr))
+                throw Error("creating a blob stream object: %s", git_error_last()->message);
+
+            while (true) {
+                std::vector<unsigned char> buf(128 * 1024);
+                auto n = archive_read_data(archive.archive, buf.data(), buf.size());
+                if (n < 0)
+                    throw Error("cannot read file '%s' from tarball", path);
+                if (n == 0) break;
+                if (stream->write(stream, (const char *) buf.data(), n))
+                    throw Error("writing a blob for tarball member '%s': %s", path, git_error_last()->message);
+            }
+
+            git_oid oid;
+            if (git_blob_create_from_stream_commit(&oid, stream))
+                throw Error("creating a blob object for tarball member '%s': %s", path, git_error_last()->message);
+
+            addToTree(*pathComponents.rbegin(), oid,
+                archive_entry_mode(entry) & S_IXUSR
+                ? GIT_FILEMODE_BLOB_EXECUTABLE
+                : GIT_FILEMODE_BLOB);
+
+            break;
+        }
+
+        case AE_IFLNK: {
+            auto target = archive_entry_symlink(entry);
+
+            git_oid oid;
+            if (git_blob_create_from_buffer(&oid, repo.get(), target, strlen(target)))
+                throw Error("creating a blob object for tarball symlink member '%s': %s", path, git_error_last()->message);
+
+            addToTree(*pathComponents.rbegin(), oid, GIT_FILEMODE_LINK);
+
+            break;
+        }
+
+        default:
+            throw Error("file '%s' in tarball has unsupported file type", path);
+        }
+    }
+
+    updateBuilders({});
+
+    auto [oid, _name] = popBuilder();
+
+    return Hash::parseAny(git_oid_tostr_s(&oid), htSHA1);
+}
+
+ref<InputAccessor> makeTarballCacheAccessor(const Hash & rev)
+{
+    return make_ref<GitInputAccessor>(openTarballCache(), rev);
+}
 
 }
@@ -180,7 +180,7 @@ struct GitArchiveInputScheme : InputScheme
 
     virtual DownloadUrl getDownloadUrl(const Input & input) const = 0;
 
-    std::pair<StorePath, Input> downloadArchive(ref<Store> store, Input input) const
+    std::pair<Input, Hash> downloadArchive(ref<Store> store, Input input) const
     {
         if (!maybeGetStrAttr(input.attrs, "ref")) input.attrs.insert_or_assign("ref", "HEAD");
 
@@ -190,62 +190,50 @@ struct GitArchiveInputScheme : InputScheme
         input.attrs.erase("ref");
         input.attrs.insert_or_assign("rev", rev->gitRev());
 
-        Attrs lockedAttrs({
-            {"type", "git-zipball"},
-            {"rev", rev->gitRev()},
-        });
-
-        if (auto res = getCache()->lookup(store, lockedAttrs))
-            return {std::move(res->second), std::move(input)};
-
-        auto url = getDownloadUrl(input);
-
-        auto res = downloadFile(store, url.url, input.getName(), true, url.headers);
-
-        getCache()->add(
-            store,
-            lockedAttrs,
-            {
-                {"rev", rev->gitRev()},
-            },
-            res.storePath,
-            true);
-
-        return {res.storePath, std::move(input)};
-    }
-
-    std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & input) const override
-    {
-        auto [storePath, input2] = downloadArchive(store, input);
-
-        auto accessor = makeZipInputAccessor(CanonPath(store->toRealPath(storePath)));
-
-        /* Compute the NAR hash of the contents of the zip file. This
-           is checked against the NAR hash in the lock file in
-           Input::checkLocks(). */
-        auto key = fmt("zip-nar-hash-%s", store->toRealPath(storePath.to_string()));
-
         auto cache = getCache();
 
-        auto narHash = [&]() {
-            if (auto narHashS = cache->queryFact(key)) {
-                return Hash::parseSRI(*narHashS);
-            } else {
-                auto narHash = accessor->hashPath(CanonPath::root);
-                cache->upsertFact(key, narHash.to_string(SRI, true));
-                return narHash;
-            }
-        }();
+        auto treeHashKey = fmt("git-rev-to-tree-hash-%s", rev->gitRev());
+
+        if (auto treeHashS = cache->queryFact(treeHashKey)) {
+            auto treeHash = Hash::parseAny(*treeHashS, htSHA1);
+            // FIXME: verify that treeHash exists in the tarball cache.
+            return {std::move(input), treeHash};
+        }
 
-        input2.attrs.insert_or_assign("narHash", narHash.to_string(SRI, true));
+        /* Stream the tarball into the tarball cache. */
+        auto url = getDownloadUrl(input);
+
+        auto source = sinkToSource([&](Sink & sink) {
+            FileTransferRequest req(url.url);
+            req.headers = url.headers;
+            getFileTransfer()->download(std::move(req), sink);
+        });
+
+        auto treeHash = importTarball(*source);
+
+        // FIXME: verify against locked tree hash.
+        input.attrs.insert_or_assign("treeHash", treeHash.gitRev());
+
+        cache->upsertFact(treeHashKey, treeHash.gitRev());
+
+        return {std::move(input), treeHash};
+    }
 
+    std::pair<ref<InputAccessor>, Input> getAccessor(ref<Store> store, const Input & _input) const override
+    {
+        auto [input, treeHash] = downloadArchive(store, _input);
+
+        auto accessor = makeTarballCacheAccessor(treeHash);
+
+#if 0
         auto lastModified = accessor->getLastModified();
         assert(lastModified);
-        input2.attrs.insert_or_assign("lastModified", uint64_t(*lastModified));
+        input.attrs.insert_or_assign("lastModified", uint64_t(*lastModified));
+#endif
 
-        accessor->setPathDisplay("«" + input2.to_string() + "»");
+        accessor->setPathDisplay("«" + input.to_string() + "»");
 
-        return {accessor, input2};
+        return {accessor, input};
     }
 
     bool isLocked(const Input & input) const override
@@ -314,10 +302,10 @@ struct GitHubInputScheme : GitArchiveInputScheme
         // urls so we do not run into rate limits.
         const auto urlFmt =
             host != "github.com"
-            ? "https://%s/api/v3/repos/%s/%s/zipball/%s"
+            ? "https://%s/api/v3/repos/%s/%s/tarball/%s"
            : headers.empty()
-            ? "https://%s/%s/%s/archive/%s.zip"
-            : "https://api.%s/repos/%s/%s/zipball/%s";
+            ? "https://%s/%s/%s/archive/%s.tar.gz"
+            : "https://api.%s/repos/%s/%s/tarball/%s";
 
         const auto url = fmt(urlFmt, host, getOwner(input), getRepo(input),
             input.getRev()->to_string(Base16, false));
@@ -384,7 +372,7 @@ struct GitLabInputScheme : GitArchiveInputScheme
         // is 10 reqs/sec/ip-addr. See
         // https://docs.gitlab.com/ee/user/gitlab_com/index.html#gitlabcom-specific-rate-limits
         auto host = maybeGetStrAttr(input.attrs, "host").value_or("gitlab.com");
-        auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.zip?sha=%s",
+        auto url = fmt("https://%s/api/v4/projects/%s%%2F%s/repository/archive.tar.gz?sha=%s",
            host, getStrAttr(input.attrs, "owner"), getStrAttr(input.attrs, "repo"),
            input.getRev()->to_string(Base16, false));
 
@@ -119,6 +119,10 @@ ref<InputAccessor> makePatchingInputAccessor(
 
 ref<InputAccessor> makeGitInputAccessor(const CanonPath & path, const Hash & rev);
 
+Hash importTarball(Source & source);
+
+ref<InputAccessor> makeTarballCacheAccessor(const Hash & rev);
+
 struct SourcePath
 {
     ref<InputAccessor> accessor;
@@ -8,6 +8,6 @@ libfetchers_SOURCES := $(wildcard $(d)/*.cc)
 
 libfetchers_CXXFLAGS += -I src/libutil -I src/libstore
 
-libfetchers_LDFLAGS += -pthread -lzip -lgit2
+libfetchers_LDFLAGS += -pthread -lzip -lgit2 -larchive
 
 libfetchers_LIBS = libutil libstore