1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-02 05:11:47 +02:00

Merge remote-tracking branch 'origin/master' into lazy-trees

This commit is contained in:
Eelco Dolstra 2022-05-09 11:26:39 +02:00
commit 30ca717558
175 changed files with 4032 additions and 1936 deletions

View file

@ -5,9 +5,13 @@
#include "store-api.hh"
#include "url-parts.hh"
#include "pathlocks.hh"
#include "util.hh"
#include "git.hh"
#include "fetch-settings.hh"
#include <regex>
#include <string.h>
#include <sys/time.h>
#include <sys/wait.h>
@ -15,24 +19,240 @@ using namespace std::string_literals;
namespace nix::fetchers {
namespace {
// Explicit initial branch of our bare repo to suppress warnings from new version of git.
// The value itself does not matter, since we always fetch a specific revision or branch.
// It is set with `-c init.defaultBranch=` instead of `--initial-branch=` to stay compatible with
// old version of git, which will ignore unrecognized `-c` options.
const std::string gitInitialBranch = "__nix_dummy_branch";
static std::string readHead(const Path & path)
std::string getGitDir()
{
return chomp(runProgram("git", true, { "-C", path, "rev-parse", "--abbrev-ref", "HEAD" }));
return getEnv("GIT_DIR").value_or(".git");
}
static bool isNotDotGitDirectory(const Path & path)
bool isCacheFileWithinTtl(time_t now, const struct stat & st)
{
static const std::regex gitDirRegex("^(?:.*/)?\\.git$");
return not std::regex_match(path, gitDirRegex);
return st.st_mtime + settings.tarballTtl > now;
}
bool touchCacheFile(const Path & path, time_t touch_time)
{
struct timeval times[2];
times[0].tv_sec = touch_time;
times[0].tv_usec = 0;
times[1].tv_sec = touch_time;
times[1].tv_usec = 0;
return lutimes(path.c_str(), times) == 0;
}
Path getCachePath(std::string_view key)
{
return getCacheDir() + "/nix/gitv3/" +
hashString(htSHA256, key).to_string(Base32, false);
}
// Returns the name of the HEAD branch.
//
// Returns the head branch name as reported by git ls-remote --symref, e.g., if
// ls-remote returns the output below, "main" is returned based on the ref line.
//
// ref: refs/heads/main HEAD
// ...
std::optional<std::string> readHead(const Path & path)
{
auto [exit_code, output] = runProgram(RunOptions {
.program = "git",
// FIXME: use 'HEAD'
.args = {"ls-remote", "--symref", path},
});
if (exit_code != 0) {
return std::nullopt;
}
std::string_view line = output;
line = line.substr(0, line.find("\n"));
if (const auto parseResult = git::parseLsRemoteLine(line)) {
switch (parseResult->kind) {
case git::LsRemoteRefLine::Kind::Symbolic:
debug("resolved HEAD ref '%s' for repo '%s'", parseResult->target, path);
break;
case git::LsRemoteRefLine::Kind::Object:
debug("resolved HEAD rev '%s' for repo '%s'", parseResult->target, path);
break;
}
return parseResult->target;
}
return std::nullopt;
}
// Persist the HEAD ref from the remote repo in the local cached repo.
bool storeCachedHead(std::string_view actualUrl, std::string headRef)
{
Path cacheDir = getCachePath(actualUrl);
try {
runProgram("git", true, { "-C", cacheDir, "symbolic-ref", "--", "HEAD", headRef });
} catch (ExecError & e) {
if (!WIFEXITED(e.status)) throw;
return false;
}
/* No need to touch refs/HEAD, because `git symbolic-ref` updates the mtime. */
return true;
}
std::optional<std::string> readHeadCached(std::string_view actualUrl)
{
// Create a cache path to store the branch of the HEAD ref. Append something
// in front of the URL to prevent collision with the repository itself.
Path cacheDir = getCachePath(actualUrl);
Path headRefFile = cacheDir + "/HEAD";
time_t now = time(0);
struct stat st;
std::optional<std::string> cachedRef;
if (stat(headRefFile.c_str(), &st) == 0) {
cachedRef = readHead(cacheDir);
if (cachedRef != std::nullopt &&
*cachedRef != gitInitialBranch &&
isCacheFileWithinTtl(now, st))
{
debug("using cached HEAD ref '%s' for repo '%s'", *cachedRef, actualUrl);
return cachedRef;
}
}
auto ref = readHead(actualUrl);
if (ref) return ref;
if (cachedRef) {
// If the cached git ref is expired in fetch() below, and the 'git fetch'
// fails, it falls back to continuing with the most recent version.
// This function must behave the same way, so we return the expired
// cached ref here.
warn("could not get HEAD ref for repository '%s'; using expired cached ref '%s'", actualUrl, *cachedRef);
return *cachedRef;
}
return std::nullopt;
}
bool isNotDotGitDirectory(const Path & path)
{
return baseNameOf(path) != ".git";
}
struct WorkdirInfo
{
bool clean = false;
bool hasHead = false;
};
// Returns whether a git workdir is clean and has commits.
WorkdirInfo getWorkdirInfo(const Input & input, const Path & workdir)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
auto gitDir = getGitDir();
auto env = getEnv();
// Set LC_ALL to C: because we rely on the error messages from git rev-parse to determine what went wrong
// that way unknown errors can lead to a failure instead of continuing through the wrong code path
env["LC_ALL"] = "C";
/* Check whether HEAD points to something that looks like a commit,
since that is the refrence we want to use later on. */
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", workdir, "--git-dir", gitDir, "rev-parse", "--verify", "--no-revs", "HEAD^{commit}" },
.environment = env,
.mergeStderrToStdout = true
});
auto exitCode = WEXITSTATUS(result.first);
auto errorMessage = result.second;
if (errorMessage.find("fatal: not a git repository") != std::string::npos) {
throw Error("'%s' is not a Git repository", workdir);
} else if (errorMessage.find("fatal: Needed a single revision") != std::string::npos) {
// indicates that the repo does not have any commits
// we want to proceed and will consider it dirty later
} else if (exitCode != 0) {
// any other errors should lead to a failure
throw Error("getting the HEAD of the Git tree '%s' failed with exit code %d:\n%s", workdir, exitCode, errorMessage);
}
bool clean = false;
bool hasHead = exitCode == 0;
try {
if (hasHead) {
// Using git diff is preferrable over lower-level operations here,
// because its conceptually simpler and we only need the exit code anyways.
auto gitDiffOpts = Strings({ "-C", workdir, "diff", "HEAD", "--quiet"});
if (!submodules) {
// Changes in submodules should only make the tree dirty
// when those submodules will be copied as well.
gitDiffOpts.emplace_back("--ignore-submodules");
}
gitDiffOpts.emplace_back("--");
runProgram("git", true, gitDiffOpts);
clean = true;
}
} catch (ExecError & e) {
if (!WIFEXITED(e.status) || WEXITSTATUS(e.status) != 1) throw;
}
return WorkdirInfo { .clean = clean, .hasHead = hasHead };
}
std::pair<StorePath, Input> fetchFromWorkdir(ref<Store> store, Input & input, const Path & workdir, const WorkdirInfo & workdirInfo)
{
const bool submodules = maybeGetBoolAttr(input.attrs, "submodules").value_or(false);
if (!fetchSettings.allowDirty)
throw Error("Git tree '%s' is dirty", workdir);
if (fetchSettings.warnDirty)
warn("Git tree '%s' is dirty", workdir);
auto gitOpts = Strings({ "-C", workdir, "ls-files", "-z" });
if (submodules)
gitOpts.emplace_back("--recurse-submodules");
auto files = tokenizeString<std::set<std::string>>(
runProgram("git", true, gitOpts), "\0"s);
Path actualPath(absPath(workdir));
PathFilter filter = [&](const Path & p) -> bool {
assert(hasPrefix(p, actualPath));
std::string file(p, actualPath.size() + 1);
auto st = lstat(p);
if (S_ISDIR(st.st_mode)) {
auto prefix = file + "/";
auto i = files.lower_bound(prefix);
return i != files.end() && hasPrefix(*i, prefix);
}
return files.count(file);
};
auto storePath = store->addToStore(input.getName(), actualPath, FileIngestionMethod::Recursive, htSHA256, filter);
// FIXME: maybe we should use the timestamp of the last
// modified dirty file?
input.attrs.insert_or_assign(
"lastModified",
workdirInfo.hasHead ? std::stoull(runProgram("git", true, { "-C", actualPath, "log", "-1", "--format=%ct", "--no-show-signature", "HEAD" })) : 0);
return {std::move(storePath), input};
}
} // end namespace
struct GitInputScheme : InputScheme
{
std::optional<Input> inputFromURL(const ParsedURL & url) override
@ -50,7 +270,7 @@ struct GitInputScheme : InputScheme
Attrs attrs;
attrs.emplace("type", "git");
for (auto &[name, value] : url.query) {
for (auto & [name, value] : url.query) {
if (name == "rev" || name == "ref")
attrs.emplace(name, value);
else if (name == "shallow" || name == "submodules")
@ -152,13 +372,14 @@ struct GitInputScheme : InputScheme
{
auto sourcePath = getSourcePath(input);
assert(sourcePath);
auto gitDir = getGitDir();
runProgram("git", true,
{ "-C", *sourcePath, "add", "--force", "--intent-to-add", "--", std::string(file) });
{ "-C", *sourcePath, "--git-dir", gitDir, "add", "--force", "--intent-to-add", "--", std::string(file) });
if (commitMsg)
runProgram("git", true,
{ "-C", *sourcePath, "commit", std::string(file), "-m", *commitMsg });
{ "-C", *sourcePath, "--git-dir", gitDir, "commit", std::string(file), "-m", *commitMsg });
}
struct RepoInfo
@ -371,6 +592,15 @@ struct GitInputScheme : InputScheme
repoDir = repoInfo.url;
} else {
const bool useHeadRef = !input.getRef();
if (useHeadRef) {
auto head = readHeadCached(actualUrl);
if (!head) {
warn("could not read HEAD ref from repo at '%s', using 'master'", actualUrl);
head = "master";
}
input.attrs.insert_or_assign("ref", *head);
}
if (auto res = getCache()->lookup(store, unlockedAttrs)) {
auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), htSHA1);
@ -382,6 +612,7 @@ struct GitInputScheme : InputScheme
Path cacheDir = getCacheDir() + "/nix/gitv3/" + hashString(htSHA256, repoInfo.url).to_string(Base32, false);
repoDir = cacheDir;
gitDir = ".";
createDirs(dirOf(cacheDir));
PathLocks cacheDirLock({cacheDir + ".lock"});
@ -402,7 +633,7 @@ struct GitInputScheme : InputScheme
repo. */
if (input.getRev()) {
try {
runProgram("git", true, { "-C", repoDir, "cat-file", "-e", input.getRev()->gitRev() });
runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "cat-file", "-e", input.getRev()->gitRev() });
doFetch = false;
} catch (ExecError & e) {
if (WIFEXITED(e.status)) {
@ -419,7 +650,7 @@ struct GitInputScheme : InputScheme
git fetch to update the local ref to the remote ref. */
struct stat st;
doFetch = stat(localRefFile.c_str(), &st) != 0 ||
(uint64_t) st.st_mtime + settings.tarballTtl <= (uint64_t) now;
!isCacheFileWithinTtl(now, st);
}
}
@ -443,13 +674,10 @@ struct GitInputScheme : InputScheme
warn("could not update local clone of Git repository '%s'; continuing with the most recent version", repoInfo.url);
}
struct timeval times[2];
times[0].tv_sec = now;
times[0].tv_usec = 0;
times[1].tv_sec = now;
times[1].tv_usec = 0;
utimes(localRefFile.c_str(), times);
if (!touchCacheFile(localRefFile, now))
warn("could not update mtime for file '%s': %s", localRefFile, strerror(errno));
if (useHeadRef && !storeCachedHead(actualUrl, *input.getRef()))
warn("could not update cached head '%s' for '%s'", *input.getRef(), actualUrl);
}
if (!input.getRev())
@ -458,7 +686,7 @@ struct GitInputScheme : InputScheme
// cache dir lock is removed at scope end; we will only use read-only operations on specific revisions in the remainder
}
bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "rev-parse", "--is-shallow-repository" })) == "true";
bool isShallow = chomp(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-parse", "--is-shallow-repository" })) == "true";
if (isShallow && !repoInfo.shallow)
throw Error("'%s' is a shallow Git repository, but a non-shallow repository is needed", repoInfo.url);
@ -478,7 +706,7 @@ struct GitInputScheme : InputScheme
auto result = runProgram(RunOptions {
.program = "git",
.args = { "-C", repoDir, "cat-file", "commit", input.getRev()->gitRev() },
.args = { "-C", repoDir, "--git-dir", gitDir, "cat-file", "commit", input.getRev()->gitRev() },
.mergeStderrToStdout = true
});
if (WEXITSTATUS(result.first) == 128
@ -517,7 +745,7 @@ struct GitInputScheme : InputScheme
auto source = sinkToSource([&](Sink & sink) {
runProgram2({
.program = "git",
.args = { "-C", repoDir, "archive", input.getRev()->gitRev() },
.args = { "-C", repoDir, "--git-dir", gitDir, "archive", input.getRev()->gitRev() },
.standardOut = &sink
});
});
@ -527,7 +755,7 @@ struct GitInputScheme : InputScheme
auto storePath = store->addToStore(name, tmpDir, FileIngestionMethod::Recursive, htSHA256, filter);
auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
auto lastModified = std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "log", "-1", "--format=%ct", "--no-show-signature", input.getRev()->gitRev() }));
Attrs infoAttrs({
{"rev", input.getRev()->gitRev()},
@ -536,7 +764,7 @@ struct GitInputScheme : InputScheme
if (!repoInfo.shallow)
infoAttrs.insert_or_assign("revCount",
std::stoull(runProgram("git", true, { "-C", repoDir, "rev-list", "--count", input.getRev()->gitRev() })));
std::stoull(runProgram("git", true, { "-C", repoDir, "--git-dir", gitDir, "rev-list", "--count", input.getRev()->gitRev() })));
if (!_input.getRev())
getCache()->add(