diff --git a/doc/manual/rl-next/curl-cloexec.md b/doc/manual/rl-next/curl-cloexec.md new file mode 100644 index 000000000..2fcdfb0d1 --- /dev/null +++ b/doc/manual/rl-next/curl-cloexec.md @@ -0,0 +1,10 @@ +--- +synopsis: Set FD_CLOEXEC on sockets created by curl +issues: [] +prs: [12439] +--- + + +Curl creates sockets without setting FD_CLOEXEC/SOCK_CLOEXEC; this can cause connections to remain open forever when using commands like `nix shell`. + +This change sets the FD_CLOEXEC flag using a CURLOPT_SOCKOPTFUNCTION callback. diff --git a/doc/manual/rl-next/git-lfs-support.md b/doc/manual/rl-next/git-lfs-support.md new file mode 100644 index 000000000..2990fc76c --- /dev/null +++ b/doc/manual/rl-next/git-lfs-support.md @@ -0,0 +1,11 @@ +--- +synopsis: "Git LFS support" +prs: [10153] +--- + +The Git fetcher now supports Large File Storage (LFS). This can be enabled by passing the attribute `lfs = true` to the fetcher, e.g. +```console +nix flake prefetch 'git+ssh://git@github.com/Apress/repo-with-large-file-storage.git?lfs=1' +``` + +Author: [**@b-camacho**](https://github.com/b-camacho), [**@kip93**](https://github.com/kip93) diff --git a/packaging/components.nix b/packaging/components.nix index d1bfe83bf..07bb209cd 100644 --- a/packaging/components.nix +++ b/packaging/components.nix @@ -56,7 +56,7 @@ in nix-cli = callPackage ../src/nix/package.nix { version = fineVersion; }; - nix-functional-tests = callPackage ../src/nix-functional-tests/package.nix { + nix-functional-tests = callPackage ../tests/functional/package.nix { version = fineVersion; }; diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 92dd8edab..f7f79c287 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -2384,7 +2384,7 @@ StorePath EvalState::copyPathToStore(NixStringContext & context, const SourcePat : [&]() { auto dstPath = fetchToStore( *store, - path.resolveSymlinks(), + path.resolveSymlinks(SymlinkResolution::Ancestors), settings.readOnlyMode ? 
FetchMode::DryRun : FetchMode::Copy, path.baseName(), ContentAddressMethod::Raw::NixArchive, diff --git a/src/libexpr/primops/fetchTree.cc b/src/libexpr/primops/fetchTree.cc index c4b8b2999..bd013eab2 100644 --- a/src/libexpr/primops/fetchTree.cc +++ b/src/libexpr/primops/fetchTree.cc @@ -367,6 +367,12 @@ static RegisterPrimOp primop_fetchTree({ Default: `false` + - `lfs` (Bool, optional) + + Fetch any [Git LFS](https://git-lfs.com/) files. + + Default: `false` + - `allRefs` (Bool, optional) By default, this has no effect. This becomes relevant only once `shallow` cloning is disabled. @@ -691,6 +697,13 @@ static RegisterPrimOp primop_fetchGit({ Make a shallow clone when fetching the Git tree. When this is enabled, the options `ref` and `allRefs` have no effect anymore. + + - `lfs` (default: `false`) + + A boolean that when `true` specifies that [Git LFS] files should be fetched. + + [Git LFS]: https://git-lfs.com/ + - `allRefs` Whether to fetch all references (eg. branches and tags) of the repository. 
diff --git a/src/libfetchers-tests/git-utils.cc b/src/libfetchers-tests/git-utils.cc index 0bf3076dc..10e98141f 100644 --- a/src/libfetchers-tests/git-utils.cc +++ b/src/libfetchers-tests/git-utils.cc @@ -7,6 +7,7 @@ #include #include "fs-sink.hh" #include "serialise.hh" +#include "git-lfs-fetch.hh" namespace nix { @@ -109,4 +110,131 @@ TEST_F(GitUtilsTest, sink_hardlink) } }; +namespace lfs { + +TEST_F(GitUtilsTest, parseGitRemoteUrl) +{ + { + GitUrl result = parseGitUrl("git@example.com:path/repo.git"); + EXPECT_EQ(result.protocol, "ssh"); + EXPECT_EQ(result.user, "git"); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result = parseGitUrl("example.com:/path/repo.git"); + EXPECT_EQ(result.protocol, "ssh"); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "/path/repo.git"); + } + + { + GitUrl result = parseGitUrl("example.com:path/repo.git"); + EXPECT_EQ(result.protocol, "ssh"); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result = parseGitUrl("https://example.com/path/repo.git"); + EXPECT_EQ(result.protocol, "https"); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result = parseGitUrl("ssh://git@example.com/path/repo.git"); + EXPECT_EQ(result.protocol, "ssh"); + EXPECT_EQ(result.user, "git"); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result = parseGitUrl("ssh://example/path/repo.git"); + EXPECT_EQ(result.protocol, "ssh"); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, "example"); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result 
= parseGitUrl("http://example.com:8080/path/repo.git"); + EXPECT_EQ(result.protocol, "http"); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, "example.com"); + EXPECT_EQ(result.port, "8080"); + EXPECT_EQ(result.path, "path/repo.git"); + } + + { + GitUrl result = parseGitUrl("invalid-url"); + EXPECT_EQ(result.protocol, ""); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, ""); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, ""); + } + + { + GitUrl result = parseGitUrl(""); + EXPECT_EQ(result.protocol, ""); + EXPECT_EQ(result.user, ""); + EXPECT_EQ(result.host, ""); + EXPECT_EQ(result.port, ""); + EXPECT_EQ(result.path, ""); + } +} +TEST_F(GitUtilsTest, gitUrlToHttp) +{ + { + const GitUrl url = parseGitUrl("git@github.com:user/repo.git"); + EXPECT_EQ(url.toHttp(), "https://github.com/user/repo.git"); + } + { + const GitUrl url = parseGitUrl("https://github.com/user/repo.git"); + EXPECT_EQ(url.toHttp(), "https://github.com/user/repo.git"); + } + { + const GitUrl url = parseGitUrl("http://github.com/user/repo.git"); + EXPECT_EQ(url.toHttp(), "http://github.com/user/repo.git"); + } + { + const GitUrl url = parseGitUrl("ssh://git@github.com:22/user/repo.git"); + EXPECT_EQ(url.toHttp(), "https://github.com:22/user/repo.git"); + } + { + const GitUrl url = parseGitUrl("invalid-url"); + EXPECT_EQ(url.toHttp(), ""); + } +} + +TEST_F(GitUtilsTest, gitUrlToSsh) +{ + { + const GitUrl url = parseGitUrl("https://example.com/user/repo.git"); + const auto [host, path] = url.toSsh(); + EXPECT_EQ(host, "example.com"); + EXPECT_EQ(path, "user/repo.git"); + } + { + const GitUrl url = parseGitUrl("git@example.com:user/repo.git"); + const auto [host, path] = url.toSsh(); + EXPECT_EQ(host, "git@example.com"); + EXPECT_EQ(path, "user/repo.git"); + } +} + +} // namespace lfs + } // namespace nix diff --git a/src/libfetchers/git-lfs-fetch.cc b/src/libfetchers/git-lfs-fetch.cc new file mode 100644 index 000000000..bd6c01435 --- /dev/null +++ 
b/src/libfetchers/git-lfs-fetch.cc @@ -0,0 +1,279 @@ +#include "git-lfs-fetch.hh" +#include "git-utils.hh" +#include "filetransfer.hh" +#include "processes.hh" +#include "url.hh" +#include "users.hh" +#include "hash.hh" + +#include +#include +#include +#include + +#include + +namespace nix::lfs { + +// if authHeader is "", downloadToSink assumes no auth is expected +static void downloadToSink( + const std::string & url, + const std::string & authHeader, + // FIXME: passing a StringSink is superfluous, we may as well + // return a string. Or use an abstract Sink for streaming. + StringSink & sink, + std::string sha256Expected, + size_t sizeExpected) +{ + FileTransferRequest request(url); + Headers headers; + if (!authHeader.empty()) + headers.push_back({"Authorization", authHeader}); + request.headers = headers; + getFileTransfer()->download(std::move(request), sink); + + auto sizeActual = sink.s.length(); + if (sizeExpected != sizeActual) + throw Error("size mismatch while fetching %s: expected %d but got %d", url, sizeExpected, sizeActual); + + auto sha256Actual = hashString(HashAlgorithm::SHA256, sink.s).to_string(HashFormat::Base16, false); + if (sha256Actual != sha256Expected) + throw Error( + "hash mismatch while fetching %s: expected sha256:%s but got sha256:%s", url, sha256Expected, sha256Actual); +} + +static std::string getLfsApiToken(const ParsedURL & url) +{ + auto [status, output] = runProgram(RunOptions{ + .program = "ssh", + .args = {*url.authority, "git-lfs-authenticate", url.path, "download"}, + }); + + if (output.empty()) + throw Error( + "git-lfs-authenticate: no output (cmd: ssh %s git-lfs-authenticate %s download)", + url.authority.value_or(""), + url.path); + + auto queryResp = nlohmann::json::parse(output); + if (!queryResp.contains("header")) + throw Error("no header in git-lfs-authenticate response"); + if (!queryResp["header"].contains("Authorization")) + throw Error("no Authorization in git-lfs-authenticate response"); + + return 
queryResp["header"]["Authorization"].get(); +} + +typedef std::unique_ptr> GitConfig; +typedef std::unique_ptr> GitConfigEntry; + +static std::string getLfsEndpointUrl(git_repository * repo) +{ + GitConfig config; + if (git_repository_config(Setter(config), repo)) { + GitConfigEntry entry; + if (!git_config_get_entry(Setter(entry), config.get(), "lfs.url")) { + auto value = std::string(entry->value); + if (!value.empty()) { + debug("Found explicit lfs.url value: %s", value); + return value; + } + } + } + + git_remote * remote = nullptr; + if (git_remote_lookup(&remote, repo, "origin")) + return ""; + + const char * url_c_str = git_remote_url(remote); + if (!url_c_str) + return ""; + + return std::string(url_c_str); +} + +static std::optional parseLfsPointer(std::string_view content, std::string_view filename) +{ + // https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md + // + // example git-lfs pointer file: + // version https://git-lfs.github.com/spec/v1 + // oid sha256:f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf + // size 10000000 + // (ending \n) + + if (!content.starts_with("version ")) { + // Invalid pointer file + return std::nullopt; + } + + if (!content.starts_with("version https://git-lfs.github.com/spec/v1")) { + // In case there's new spec versions in the future, but for now only v1 exists + debug("Invalid version found on potential lfs pointer file, skipping"); + return std::nullopt; + } + + std::string oid; + std::string size; + + for (auto & line : tokenizeString(content, "\n")) { + if (line.starts_with("version ")) { + continue; + } + if (line.starts_with("oid sha256:")) { + oid = line.substr(11); // skip "oid sha256:" + continue; + } + if (line.starts_with("size ")) { + size = line.substr(5); // skip "size " + continue; + } + + debug("Custom extension '%s' found, ignoring", line); + } + + if (oid.length() != 64 || !std::all_of(oid.begin(), oid.end(), ::isxdigit)) { + debug("Invalid sha256 %s, skipping", oid); + return 
std::nullopt; + } + + if (size.length() == 0 || !std::all_of(size.begin(), size.end(), ::isdigit)) { + debug("Invalid size %s, skipping", size); + return std::nullopt; + } + + return std::make_optional(Pointer{oid, std::stoul(size)}); +} + +Fetch::Fetch(git_repository * repo, git_oid rev) +{ + this->repo = repo; + this->rev = rev; + + const auto remoteUrl = lfs::getLfsEndpointUrl(repo); + + this->url = nix::parseURL(nix::fixGitURL(remoteUrl)).canonicalise(); +} + +bool Fetch::shouldFetch(const CanonPath & path) const +{ + const char * attr = nullptr; + git_attr_options opts = GIT_ATTR_OPTIONS_INIT; + opts.attr_commit_id = this->rev; + opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM; + if (git_attr_get_ext(&attr, (git_repository *) (this->repo), &opts, path.rel_c_str(), "filter")) + throw Error("cannot get git-lfs attribute: %s", git_error_last()->message); + debug("Git filter for '%s' is '%s'", path, attr ? attr : "null"); + return attr != nullptr && !std::string(attr).compare("lfs"); +} + +static nlohmann::json pointerToPayload(const std::vector & items) +{ + nlohmann::json jArray = nlohmann::json::array(); + for (const auto & pointer : items) + jArray.push_back({{"oid", pointer.oid}, {"size", pointer.size}}); + return jArray; +} + +std::vector Fetch::fetchUrls(const std::vector & pointers) const +{ + ParsedURL httpUrl(url); + httpUrl.scheme = url.scheme == "ssh" ? 
"https" : url.scheme; + FileTransferRequest request(httpUrl.to_string() + "/info/lfs/objects/batch"); + request.post = true; + Headers headers; + if (this->url.scheme == "ssh") + headers.push_back({"Authorization", lfs::getLfsApiToken(this->url)}); + headers.push_back({"Content-Type", "application/vnd.git-lfs+json"}); + headers.push_back({"Accept", "application/vnd.git-lfs+json"}); + request.headers = headers; + nlohmann::json oidList = pointerToPayload(pointers); + nlohmann::json data = {{"operation", "download"}}; + data["objects"] = oidList; + request.data = data.dump(); + + FileTransferResult result = getFileTransfer()->upload(request); + auto responseString = result.data; + + std::vector objects; + // example resp here: + // {"objects":[{"oid":"f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","size":10000000,"actions":{"download":{"href":"https://gitlab.com/b-camacho/test-lfs.git/gitlab-lfs/objects/f5e02aa71e67f41d79023a128ca35bad86cf7b6656967bfe0884b3a3c4325eaf","header":{"Authorization":"Basic + // Yi1jYW1hY2hvOmV5SjBlWEFpT2lKS1YxUWlMQ0poYkdjaU9pSklVekkxTmlKOS5leUprWVhSaElqcDdJbUZqZEc5eUlqb2lZaTFqWVcxaFkyaHZJbjBzSW1wMGFTSTZJbUptTURZNFpXVTFMVEprWmpVdE5HWm1ZUzFpWWpRMExUSXpNVEV3WVRReU1qWmtaaUlzSW1saGRDSTZNVGN4TkRZeE16ZzBOU3dpYm1KbUlqb3hOekUwTmpFek9EUXdMQ0psZUhBaU9qRTNNVFEyTWpFd05EVjkuZk9yMDNkYjBWSTFXQzFZaTBKRmJUNnJTTHJPZlBwVW9lYllkT0NQZlJ4QQ=="}}},"authenticated":true}]} + + try { + auto resp = nlohmann::json::parse(responseString); + if (resp.contains("objects")) + objects.insert(objects.end(), resp["objects"].begin(), resp["objects"].end()); + else + throw Error("response does not contain 'objects'"); + + return objects; + } catch (const nlohmann::json::parse_error & e) { + printMsg(lvlTalkative, "Full response: '%1%'", responseString); + throw Error("response did not parse as json: %s", e.what()); + } +} + +void Fetch::fetch( + const std::string & content, + const CanonPath & pointerFilePath, + StringSink & sink, + std::function 
sizeCallback) const +{ + debug("trying to fetch '%s' using git-lfs", pointerFilePath); + + if (content.length() >= 1024) { + warn("encountered file '%s' that should have been a git-lfs pointer, but is too large", pointerFilePath); + sizeCallback(content.length()); + sink(content); + return; + } + + const auto pointer = parseLfsPointer(content, pointerFilePath.rel()); + if (pointer == std::nullopt) { + warn("encountered file '%s' that should have been a git-lfs pointer, but is invalid", pointerFilePath); + sizeCallback(content.length()); + sink(content); + return; + } + + Path cacheDir = getCacheDir() + "/git-lfs"; + std::string key = hashString(HashAlgorithm::SHA256, pointerFilePath.rel()).to_string(HashFormat::Base16, false) + + "/" + pointer->oid; + Path cachePath = cacheDir + "/" + key; + if (pathExists(cachePath)) { + debug("using cache entry %s -> %s", key, cachePath); + sink(readFile(cachePath)); + return; + } + debug("did not find cache entry for %s", key); + + std::vector pointers; + pointers.push_back(pointer.value()); + const auto objUrls = fetchUrls(pointers); + + const auto obj = objUrls[0]; + try { + std::string sha256 = obj.at("oid"); // oid is also the sha256 + std::string ourl = obj.at("actions").at("download").at("href"); + std::string authHeader = ""; + if (obj.at("actions").at("download").contains("header") + && obj.at("actions").at("download").at("header").contains("Authorization")) { + authHeader = obj["actions"]["download"]["header"]["Authorization"]; + } + const uint64_t size = obj.at("size"); + sizeCallback(size); + downloadToSink(ourl, authHeader, sink, sha256, size); + + debug("creating cache entry %s -> %s", key, cachePath); + if (!pathExists(dirOf(cachePath))) + createDirs(dirOf(cachePath)); + writeFile(cachePath, sink.s); + + debug("%s fetched with git-lfs", pointerFilePath); + } catch (const nlohmann::json::out_of_range & e) { + throw Error("bad json from /info/lfs/objects/batch: %s %s", obj, e.what()); + } +} + +} // namespace 
nix::lfs diff --git a/src/libfetchers/git-lfs-fetch.hh b/src/libfetchers/git-lfs-fetch.hh new file mode 100644 index 000000000..36df91962 --- /dev/null +++ b/src/libfetchers/git-lfs-fetch.hh @@ -0,0 +1,43 @@ +#include "canon-path.hh" +#include "serialise.hh" +#include "url.hh" + +#include + +#include + +namespace nix::lfs { + +/** + * git-lfs pointer + * @see https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md + */ +struct Pointer +{ + std::string oid; // git-lfs managed object id. you give this to the lfs server + // for downloads + size_t size; // in bytes +}; + +struct Fetch +{ + // Reference to the repository + const git_repository * repo; + + // Git commit being fetched + git_oid rev; + + // derived from git remote url + nix::ParsedURL url; + + Fetch(git_repository * repo, git_oid rev); + bool shouldFetch(const CanonPath & path) const; + void fetch( + const std::string & content, + const CanonPath & pointerFilePath, + StringSink & sink, + std::function sizeCallback) const; + std::vector fetchUrls(const std::vector & pointers) const; +}; + +} // namespace nix::lfs diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index a6b13fb31..a2761a543 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -1,4 +1,5 @@ #include "git-utils.hh" +#include "git-lfs-fetch.hh" #include "cache.hh" #include "finally.hh" #include "processes.hh" @@ -60,14 +61,6 @@ namespace nix { struct GitSourceAccessor; -// Some wrapper types that ensure that the git_*_free functions get called. -template -struct Deleter -{ - template - void operator()(T * p) const { del(p); }; -}; - typedef std::unique_ptr> Repository; typedef std::unique_ptr> TreeEntry; typedef std::unique_ptr> Tree; @@ -85,20 +78,6 @@ typedef std::unique_ptr> ObjectDb; typedef std::unique_ptr> PackBuilder; typedef std::unique_ptr> Indexer; -// A helper to ensure that we don't leak objects returned by libgit2. 
-template -struct Setter -{ - T & t; - typename T::pointer p = nullptr; - - Setter(T & t) : t(t) { } - - ~Setter() { if (p) t = T(p); } - - operator typename T::pointer * () { return &p; } -}; - Hash toHash(const git_oid & oid) { #ifdef GIT_EXPERIMENTAL_SHA256 @@ -506,12 +485,15 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this /** * A 'GitSourceAccessor' with no regard for export-ignore or any other transformations. */ - ref getRawAccessor(const Hash & rev); + ref getRawAccessor( + const Hash & rev, + bool smudgeLfs = false); ref getAccessor( const Hash & rev, bool exportIgnore, - std::string displayPrefix) override; + std::string displayPrefix, + bool smudgeLfs = false) override; ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError e) override; @@ -670,24 +652,40 @@ ref GitRepo::openRepo(const std::filesystem::path & path, bool create, /** * Raw git tree input accessor. */ + struct GitSourceAccessor : SourceAccessor { ref repo; Object root; + std::optional lfsFetch = std::nullopt; - GitSourceAccessor(ref repo_, const Hash & rev) + GitSourceAccessor(ref repo_, const Hash & rev, bool smudgeLfs) : repo(repo_) , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get())) { + if (smudgeLfs) + lfsFetch = std::make_optional(lfs::Fetch(*repo, hashToOID(rev))); } std::string readBlob(const CanonPath & path, bool symlink) { - auto blob = getBlob(path, symlink); + const auto blob = getBlob(path, symlink); - auto data = std::string_view((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); + if (lfsFetch) { + if (lfsFetch->shouldFetch(path)) { + StringSink s; + try { + auto contents = std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); + lfsFetch->fetch(contents, path, s, [&s](uint64_t size){ s.s.reserve(size); }); + } catch (Error & e) { + e.addTrace({}, "while smudging git-lfs file '%s'", path); + throw; + } + return s.s; + } + } - return std::string(data); + return 
std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); } std::string readFile(const CanonPath & path) override @@ -1191,19 +1189,22 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink } }; -ref GitRepoImpl::getRawAccessor(const Hash & rev) +ref GitRepoImpl::getRawAccessor( + const Hash & rev, + bool smudgeLfs) { auto self = ref(shared_from_this()); - return make_ref(self, rev); + return make_ref(self, rev, smudgeLfs); } ref GitRepoImpl::getAccessor( const Hash & rev, bool exportIgnore, - std::string displayPrefix) + std::string displayPrefix, + bool smudgeLfs) { auto self = ref(shared_from_this()); - ref rawGitAccessor = getRawAccessor(rev); + ref rawGitAccessor = getRawAccessor(rev, smudgeLfs); rawGitAccessor->setPathDisplay(std::move(displayPrefix)); if (exportIgnore) return make_ref(self, rawGitAccessor, rev); diff --git a/src/libfetchers/git-utils.hh b/src/libfetchers/git-utils.hh index 9677f5079..c683bd058 100644 --- a/src/libfetchers/git-utils.hh +++ b/src/libfetchers/git-utils.hh @@ -89,7 +89,8 @@ struct GitRepo virtual ref getAccessor( const Hash & rev, bool exportIgnore, - std::string displayPrefix) = 0; + std::string displayPrefix, + bool smudgeLfs = false) = 0; virtual ref getAccessor(const WorkdirInfo & wd, bool exportIgnore, MakeNotAllowedError makeNotAllowedError) = 0; @@ -126,4 +127,26 @@ struct GitRepo ref getTarballCache(); +// A helper to ensure that the `git_*_free` functions get called. +template +struct Deleter +{ + template + void operator()(T * p) const { del(p); }; +}; + +// A helper to ensure that we don't leak objects returned by libgit2. 
+template +struct Setter +{ + T & t; + typename T::pointer p = nullptr; + + Setter(T & t) : t(t) { } + + ~Setter() { if (p) t = T(p); } + + operator typename T::pointer * () { return &p; } +}; + } diff --git a/src/libfetchers/git.cc b/src/libfetchers/git.cc index 0d423a7a3..f7c4e6d5b 100644 --- a/src/libfetchers/git.cc +++ b/src/libfetchers/git.cc @@ -185,7 +185,7 @@ struct GitInputScheme : InputScheme for (auto & [name, value] : url.query) { if (name == "rev" || name == "ref" || name == "keytype" || name == "publicKey" || name == "publicKeys") attrs.emplace(name, value); - else if (name == "shallow" || name == "submodules" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit") + else if (name == "shallow" || name == "submodules" || name == "lfs" || name == "exportIgnore" || name == "allRefs" || name == "verifyCommit") attrs.emplace(name, Explicit { value == "1" }); else url2.query.emplace(name, value); @@ -210,6 +210,7 @@ struct GitInputScheme : InputScheme "rev", "shallow", "submodules", + "lfs", "exportIgnore", "lastModified", "revCount", @@ -262,6 +263,8 @@ struct GitInputScheme : InputScheme if (auto ref = input.getRef()) url.query.insert_or_assign("ref", *ref); if (getShallowAttr(input)) url.query.insert_or_assign("shallow", "1"); + if (getLfsAttr(input)) + url.query.insert_or_assign("lfs", "1"); if (getSubmodulesAttr(input)) url.query.insert_or_assign("submodules", "1"); if (maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false)) @@ -411,6 +414,11 @@ struct GitInputScheme : InputScheme return maybeGetBoolAttr(input.attrs, "submodules").value_or(false); } + bool getLfsAttr(const Input & input) const + { + return maybeGetBoolAttr(input.attrs, "lfs").value_or(false); + } + bool getExportIgnoreAttr(const Input & input) const { return maybeGetBoolAttr(input.attrs, "exportIgnore").value_or(false); @@ -678,7 +686,8 @@ struct GitInputScheme : InputScheme verifyCommit(input, repo); bool exportIgnore = getExportIgnoreAttr(input); - auto 
accessor = repo->getAccessor(rev, exportIgnore, "«" + input.to_string() + "»"); + bool smudgeLfs = getLfsAttr(input); + auto accessor = repo->getAccessor(rev, exportIgnore, "«" + input.to_string() + "»", smudgeLfs); /* If the repo has submodules, fetch them and return a mounted input accessor consisting of the accessor for the top-level @@ -698,6 +707,7 @@ struct GitInputScheme : InputScheme attrs.insert_or_assign("rev", submoduleRev.gitRev()); attrs.insert_or_assign("exportIgnore", Explicit{ exportIgnore }); attrs.insert_or_assign("submodules", Explicit{ true }); + attrs.insert_or_assign("lfs", Explicit{ smudgeLfs }); attrs.insert_or_assign("allRefs", Explicit{ true }); auto submoduleInput = fetchers::Input::fromAttrs(*input.settings, std::move(attrs)); auto [submoduleAccessor, submoduleInput2] = @@ -838,7 +848,7 @@ struct GitInputScheme : InputScheme { auto makeFingerprint = [&](const Hash & rev) { - return rev.gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : ""); + return rev.gitRev() + (getSubmodulesAttr(input) ? ";s" : "") + (getExportIgnoreAttr(input) ? ";e" : "") + (getLfsAttr(input) ? ";l" : ""); }; if (auto rev = input.getRev()) diff --git a/src/libfetchers/meson.build b/src/libfetchers/meson.build index 58afbb7d0..ac69ab8dc 100644 --- a/src/libfetchers/meson.build +++ b/src/libfetchers/meson.build @@ -48,6 +48,7 @@ sources = files( 'fetch-to-store.cc', 'fetchers.cc', 'filtering-source-accessor.cc', + 'git-lfs-fetch.cc', 'git-utils.cc', 'git.cc', 'github.cc', @@ -69,6 +70,7 @@ headers = files( 'fetch-to-store.hh', 'fetchers.hh', 'filtering-source-accessor.hh', + 'git-lfs-fetch.hh', 'git-utils.hh', 'mounted-source-accessor.hh', 'registry.hh', diff --git a/src/libflake/flake/flake.cc b/src/libflake/flake/flake.cc index a0ba404cd..d3ce3762e 100644 --- a/src/libflake/flake/flake.cc +++ b/src/libflake/flake/flake.cc @@ -647,12 +647,13 @@ LockedFlake lockFlake( /* Get the input flake, resolve 'path:./...' 
flakerefs relative to the parent flake. */ - auto getInputFlake = [&]() + auto getInputFlake = [&](const FlakeRef & ref) { - if (auto resolvedPath = resolveRelativePath()) - return readFlake(state, *input.ref, *input.ref, *input.ref, *resolvedPath, inputAttrPath); - else - return getFlake(state, *input.ref, useRegistries, flakeCache, inputAttrPath, inputCopyMode); + if (auto resolvedPath = resolveRelativePath()) { + return readFlake(state, ref, ref, ref, *resolvedPath, inputAttrPath); + } else { + return getFlake(state, ref, useRegistries, flakeCache, inputAttrPath, inputCopyMode); + } }; /* Do we have an entry in the existing lock file? @@ -732,7 +733,7 @@ LockedFlake lockFlake( } if (mustRefetch) { - auto inputFlake = getInputFlake(); + auto inputFlake = getInputFlake(oldLock->lockedRef); nodePaths.emplace(childNode, inputFlake.path.parent()); computeLocks(inputFlake.inputs, childNode, inputAttrPath, oldLock, followsPrefix, inputFlake.path, false); @@ -760,7 +761,7 @@ LockedFlake lockFlake( auto ref = (input2.ref && explicitCliOverrides.contains(inputAttrPath)) ? 
*input2.ref : *input.ref; if (input.isFlake) { - auto inputFlake = getInputFlake(); + auto inputFlake = getInputFlake(*input.ref); auto childNode = make_ref( inputFlake.lockedRef, diff --git a/src/libstore/build/derivation-goal.cc b/src/libstore/build/derivation-goal.cc index d09da1f55..a167d9261 100644 --- a/src/libstore/build/derivation-goal.cc +++ b/src/libstore/build/derivation-goal.cc @@ -185,41 +185,44 @@ Goal::Co DerivationGoal::haveDerivation() if (!drv->type().hasKnownOutputPaths()) experimentalFeatureSettings.require(Xp::CaDerivations); - if (drv->type().isImpure()) { - experimentalFeatureSettings.require(Xp::ImpureDerivations); - - for (auto & [outputName, output] : drv->outputs) { - auto randomPath = StorePath::random(outputPathName(drv->name, outputName)); - assert(!worker.store.isValidPath(randomPath)); - initialOutputs.insert({ - outputName, - InitialOutput { - .wanted = true, - .outputHash = impureOutputHash, - .known = InitialOutputStatus { - .path = randomPath, - .status = PathStatus::Absent - } - } - }); - } - - co_return gaveUpOnSubstitution(); - } - for (auto & i : drv->outputsAndOptPaths(worker.store)) if (i.second.second) worker.store.addTempRoot(*i.second.second); - auto outputHashes = staticOutputHashes(worker.evalStore, *drv); - for (auto & [outputName, outputHash] : outputHashes) - initialOutputs.insert({ - outputName, - InitialOutput { + { + bool impure = drv->type().isImpure(); + + if (impure) experimentalFeatureSettings.require(Xp::ImpureDerivations); + + auto outputHashes = staticOutputHashes(worker.evalStore, *drv); + for (auto & [outputName, outputHash] : outputHashes) { + InitialOutput v{ .wanted = true, // Will be refined later .outputHash = outputHash + }; + + /* TODO we might want to also allow randomizing the paths + for regular CA derivations, e.g. for sake of checking + determinism. 
*/ + if (impure) { + v.known = InitialOutputStatus { + .path = StorePath::random(outputPathName(drv->name, outputName)), + .status = PathStatus::Absent, + }; } - }); + + initialOutputs.insert({ + outputName, + std::move(v), + }); + } + + if (impure) { + /* We don't yet have any safe way to cache an impure derivation at + this step. */ + co_return gaveUpOnSubstitution(); + } + } { /* Check what outputs paths are not already valid. */ diff --git a/src/libstore/derivations.cc b/src/libstore/derivations.cc index 5d01c577c..b54838a0a 100644 --- a/src/libstore/derivations.cc +++ b/src/libstore/derivations.cc @@ -843,16 +843,6 @@ DrvHash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOut }; } - if (type.isImpure()) { - std::map outputHashes; - for (const auto & [outputName, _] : drv.outputs) - outputHashes.insert_or_assign(outputName, impureOutputHash); - return DrvHash { - .hashes = outputHashes, - .kind = DrvHash::Kind::Deferred, - }; - } - auto kind = std::visit(overloaded { [](const DerivationType::InputAddressed & ia) { /* This might be a "pesimistically" deferred output, so we don't @@ -865,7 +855,7 @@ DrvHash hashDerivationModulo(Store & store, const Derivation & drv, bool maskOut : DrvHash::Kind::Deferred; }, [](const DerivationType::Impure &) -> DrvHash::Kind { - assert(false); + return DrvHash::Kind::Deferred; } }, drv.type().raw); diff --git a/src/libstore/derivations.hh b/src/libstore/derivations.hh index 7856aa9b9..5b2101ed5 100644 --- a/src/libstore/derivations.hh +++ b/src/libstore/derivations.hh @@ -526,6 +526,4 @@ void writeDerivation(Sink & out, const StoreDirConfig & store, const BasicDeriva */ std::string hashPlaceholder(const OutputNameView outputName); -extern const Hash impureOutputHash; - } diff --git a/src/libstore/filetransfer.cc b/src/libstore/filetransfer.cc index 8439cc39c..f2430631d 100644 --- a/src/libstore/filetransfer.cc +++ b/src/libstore/filetransfer.cc @@ -94,7 +94,7 @@ struct curlFileTransfer : public FileTransfer 
: fileTransfer(fileTransfer) , request(request) , act(*logger, lvlTalkative, actFileTransfer, - fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), + request.post ? "" : fmt(request.data ? "uploading '%s'" : "downloading '%s'", request.uri), {request.uri}, request.parentAct) , callback(std::move(callback)) , finalSink([this](std::string_view data) { @@ -271,11 +271,21 @@ struct curlFileTransfer : public FileTransfer return getInterrupted(); } + int silentProgressCallback(double dltotal, double dlnow) + { + return getInterrupted(); + } + static int progressCallbackWrapper(void * userp, double dltotal, double dlnow, double ultotal, double ulnow) { return ((TransferItem *) userp)->progressCallback(dltotal, dlnow); } + static int silentProgressCallbackWrapper(void * userp, double dltotal, double dlnow, double ultotal, double ulnow) + { + return ((TransferItem *) userp)->silentProgressCallback(dltotal, dlnow); + } + static int debugCallback(CURL * handle, curl_infotype type, char * data, size_t size, void * userptr) { if (type == CURLINFO_TEXT) @@ -300,6 +310,14 @@ struct curlFileTransfer : public FileTransfer return ((TransferItem *) userp)->readCallback(buffer, size, nitems); } + #if !defined(_WIN32) && LIBCURL_VERSION_NUM >= 0x071000 + static int cloexec_callback(void *, curl_socket_t curlfd, curlsocktype purpose) { + unix::closeOnExec(curlfd); + vomit("cloexec set for fd %i", curlfd); + return CURL_SOCKOPT_OK; + } + #endif + void init() { if (!req) req = curl_easy_init(); @@ -332,8 +350,11 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_HEADERFUNCTION, TransferItem::headerCallbackWrapper); curl_easy_setopt(req, CURLOPT_HEADERDATA, this); - curl_easy_setopt(req, CURLOPT_PROGRESSFUNCTION, progressCallbackWrapper); - curl_easy_setopt(req, CURLOPT_PROGRESSDATA, this); + if (request.post) + curl_easy_setopt(req, CURLOPT_XFERINFOFUNCTION, silentProgressCallbackWrapper); + else + curl_easy_setopt(req, 
CURLOPT_XFERINFOFUNCTION, progressCallbackWrapper); + curl_easy_setopt(req, CURLOPT_XFERINFODATA, this); curl_easy_setopt(req, CURLOPT_NOPROGRESS, 0); curl_easy_setopt(req, CURLOPT_HTTPHEADER, requestHeaders); @@ -345,7 +366,10 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_NOBODY, 1); if (request.data) { - curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); + if (request.post) + curl_easy_setopt(req, CURLOPT_POST, 1L); + else + curl_easy_setopt(req, CURLOPT_UPLOAD, 1L); curl_easy_setopt(req, CURLOPT_READFUNCTION, readCallbackWrapper); curl_easy_setopt(req, CURLOPT_READDATA, this); curl_easy_setopt(req, CURLOPT_INFILESIZE_LARGE, (curl_off_t) request.data->length()); @@ -359,6 +383,10 @@ struct curlFileTransfer : public FileTransfer curl_easy_setopt(req, CURLOPT_SSL_VERIFYHOST, 0); } + #if !defined(_WIN32) && LIBCURL_VERSION_NUM >= 0x071000 + curl_easy_setopt(req, CURLOPT_SOCKOPTFUNCTION, cloexec_callback); + #endif + curl_easy_setopt(req, CURLOPT_CONNECTTIMEOUT, fileTransferSettings.connectTimeout.get()); curl_easy_setopt(req, CURLOPT_LOW_SPEED_LIMIT, 1L); @@ -418,7 +446,8 @@ struct curlFileTransfer : public FileTransfer if (httpStatus == 304 && result.etag == "") result.etag = request.expectedETag; - act.progress(result.bodySize, result.bodySize); + if (!request.post) + act.progress(result.bodySize, result.bodySize); done = true; callback(std::move(result)); } diff --git a/src/libstore/filetransfer.hh b/src/libstore/filetransfer.hh index 43a384d71..0ecc7f376 100644 --- a/src/libstore/filetransfer.hh +++ b/src/libstore/filetransfer.hh @@ -65,6 +65,7 @@ struct FileTransferRequest std::string expectedETag; bool verifyTLS = true; bool head = false; + bool post = false; size_t tries = fileTransferSettings.tries; unsigned int baseRetryTimeMs = 250; ActivityId parentAct; diff --git a/src/libutil/serialise.cc b/src/libutil/serialise.cc index 381e7ae38..d612c11b2 100644 --- a/src/libutil/serialise.cc +++ b/src/libutil/serialise.cc @@ -227,8 +227,7 @@ 
std::unique_ptr sourceToSink(std::function fun) throw EndOfFile("coroutine has finished"); } - size_t n = std::min(cur.size(), out_len); - memcpy(out, cur.data(), n); + size_t n = cur.copy(out, out_len); cur.remove_prefix(n); return n; }); @@ -260,7 +259,7 @@ std::unique_ptr sinkToSource( { struct SinkToSource : Source { - typedef boost::coroutines2::coroutine coro_t; + typedef boost::coroutines2::coroutine coro_t; std::function fun; std::function eof; @@ -271,33 +270,37 @@ std::unique_ptr sinkToSource( { } - std::string cur; - size_t pos = 0; + std::string_view cur; size_t read(char * data, size_t len) override { - if (!coro) { + bool hasCoro = coro.has_value(); + if (!hasCoro) { coro = coro_t::pull_type([&](coro_t::push_type & yield) { LambdaSink sink([&](std::string_view data) { - if (!data.empty()) yield(std::string(data)); + if (!data.empty()) { + yield(data); + } }); fun(sink); }); } - if (!*coro) { eof(); unreachable(); } - - if (pos == cur.size()) { - if (!cur.empty()) { + if (cur.empty()) { + if (hasCoro) { (*coro)(); } - cur = coro->get(); - pos = 0; + if (*coro) { + cur = coro->get(); + } else { + coro.reset(); + eof(); + unreachable(); + } } - auto n = std::min(cur.size() - pos, len); - memcpy(data, cur.data() + pos, n); - pos += n; + size_t n = cur.copy(data, len); + cur.remove_prefix(n); return n; } diff --git a/src/nix/flake-prefetch.md b/src/nix/flake-prefetch.md index a1cf0289a..4666aadc4 100644 --- a/src/nix/flake-prefetch.md +++ b/src/nix/flake-prefetch.md @@ -5,10 +5,14 @@ R""( * Download a tarball and unpack it: ```console - # nix flake prefetch https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.5.tar.xz + # nix flake prefetch https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.5.tar.xz --out-link ./result Downloaded 'https://cdn.kernel.org/pub/linux/kernel/v5.x/linux-5.10.5.tar.xz?narHash=sha256-3XYHZANT6AFBV0BqegkAZHbba6oeDkIUCDwbATLMhAY=' to '/nix/store/sl5vvk8mb4ma1sjyy03kwpvkz50hd22d-source' (hash 
'sha256-3XYHZANT6AFBV0BqegkAZHbba6oeDkIUCDwbATLMhAY='). + + # cat ./result/README + Linux kernel + … ``` * Download the `dwarffs` flake (looked up in the flake registry): diff --git a/src/nix/flake.cc b/src/nix/flake.cc index 37df51f37..db07c82a3 100644 --- a/src/nix/flake.cc +++ b/src/nix/flake.cc @@ -18,6 +18,7 @@ #include "markdown.hh" #include "users.hh" #include "fetch-to-store.hh" +#include "local-fs-store.hh" #include #include @@ -1436,8 +1437,18 @@ struct CmdFlakeShow : FlakeCommand, MixJSON struct CmdFlakePrefetch : FlakeCommand, MixJSON { + std::optional outLink; + CmdFlakePrefetch() { + addFlag({ + .longName = "out-link", + .shortName = 'o', + .description = "Create symlink named *path* to the resulting store path.", + .labels = {"path"}, + .handler = {&outLink}, + .completer = completePath + }); } std::string description() override @@ -1473,6 +1484,13 @@ struct CmdFlakePrefetch : FlakeCommand, MixJSON store->printStorePath(storePath), hash.to_string(HashFormat::SRI, true)); } + + if (outLink) { + if (auto store2 = store.dynamic_pointer_cast()) + createOutLinks(*outLink, {BuiltPath::Opaque{storePath}}, *store2); + else + throw Error("'--out-link' is not supported for this Nix store"); + } } }; diff --git a/tests/functional/meson.build b/tests/functional/meson.build index dee003e42..3342ee870 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -164,6 +164,7 @@ suites = [ 'debugger.sh', 'extra-sandbox-profile.sh', 'help.sh', + 'symlinks.sh', ], 'workdir': meson.current_source_dir(), }, diff --git a/tests/functional/simple.sh b/tests/functional/simple.sh index 8afa369c2..c1f2eef41 100755 --- a/tests/functional/simple.sh +++ b/tests/functional/simple.sh @@ -15,7 +15,7 @@ echo "output path is $outPath" [[ ! -w $outPath ]] text=$(cat "$outPath/hello") -if test "$text" != "Hello World!"; then exit 1; fi +[[ "$text" = "Hello World!" 
]] TODO_NixOS diff --git a/tests/functional/symlinks.sh b/tests/functional/symlinks.sh new file mode 100644 index 000000000..5eb22b3f9 --- /dev/null +++ b/tests/functional/symlinks.sh @@ -0,0 +1,16 @@ +#!/usr/bin/env bash + +source common.sh + +# Check that when we have a derivation attribute that refers to a +# symlink, we copy the symlink, not its target. +# shellcheck disable=SC2016 +nix build --impure --no-link --expr ' + with import ./config.nix; + + mkDerivation { + name = "simple"; + builder = builtins.toFile "builder.sh" "[[ -L \"$symlink\" ]]; mkdir $out"; + symlink = ./lang/symlink-resolution/foo/overlays; + } +' diff --git a/tests/functional/tarball.sh b/tests/functional/tarball.sh index 720b3688f..53807603c 100755 --- a/tests/functional/tarball.sh +++ b/tests/functional/tarball.sh @@ -73,13 +73,13 @@ test_tarball .gz gzip # All entries in tree.tar.gz refer to the same file, and all have the same inode when unpacked by GNU tar. # We don't preserve the hard links, because that's an optimization we think is not worth the complexity, # so we only make sure that the contents are copied correctly. -path="$(nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" | jq -r .storePath)" -[[ $(cat "$path/a/b/foo") = bar ]] -[[ $(cat "$path/a/b/xyzzy") = bar ]] -[[ $(cat "$path/a/yyy") = bar ]] -[[ $(cat "$path/a/zzz") = bar ]] -[[ $(cat "$path/c/aap") = bar ]] -[[ $(cat "$path/fnord") = bar ]] +nix flake prefetch --json "tarball+file://$(pwd)/tree.tar.gz" --out-link "$TEST_ROOT/result" +[[ $(cat "$TEST_ROOT/result/a/b/foo") = bar ]] +[[ $(cat "$TEST_ROOT/result/a/b/xyzzy") = bar ]] +[[ $(cat "$TEST_ROOT/result/a/yyy") = bar ]] +[[ $(cat "$TEST_ROOT/result/a/zzz") = bar ]] +[[ $(cat "$TEST_ROOT/result/c/aap") = bar ]] +[[ $(cat "$TEST_ROOT/result/fnord") = bar ]] # Test a tarball that has multiple top-level directories. 
rm -rf "$TEST_ROOT/tar_root" diff --git a/tests/nixos/fetch-git/test-cases/lfs/default.nix b/tests/nixos/fetch-git/test-cases/lfs/default.nix new file mode 100644 index 000000000..a6b4fc77a --- /dev/null +++ b/tests/nixos/fetch-git/test-cases/lfs/default.nix @@ -0,0 +1,197 @@ +{ + # mostly copied from https://github.com/NixOS/nix/blob/358c26fd13a902d9a4032a00e6683571be07a384/tests/nixos/fetch-git/test-cases/fetchTree-shallow/default.nix#L1 + # ty @DavHau + description = "fetchGit smudges LFS pointers if lfs=true"; + script = '' + from tempfile import TemporaryDirectory + + expected_max_size_lfs_pointer = 1024 # 1 KiB (values >= than this cannot be pointers, and test files are 1 MiB) + + # purge nix git cache to make sure we start with a clean slate + client.succeed("rm -rf ~/.cache/nix") + + + with subtest("Request lfs fetch without any .gitattributes file"): + client.succeed(f"dd if=/dev/urandom of={repo.path}/regular bs=1M count=1 >&2") + client.succeed(f"{repo.git} add : >&2") + client.succeed(f"{repo.git} commit -m 'no .gitattributes' >&2") + client.succeed(f"{repo.git} push origin main >&2") + + # memorize the revision + no_gitattributes_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip() + + # fetch with lfs=true, and check that the lack of .gitattributes does not break anything + fetchGit_no_gitattributes_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{no_gitattributes_rev}"; + ref = "main"; + lfs = true; + }} + """ + fetched_no_gitattributes = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_no_gitattributes_expr}).outPath' + """) + client.succeed(f"cmp {repo.path}/regular {fetched_no_gitattributes}/regular >&2") + + + with subtest("Add a file that should be tracked by lfs, but isn't"): + # (git lfs cli only throws a warning "Encountered 1 file that should have + # been a pointer, but wasn't") + + client.succeed(f"dd if=/dev/urandom of={repo.path}/black_sheep bs=1M count=1 >&2") + client.succeed(f"echo 
'black_sheep filter=lfs -text' >>{repo.path}/.gitattributes") + client.succeed(f"{repo.git} add : >&2") + client.succeed(f"{repo.git} commit -m 'add misleading file' >&2") + client.succeed(f"{repo.git} push origin main >&2") + + # memorize the revision + bad_lfs_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip() + + # test assumption that it can be cloned with regular git first + # (here we see the warning as stated above) + with TemporaryDirectory() as tempdir: + client.succeed(f"git clone -n {repo.remote} {tempdir} >&2") + client.succeed(f"git -C {tempdir} lfs install >&2") + client.succeed(f"git -C {tempdir} checkout {bad_lfs_rev} >&2") + + # check that the file is not a pointer, as expected + file_size_git = client.succeed(f"stat -c %s {tempdir}/black_sheep").strip() + assert int(file_size_git) == 1024 * 1024, \ + f"non lfs file is {file_size_git}b (!= 1MiB), probably a test implementation error" + + lfs_files = client.succeed(f"git -C {tempdir} lfs ls-files").strip() + assert lfs_files == "", "non lfs file is tracked by lfs, probably a test implementation error" + + client.succeed(f"cmp {repo.path}/black_sheep {tempdir}/black_sheep >&2") + + # now fetch without lfs, check that the file is not a pointer + fetchGit_bad_lfs_without_lfs_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{bad_lfs_rev}"; + ref = "main"; + lfs = false; + }} + """ + fetched_bad_lfs_without_lfs = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_bad_lfs_without_lfs_expr}).outPath' + """) + + # check that file was not somehow turned into a pointer + file_size_bad_lfs_without_lfs = client.succeed(f"stat -c %s {fetched_bad_lfs_without_lfs}/black_sheep").strip() + + assert int(file_size_bad_lfs_without_lfs) == 1024 * 1024, \ + f"non lfs-enrolled file is {file_size_bad_lfs_without_lfs}b (!= 1MiB), probably a test implementation error" + client.succeed(f"cmp {repo.path}/black_sheep {fetched_bad_lfs_without_lfs}/black_sheep >&2") + + # finally 
fetch with lfs=true, and check that the bad file does not break anything + fetchGit_bad_lfs_with_lfs_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{bad_lfs_rev}"; + ref = "main"; + lfs = true; + }} + """ + fetchGit_bad_lfs_with_lfs = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_bad_lfs_with_lfs_expr}).outPath' + """) + + client.succeed(f"cmp {repo.path}/black_sheep {fetchGit_bad_lfs_with_lfs}/black_sheep >&2") + + + with subtest("Add an lfs-enrolled file to the repo"): + client.succeed(f"dd if=/dev/urandom of={repo.path}/beeg bs=1M count=1 >&2") + client.succeed(f"{repo.git} lfs install >&2") + client.succeed(f"{repo.git} lfs track --filename beeg >&2") + client.succeed(f"{repo.git} add : >&2") + client.succeed(f"{repo.git} commit -m 'add lfs file' >&2") + client.succeed(f"{repo.git} push origin main >&2") + + # memorize the revision + lfs_file_rev = client.succeed(f"{repo.git} rev-parse HEAD").strip() + + # first fetch without lfs, check that we did not smudge the file + fetchGit_nolfs_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{lfs_file_rev}"; + ref = "main"; + lfs = false; + }} + """ + fetched_nolfs = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_nolfs_expr}).outPath' + """) + + # check that file was not smudged + file_size_nolfs = client.succeed(f"stat -c %s {fetched_nolfs}/beeg").strip() + + assert int(file_size_nolfs) < expected_max_size_lfs_pointer, \ + f"did not set lfs=true, yet lfs-enrolled file is {file_size_nolfs}b (>= 1KiB), probably smudged when we should not have" + + # now fetch with lfs=true and check that the file was smudged + fetchGit_lfs_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{lfs_file_rev}"; + ref = "main"; + lfs = true; + }} + """ + fetched_lfs = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_lfs_expr}).outPath' + """) + + assert fetched_lfs != fetched_nolfs, \ + f"fetching with and 
without lfs yielded the same store path {fetched_lfs}, fingerprinting error?" + + # check that file was smudged + file_size_lfs = client.succeed(f"stat -c %s {fetched_lfs}/beeg").strip() + assert int(file_size_lfs) == 1024 * 1024, \ + f"set lfs=true, yet lfs-enrolled file is {file_size_lfs}b (!= 1MiB), probably did not smudge when we should have" + + + with subtest("Check that default is lfs=false"): + fetchGit_default_expr = f""" + builtins.fetchGit {{ + url = "{repo.remote}"; + rev = "{lfs_file_rev}"; + ref = "main"; + }} + """ + fetched_default = client.succeed(f""" + nix eval --debug --impure --raw --expr '({fetchGit_default_expr}).outPath' + """) + + # check that file was not smudged + file_size_default = client.succeed(f"stat -c %s {fetched_default}/beeg").strip() + + assert int(file_size_default) < expected_max_size_lfs_pointer, \ + f"did not set lfs, yet lfs-enrolled file is {file_size_default}b (>= 1KiB), probably bad default value" + + with subtest("Use as flake input"): + # May seem redundant, but this has minor differences compared to raw + # fetchGit which caused failures before + with TemporaryDirectory() as tempdir: + client.succeed(f"mkdir -p {tempdir}") + client.succeed(f""" + printf '{{ + inputs = {{ + foo = {{ + url = "git+{repo.remote}?ref=main&rev={lfs_file_rev}&lfs=1"; + flake = false; + }}; + }}; + outputs = {{ foo, self }}: {{ inherit (foo) outPath; }}; + }}' >{tempdir}/flake.nix + """) + fetched_flake = client.succeed(f""" + nix eval --debug --raw {tempdir}#.outPath + """) + + assert fetched_lfs == fetched_flake, \ + f"fetching as flake input (store path {fetched_flake}) yielded a different result than using fetchGit (store path {fetched_lfs})" + ''; +} diff --git a/tests/nixos/fetch-git/testsupport/gitea.nix b/tests/nixos/fetch-git/testsupport/gitea.nix index 9409acff7..e63182639 100644 --- a/tests/nixos/fetch-git/testsupport/gitea.nix +++ b/tests/nixos/fetch-git/testsupport/gitea.nix @@ -29,9 +29,16 @@ in { pkgs, ... 
}: { services.gitea.enable = true; - services.gitea.settings.service.DISABLE_REGISTRATION = true; - services.gitea.settings.log.LEVEL = "Info"; - services.gitea.settings.database.LOG_SQL = false; + services.gitea.lfs.enable = true; + services.gitea.settings = { + service.DISABLE_REGISTRATION = true; + server = { + DOMAIN = "gitea"; + HTTP_PORT = 3000; + }; + log.LEVEL = "Info"; + database.LOG_SQL = false; + }; services.openssh.enable = true; networking.firewall.allowedTCPPorts = [ 3000 ]; environment.systemPackages = [ @@ -54,7 +61,10 @@ in client = { pkgs, ... }: { - environment.systemPackages = [ pkgs.git ]; + environment.systemPackages = [ + pkgs.git + pkgs.git-lfs + ]; }; }; defaults =