diff --git a/src/libfetchers-tests/git-utils.cc b/src/libfetchers-tests/git-utils.cc index 6b3c1ca94..9164f7e5b 100644 --- a/src/libfetchers-tests/git-utils.cc +++ b/src/libfetchers-tests/git-utils.cc @@ -235,44 +235,6 @@ TEST_F(GitUtilsTest, gitUrlToSsh) } } -class FetchAttributeTest : public ::testing::Test -{ -protected: - void SetUp() override - { - // test literal (non-wildcard) matches too - std::string content1 = "litfile filter=lfs diff=lfs merge=lfs -text"; - auto rules1 = parseGitAttrFile(content1); - fetch1.rules = std::move(rules1); - - std::string content2 = "*.wildcard filter=lfs diff=lfs merge=lfs -text"; - auto rules2 = parseGitAttrFile(content2); - fetch2.rules = std::move(rules2); - } - - Fetch fetch1; - Fetch fetch2; -}; - -TEST_F(FetchAttributeTest, ExactMatch) -{ - EXPECT_TRUE(fetch1.hasAttribute("litfile", "filter", "lfs")); - EXPECT_FALSE(fetch1.hasAttribute("other", "filter", "lfs")); -} - -TEST_F(FetchAttributeTest, WildcardMatch) -{ - EXPECT_TRUE(fetch2.hasAttribute("match.wildcard", "filter", "lfs")); - EXPECT_FALSE(fetch2.hasAttribute("nomatch.otherext", "filter", "lfs")); - EXPECT_FALSE(fetch2.hasAttribute("nomatch.wildcard.extra", "filter", "lfs")); -} - -TEST_F(FetchAttributeTest, EmptyPath) -{ - EXPECT_FALSE(fetch1.hasAttribute("", "filter", "lfs")); - EXPECT_FALSE(fetch2.hasAttribute("", "filter", "lfs")); -} - } // namespace lfs } // namespace nix diff --git a/src/libfetchers/git-lfs-fetch.hh b/src/libfetchers/git-lfs-fetch.hh index 8f63f4af4..b85398030 100644 --- a/src/libfetchers/git-lfs-fetch.hh +++ b/src/libfetchers/git-lfs-fetch.hh @@ -21,51 +21,6 @@ namespace fs = std::filesystem; namespace nix { namespace lfs { -// see Fetch::rules -struct AttrRule -{ - std::string pattern; - std::unordered_map attributes; - git_pathspec * pathspec = nullptr; - - AttrRule() = default; - - explicit AttrRule(std::string pat) - : pattern(std::move(pat)) - { - initPathspec(); - } - - ~AttrRule() - { - if (pathspec) { - git_pathspec_free(pathspec); - } - } - - AttrRule(const AttrRule & other) - : pattern(other.pattern) - , attributes(other.attributes) - , pathspec(nullptr) - { - if (!pattern.empty()) { - initPathspec(); - } - } - - void initPathspec() - { - git_strarray patterns = {0}; - const char * pattern_str = pattern.c_str(); - patterns.strings = const_cast(&pattern_str); - patterns.count = 1; - - if (git_pathspec_new(&pathspec, &patterns) != 0) { - throw std::runtime_error("Failed to create git pathspec"); - } - } -}; - // git-lfs metadata about a file struct Md { @@ -105,8 +60,11 @@ struct GitUrl struct Fetch { - // only true after init() - bool ready = false; + // Reference to the repository + git_repository const * repo; + + // Git commit being fetched + git_oid rev; // from shelling out to ssh, used for 2 subsequent fetches: // list of URLs to fetch from, and fetching the data itself @@ -115,13 +73,8 @@ struct Fetch // derived from git remote url GitUrl gitUrl = GitUrl{}; - // parsed contents of .gitattributes - // .gitattributes contains a list of path patterns, and list of attributes (=key-value tags) for each pattern - // paths tagged with `filter=lfs` need to be smudged by downloading from lfs server - std::vector rules = {}; - - void init(git_repository * repo, const std::string & gitattributesContent); - bool hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const; + Fetch(git_repository * repo, git_oid rev); + bool shouldFetch(const std::string & path) const; void fetch( const git_blob * pointerBlob, const std::string & pointerFilePath, @@ -240,22 +193,6 @@ std::string getLfsEndpointUrl(git_repository * repo) return std::string(url_c_str); } -std::string git_attr_value_to_string(git_attr_value_t value) -{ - switch (value) { - case GIT_ATTR_VALUE_UNSPECIFIED: - return "GIT_ATTR_VALUE_UNSPECIFIED"; - case GIT_ATTR_VALUE_TRUE: - return "GIT_ATTR_VALUE_TRUE"; - case GIT_ATTR_VALUE_FALSE: - return "GIT_ATTR_VALUE_FALSE"; - case GIT_ATTR_VALUE_STRING: - return "GIT_ATTR_VALUE_STRING"; - default: - return "Unknown value"; - } -} - std::optional parseLfsMetadata(const std::string & content, const std::string & filename) { // https://github.com/git-lfs/git-lfs/blob/2ef4108/docs/spec.md @@ -343,115 +280,29 @@ GitUrl parseGitUrl(const std::string & url) return result; } -std::vector parseGitAttrFile(const std::string & content) +Fetch::Fetch(git_repository * repo, git_oid rev) { - std::vector rules; - std::string content_str(content); - std::istringstream iss(content_str); - std::string line; + this->repo = repo; + this->rev = rev; - while (std::getline(iss, line)) { - if (line.empty() || line[0] == '#') - continue; - - if (!line.empty() && line.back() == '\r') - line.pop_back(); - - size_t pattern_end = line.find_first_of(" \t"); // matches space OR tab - if (pattern_end == std::string::npos) - continue; - - AttrRule rule; - rule.pattern = line.substr(0, pattern_end); - // Workaround for libgit2 matching issues when the pattern starts with a recursive glob - // These should effectively match the same files - // https://github.com/libgit2/libgit2/issues/6946 - if (rule.pattern.starts_with("/**/")) { - rule.pattern = rule.pattern.substr(4); - } - while (rule.pattern.starts_with("**/")) { - rule.pattern = rule.pattern.substr(3); - } - - git_strarray patterns = {0}; - const char * pattern_str = rule.pattern.c_str(); - patterns.strings = const_cast(&pattern_str); - patterns.count = 1; - - if (git_pathspec_new(&rule.pathspec, &patterns) != 0) { - auto error = git_error_last(); - std::stringstream ss; - ss << "git_pathspec_new parsing '" << line << "': " << (error ? error->message : "unknown error") - << std::endl; - warn(ss.str()); - continue; - } - - size_t attr_start = line.find_first_not_of(" \t", pattern_end); - if (attr_start != std::string::npos) { - std::string_view rest(line); - rest.remove_prefix(attr_start); - - while (!rest.empty()) { - size_t attr_end = rest.find_first_of(" \t"); - std::string_view attr = rest.substr(0, attr_end); - - if (attr[0] == '-') { - rule.attributes[std::string(attr.substr(1))] = "false"; - } else if (auto equals_pos = attr.find('='); equals_pos != std::string_view::npos) { - auto key = attr.substr(0, equals_pos); - auto value = attr.substr(equals_pos + 1); - rule.attributes[std::string(key)] = std::string(value); - } else { - rule.attributes[std::string(attr)] = "true"; - } - - if (attr_end == std::string_view::npos) - break; - - rest = rest.substr(attr_end); - size_t next_attr = rest.find_first_not_of(" \t"); - if (next_attr == std::string_view::npos) - break; - rest = rest.substr(next_attr); - } - } - - rules.push_back(std::move(rule)); - } - - return rules; -} - -void Fetch::init(git_repository * repo, const std::string & gitattributesContent) -{ const auto remoteUrl = lfs::getLfsEndpointUrl(repo); this->gitUrl = parseGitUrl(remoteUrl); if (this->gitUrl.protocol == "ssh") { this->token = lfs::getLfsApiToken(this->gitUrl); } - this->rules = lfs::parseGitAttrFile(gitattributesContent); - this->ready = true; } -bool Fetch::hasAttribute(const std::string & path, const std::string & attrName, const std::string & attrValue) const +bool Fetch::shouldFetch(const std::string & path) const { - for (auto it = rules.rbegin(); it != rules.rend(); ++it) { - int match = git_pathspec_matches_path( - it->pathspec, - 0, // no flags - path.c_str()); - - if (match > 0) { - auto attr = it->attributes.find(attrName); - if (attr != it->attributes.end()) { - return attr->second == attrValue; - } else { - } - } - } - return false; + const char * attr = nullptr; + git_attr_options opts = GIT_ATTR_OPTIONS_INIT; + opts.attr_commit_id = this->rev; + opts.flags = GIT_ATTR_CHECK_INCLUDE_COMMIT | GIT_ATTR_CHECK_NO_SYSTEM; + if (git_attr_get_ext(&attr, (git_repository *) (this->repo), &opts, path.c_str(), "filter")) + throw Error("cannot get git-lfs attribute: %s", git_error_last()->message); + debug("Git filter for %s is %s", path, attr ? attr : "null"); + return attr != nullptr && !std::string(attr).compare("lfs"); } nlohmann::json mdToPayload(const std::vector & items) @@ -532,6 +383,7 @@ void Fetch::fetch( Sink & sink, std::function sizeCallback) const { + debug("Trying to fetch %s using git-lfs", pointerFilePath); constexpr git_object_size_t chunkSize = 128 * 1024; // 128 KiB auto pointerSize = git_blob_rawsize(pointerBlob); @@ -575,6 +427,7 @@ void Fetch::fetch( const uint64_t size = obj.at("size"); sizeCallback(size); downloadToSink(ourl, authHeader, sink, oid); // oid is also the sha256 + debug("%s fetched with git-lfs", pointerFilePath); } catch (const nlohmann::json::out_of_range & e) { std::stringstream ss; ss << "bad json from /info/lfs/objects/batch: " << obj << " " << e.what(); diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 80da391de..8aac2361c 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -669,31 +669,24 @@ struct GitSourceAccessor : SourceAccessor { ref repo; Object root; - std::optional lfsFetch; + std::optional lfsFetch = std::nullopt; - GitSourceAccessor(ref repo_, const Hash & rev, std::optional lfsFetch) + GitSourceAccessor(ref repo_, const Hash & rev, bool smudgeLfs) : repo(repo_) , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get())) - , lfsFetch(lfsFetch) { - if (lfsFetch && !lookup(CanonPath(".gitattributes"))) { - warn("Requested to fetch lfs files, but no .gitattributes file was found, ignoring"); - } + if (smudgeLfs) + lfsFetch = std::make_optional(lfs::Fetch(*repo, hashToOID(rev))); } std::string readBlob(const CanonPath & path, bool symlink) { const auto blob = getBlob(path, symlink); - if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) { + if (lfsFetch) { auto& _lfsFetch = *lfsFetch; - if (!_lfsFetch.ready) { - const auto contents = readFile(CanonPath(".gitattributes")); - _lfsFetch.init(*repo, contents); - } - auto pathStr = std::string(path.rel()); - if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) { + if (_lfsFetch.shouldFetch(pathStr)) { StringSink s; try { _lfsFetch.fetch(blob.get(), pathStr, s, [&s](uint64_t size){ s.s.reserve(size); }); @@ -714,15 +707,10 @@ struct GitSourceAccessor : SourceAccessor std::function sizeCallback = [](uint64_t size){}) override { auto blob = getBlob(path, false); - if (lfsFetch && path != CanonPath(".gitattributes") && lookup(CanonPath(".gitattributes"))) { + if (lfsFetch) { auto& _lfsFetch = *lfsFetch; - if (!_lfsFetch.ready) { - const auto contents = readFile(CanonPath(".gitattributes")); - _lfsFetch.init(*repo, contents); - } - auto pathStr = std::string(path.rel()); - if (_lfsFetch.hasAttribute(pathStr, "filter", "lfs")) { + if (_lfsFetch.shouldFetch(pathStr)) { try { _lfsFetch.fetch(blob.get(), pathStr, sink, sizeCallback); } catch (Error &e) { @@ -730,6 +718,8 @@ struct GitSourceAccessor : SourceAccessor throw; } return; + } else { + debug("Skip git-lfs, not matching .gitattributes patterns: %s", pathStr); } } @@ -1246,13 +1236,7 @@ struct GitFileSystemObjectSinkImpl : GitFileSystemObjectSink ref GitRepoImpl::getRawAccessor(const Hash & rev, bool smudgeLfs) { auto self = ref(shared_from_this()); - if (smudgeLfs) { - auto lfsFetch = lfs::Fetch{}; - return make_ref(self, rev, std::make_optional(lfsFetch)); - } - else { - return make_ref(self, rev, std::nullopt); - } + return make_ref(self, rev, smudgeLfs); } ref GitRepoImpl::getAccessor(const Hash & rev, bool exportIgnore, bool smudgeLfs)