From 4bce2d723d044fdf425626fa068f3bfd60be82aa Mon Sep 17 00:00:00 2001 From: Eelco Dolstra Date: Wed, 4 Jun 2025 21:37:17 +0200 Subject: [PATCH] GitSourceAccessor: Make thread-safe --- src/libfetchers/git-utils.cc | 93 ++++++++++++++++++++++-------------- 1 file changed, 56 insertions(+), 37 deletions(-) diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index c1bdf2097..9fe271fe8 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -665,28 +665,40 @@ ref GitRepo::openRepo(const std::filesystem::path & path, bool create, struct GitSourceAccessor : SourceAccessor { - ref repo; - Object root; - std::optional lfsFetch = std::nullopt; + struct State + { + ref repo; + Object root; + std::optional lfsFetch = std::nullopt; + }; + + Sync state_; GitSourceAccessor(ref repo_, const Hash & rev, bool smudgeLfs) - : repo(repo_) - , root(peelToTreeOrBlob(lookupObject(*repo, hashToOID(rev)).get())) + : state_{ + State { + .repo = repo_, + .root = peelToTreeOrBlob(lookupObject(*repo_, hashToOID(rev)).get()), + .lfsFetch = smudgeLfs ? std::make_optional(lfs::Fetch(*repo_, hashToOID(rev))) : std::nullopt, + } + } { - if (smudgeLfs) - lfsFetch = std::make_optional(lfs::Fetch(*repo, hashToOID(rev))); } std::string readBlob(const CanonPath & path, bool symlink) { - const auto blob = getBlob(path, symlink); + auto state(state_.lock()); - if (lfsFetch) { - if (lfsFetch->shouldFetch(path)) { + const auto blob = getBlob(*state, path, symlink); + + if (state->lfsFetch) { + if (state->lfsFetch->shouldFetch(path)) { StringSink s; try { + // FIXME: do we need to hold the state lock while + // doing this? auto contents = std::string((const char *) git_blob_rawcontent(blob.get()), git_blob_rawsize(blob.get())); - lfsFetch->fetch(contents, path, s, [&s](uint64_t size){ s.s.reserve(size); }); + state->lfsFetch->fetch(contents, path, s, [&s](uint64_t size){ s.s.reserve(size); }); } catch (Error & e) { e.addTrace({}, "while smudging git-lfs file '%s'", path); throw; @@ -705,15 +717,18 @@ struct GitSourceAccessor : SourceAccessor bool pathExists(const CanonPath & path) override { - return path.isRoot() ? true : (bool) lookup(path); + auto state(state_.lock()); + return path.isRoot() ? true : (bool) lookup(*state, path); } std::optional maybeLstat(const CanonPath & path) override { - if (path.isRoot()) - return Stat { .type = git_object_type(root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular }; + auto state(state_.lock()); - auto entry = lookup(path); + if (path.isRoot()) + return Stat { .type = git_object_type(state->root.get()) == GIT_OBJECT_TREE ? tDirectory : tRegular }; + + auto entry = lookup(*state, path); if (!entry) return std::nullopt; @@ -741,6 +756,8 @@ struct GitSourceAccessor : SourceAccessor DirEntries readDirectory(const CanonPath & path) override { + auto state(state_.lock()); + return std::visit(overloaded { [&](Tree tree) { DirEntries res; @@ -758,7 +775,7 @@ struct GitSourceAccessor : SourceAccessor [&](Submodule) { return DirEntries(); } - }, getTree(path)); + }, getTree(*state, path)); } std::string readLink(const CanonPath & path) override @@ -772,7 +789,9 @@ struct GitSourceAccessor : SourceAccessor */ std::optional getSubmoduleRev(const CanonPath & path) { - auto entry = lookup(path); + auto state(state_.lock()); + + auto entry = lookup(*state, path); if (!entry || git_tree_entry_type(entry) != GIT_OBJECT_COMMIT) return std::nullopt; @@ -783,7 +802,7 @@ struct GitSourceAccessor : SourceAccessor std::unordered_map lookupCache; /* Recursively look up 'path' relative to the root. */ - git_tree_entry * lookup(const CanonPath & path) + git_tree_entry * lookup(State & state, const CanonPath & path) { auto i = lookupCache.find(path); if (i != lookupCache.end()) return i->second.get(); @@ -793,7 +812,7 @@ struct GitSourceAccessor : SourceAccessor auto name = path.baseName().value(); - auto parentTree = lookupTree(*parent); + auto parentTree = lookupTree(state, *parent); if (!parentTree) return nullptr; auto count = git_tree_entrycount(parentTree->get()); @@ -822,29 +841,29 @@ struct GitSourceAccessor : SourceAccessor return res; } - std::optional lookupTree(const CanonPath & path) + std::optional lookupTree(State & state, const CanonPath & path) { if (path.isRoot()) { - if (git_object_type(root.get()) == GIT_OBJECT_TREE) - return dupObject((git_tree *) &*root); + if (git_object_type(state.root.get()) == GIT_OBJECT_TREE) + return dupObject((git_tree *) &*state.root); else return std::nullopt; } - auto entry = lookup(path); + auto entry = lookup(state, path); if (!entry || git_tree_entry_type(entry) != GIT_OBJECT_TREE) return std::nullopt; Tree tree; - if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *repo, entry)) + if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *state.repo, entry)) throw Error("looking up directory '%s': %s", showPath(path), git_error_last()->message); return tree; } - git_tree_entry * need(const CanonPath & path) + git_tree_entry * need(State & state, const CanonPath & path) { - auto entry = lookup(path); + auto entry = lookup(state, path); if (!entry) throw Error("'%s' does not exist", showPath(path)); return entry; @@ -852,16 +871,16 @@ struct GitSourceAccessor : SourceAccessor struct Submodule { }; - std::variant getTree(const CanonPath & path) + std::variant getTree(State & state, const CanonPath & path) { if (path.isRoot()) { - if (git_object_type(root.get()) == GIT_OBJECT_TREE) - return dupObject((git_tree *) &*root); + if (git_object_type(state.root.get()) == GIT_OBJECT_TREE) + return dupObject((git_tree *) &*state.root); else - throw Error("Git root object '%s' is not a directory", *git_object_id(root.get())); + throw Error("Git root object '%s' is not a directory", *git_object_id(state.root.get())); } - auto entry = need(path); + auto entry = need(state, path); if (git_tree_entry_type(entry) == GIT_OBJECT_COMMIT) return Submodule(); @@ -870,16 +889,16 @@ struct GitSourceAccessor : SourceAccessor throw Error("'%s' is not a directory", showPath(path)); Tree tree; - if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *repo, entry)) + if (git_tree_entry_to_object((git_object * *) (git_tree * *) Setter(tree), *state.repo, entry)) throw Error("looking up directory '%s': %s", showPath(path), git_error_last()->message); return tree; } - Blob getBlob(const CanonPath & path, bool expectSymlink) + Blob getBlob(State & state, const CanonPath & path, bool expectSymlink) { - if (!expectSymlink && git_object_type(root.get()) == GIT_OBJECT_BLOB) - return dupObject((git_blob *) &*root); + if (!expectSymlink && git_object_type(state.root.get()) == GIT_OBJECT_BLOB) + return dupObject((git_blob *) &*state.root); auto notExpected = [&]() { @@ -892,7 +911,7 @@ struct GitSourceAccessor : SourceAccessor if (path.isRoot()) notExpected(); - auto entry = need(path); + auto entry = need(state, path); if (git_tree_entry_type(entry) != GIT_OBJECT_BLOB) notExpected(); @@ -907,7 +926,7 @@ struct GitSourceAccessor : SourceAccessor } Blob blob; - if (git_tree_entry_to_object((git_object * *) (git_blob * *) Setter(blob), *repo, entry)) + if (git_tree_entry_to_object((git_object * *) (git_blob * *) Setter(blob), *state.repo, entry)) throw Error("looking up file '%s': %s", showPath(path), git_error_last()->message); return blob;