diff --git a/src/libfetchers/cache.cc b/src/libfetchers/cache.cc index e071b4717..87a8e4702 100644 --- a/src/libfetchers/cache.cc +++ b/src/libfetchers/cache.cc @@ -11,12 +11,11 @@ namespace nix::fetchers { static const char * schema = R"sql( create table if not exists Cache ( - input text not null, - info text not null, - path text not null, - immutable integer not null, + domain text not null, + key text not null, + value text not null, timestamp integer not null, - primary key (input) + primary key (domain, key) ); )sql"; @@ -28,7 +27,7 @@ struct CacheImpl : Cache struct State { SQLite db; - SQLiteStmt add, lookup; + SQLiteStmt upsert, lookup; }; Sync _state; @@ -37,137 +36,130 @@ struct CacheImpl : Cache { auto state(_state.lock()); - auto dbPath = getCacheDir() + "/nix/fetcher-cache-v1.sqlite"; + auto dbPath = getCacheDir() + "/nix/fetcher-cache-v2.sqlite"; createDirs(dirOf(dbPath)); state->db = SQLite(dbPath); state->db.isCache(); state->db.exec(schema); - state->add.create(state->db, - "insert or replace into Cache(input, info, path, immutable, timestamp) values (?, ?, ?, ?, ?)"); + state->upsert.create(state->db, + "insert or replace into Cache(domain, key, value, timestamp) values (?, ?, ?, ?)"); state->lookup.create(state->db, - "select info, path, immutable, timestamp from Cache where input = ?"); + "select value, timestamp from Cache where domain = ? and key = ?"); } void upsert( - const Attrs & inAttrs, - const Attrs & infoAttrs) override + const Key & key, + const Attrs & value) override { - _state.lock()->add.use() - (attrsToJSON(inAttrs).dump()) - (attrsToJSON(infoAttrs).dump()) - ("") // no path - (false) + _state.lock()->upsert.use() + (key.first) + (attrsToJSON(key.second).dump()) + (attrsToJSON(value).dump()) (time(0)).exec(); } - std::optional lookup(const Attrs & inAttrs) override + std::optional lookup( + const Key & key) override { - if (auto res = lookupExpired(inAttrs)) - return std::move(res->infoAttrs); + if (auto res = lookupExpired(key)) + return std::move(res->value); return {}; } - std::optional lookupWithTTL(const Attrs & inAttrs) override + std::optional lookupWithTTL( + const Key & key) override { - if (auto res = lookupExpired(inAttrs)) { + if (auto res = lookupExpired(key)) { if (!res->expired) - return std::move(res->infoAttrs); - debug("ignoring expired cache entry '%s'", - attrsToJSON(inAttrs).dump()); - } - return {}; - } - - std::optional lookupExpired(const Attrs & inAttrs) override - { - auto state(_state.lock()); - - auto inAttrsJSON = attrsToJSON(inAttrs).dump(); - - auto stmt(state->lookup.use()(inAttrsJSON)); - if (!stmt.next()) { - debug("did not find cache entry for '%s'", inAttrsJSON); - return {}; - } - - auto infoJSON = stmt.getStr(0); - auto locked = stmt.getInt(2) != 0; - auto timestamp = stmt.getInt(3); - - debug("using cache entry '%s' -> '%s'", inAttrsJSON, infoJSON); - - return Result2 { - .expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)), - .infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)), - }; - } - - void add( - Store & store, - const Attrs & inAttrs, - const Attrs & infoAttrs, - const StorePath & storePath, - bool locked) override - { - _state.lock()->add.use() - (attrsToJSON(inAttrs).dump()) - (attrsToJSON(infoAttrs).dump()) - (store.printStorePath(storePath)) - (locked) - (time(0)).exec(); - } - - std::optional> lookup( - Store & store, - const Attrs & inAttrs) override - { - if (auto res = lookupExpired(store, inAttrs)) { - if (!res->expired) - return std::make_pair(std::move(res->infoAttrs), std::move(res->storePath)); - debug("ignoring expired cache entry '%s'", - attrsToJSON(inAttrs).dump()); + return std::move(res->value); + debug("ignoring expired cache entry '%s:%s'", + key.first, attrsToJSON(key.second).dump()); } return {}; } std::optional lookupExpired( - Store & store, - const Attrs & inAttrs) override + const Key & key) override { auto state(_state.lock()); - auto inAttrsJSON = attrsToJSON(inAttrs).dump(); + auto keyJSON = attrsToJSON(key.second).dump(); - auto stmt(state->lookup.use()(inAttrsJSON)); + auto stmt(state->lookup.use()(key.first)(keyJSON)); if (!stmt.next()) { - debug("did not find cache entry for '%s'", inAttrsJSON); + debug("did not find cache entry for '%s:%s'", key.first, keyJSON); return {}; } - auto infoJSON = stmt.getStr(0); - auto storePath = store.parseStorePath(stmt.getStr(1)); - auto locked = stmt.getInt(2) != 0; - auto timestamp = stmt.getInt(3); + auto valueJSON = stmt.getStr(0); + auto timestamp = stmt.getInt(1); - store.addTempRoot(storePath); - if (!store.isValidPath(storePath)) { - // FIXME: we could try to substitute 'storePath'. - debug("ignoring disappeared cache entry '%s'", inAttrsJSON); - return {}; - } - - debug("using cache entry '%s' -> '%s', '%s'", - inAttrsJSON, infoJSON, store.printStorePath(storePath)); + debug("using cache entry '%s:%s' -> '%s'", key.first, keyJSON, valueJSON); return Result { - .expired = !locked && (settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0)), - .infoAttrs = jsonToAttrs(nlohmann::json::parse(infoJSON)), - .storePath = std::move(storePath) + .expired = settings.tarballTtl.get() == 0 || timestamp + settings.tarballTtl < time(0), + .value = jsonToAttrs(nlohmann::json::parse(valueJSON)), }; } + + void upsert( + Key key, + Store & store, + Attrs value, + const StorePath & storePath) + { + /* Add the store prefix to the cache key to handle multiple + store prefixes. */ + key.second.insert_or_assign("store", store.storeDir); + + value.insert_or_assign("storePath", (std::string) storePath.to_string()); + + upsert(key, value); + } + + std::optional lookupStorePath( + Key key, + Store & store) override + { + key.second.insert_or_assign("store", store.storeDir); + + auto res = lookupExpired(key); + if (!res) return std::nullopt; + + auto storePathS = getStrAttr(res->value, "storePath"); + res->value.erase("storePath"); + + ResultWithStorePath res2(*res, StorePath(storePathS)); + + store.addTempRoot(res2.storePath); + if (!store.isValidPath(res2.storePath)) { + // FIXME: we could try to substitute 'storePath'. + debug("ignoring disappeared cache entry '%s:%s' -> '%s'", + key.first, + attrsToJSON(key.second).dump(), + store.printStorePath(res2.storePath)); + return std::nullopt; + } + + debug("using cache entry '%s:%s' -> '%s', '%s'", + key.first, + attrsToJSON(key.second).dump(), + attrsToJSON(res2.value).dump(), + store.printStorePath(res2.storePath)); + + return res2; + } + + std::optional lookupStorePathWithTTL( + Key key, + Store & store) override + { + auto res = lookupStorePath(std::move(key), store); + return res && !res->expired ? res : std::nullopt; + } }; ref getCache() diff --git a/src/libfetchers/cache.hh b/src/libfetchers/cache.hh index 791d77025..4d834fe0c 100644 --- a/src/libfetchers/cache.hh +++ b/src/libfetchers/cache.hh @@ -15,61 +15,80 @@ struct Cache virtual ~Cache() { } /** - * Add a value to the cache. The cache is an arbitrary mapping of - * Attrs to Attrs. + * A domain is a partition of the key/value cache for a particular + * purpose, e.g. git revision to revcount. + */ + using Domain = std::string_view; + + /** + * A cache key is a domain and an arbitrary set of attributes. + */ + using Key = std::pair; + + /** + * Add a key/value pair to the cache. */ virtual void upsert( - const Attrs & inAttrs, - const Attrs & infoAttrs) = 0; + const Key & key, + const Attrs & value) = 0; /** * Look up a key with infinite TTL. */ virtual std::optional lookup( - const Attrs & inAttrs) = 0; + const Key & key) = 0; /** * Look up a key. Return nothing if its TTL has exceeded * `settings.tarballTTL`. */ virtual std::optional lookupWithTTL( - const Attrs & inAttrs) = 0; + const Key & key) = 0; - struct Result2 + struct Result { bool expired = false; - Attrs infoAttrs; + Attrs value; }; /** * Look up a key. Return a bool denoting whether its TTL has * exceeded `settings.tarballTTL`. */ - virtual std::optional lookupExpired( - const Attrs & inAttrs) = 0; + virtual std::optional lookupExpired( + const Key & key) = 0; - /* Old cache for things that have a store path. */ - virtual void add( + /** + * Insert a cache entry that has a store path associated with + * it. Such cache entries are always considered stale if the + * associated store path is invalid. + */ + virtual void upsert( + Key key, Store & store, - const Attrs & inAttrs, - const Attrs & infoAttrs, - const StorePath & storePath, - bool locked) = 0; + Attrs value, + const StorePath & storePath) = 0; - virtual std::optional> lookup( - Store & store, - const Attrs & inAttrs) = 0; - - struct Result + struct ResultWithStorePath : Result { - bool expired = false; - Attrs infoAttrs; StorePath storePath; }; - virtual std::optional lookupExpired( - Store & store, - const Attrs & inAttrs) = 0; + /** + * Look up a store path in the cache. The returned store path will + * be valid, but it may be expired. + */ + virtual std::optional lookupStorePath( + Key key, + Store & store) = 0; + + /** + * Look up a store path in the cache. Return nothing if its TTL + * has exceeded `settings.tarballTTL`. + */ + virtual std::optional lookupStorePathWithTTL( + Key key, + Store & store) = 0; }; ref getCache(); diff --git a/src/libfetchers/fetch-to-store.cc b/src/libfetchers/fetch-to-store.cc index 4fce6180d..65aa72a6c 100644 --- a/src/libfetchers/fetch-to-store.cc +++ b/src/libfetchers/fetch-to-store.cc @@ -16,20 +16,18 @@ StorePath fetchToStore( // FIXME: add an optimisation for the case where the accessor is // a `PosixSourceAccessor` pointing to a store path. - std::optional cacheKey; + std::optional cacheKey; if (!filter && path.accessor->fingerprint) { - cacheKey = fetchers::Attrs{ - {"_what", "fetchToStore"}, - {"store", store.storeDir}, + cacheKey = fetchers::Cache::Key{"fetchToStore", { {"name", std::string{name}}, {"fingerprint", *path.accessor->fingerprint}, {"method", std::string{method.render()}}, {"path", path.path.abs()} - }; - if (auto res = fetchers::getCache()->lookup(store, *cacheKey)) { + }}; + if (auto res = fetchers::getCache()->lookupStorePath(*cacheKey, store)) { debug("store path cache hit for '%s'", path); - return res->second; + return res->storePath; } } else debug("source path '%s' is uncacheable", path); @@ -47,10 +45,9 @@ StorePath fetchToStore( name, path, method, HashAlgorithm::SHA256, {}, filter2, repair); if (cacheKey && mode == FetchMode::Copy) - fetchers::getCache()->add(store, *cacheKey, {}, storePath, true); + fetchers::getCache()->upsert(*cacheKey, store, {}, storePath); return storePath; } - } diff --git a/src/libfetchers/git-utils.cc b/src/libfetchers/git-utils.cc index 160d1ac05..2ea1e15ed 100644 --- a/src/libfetchers/git-utils.cc +++ b/src/libfetchers/git-utils.cc @@ -452,7 +452,7 @@ struct GitRepoImpl : GitRepo, std::enable_shared_from_this { auto accessor = getAccessor(treeHash, false); - fetchers::Attrs cacheKey({{"_what", "treeHashToNarHash"}, {"treeHash", treeHash.gitRev()}}); + fetchers::Cache::Key cacheKey{"treeHashToNarHash", {{"treeHash", treeHash.gitRev()}}}; if (auto res = fetchers::getCache()->lookup(cacheKey)) return Hash::parseAny(fetchers::getStrAttr(*res, "narHash"), HashAlgorithm::SHA256); diff --git a/src/libfetchers/github.cc b/src/libfetchers/github.cc index b9a3d5c0d..d62a7482e 100644 --- a/src/libfetchers/github.cc +++ b/src/libfetchers/github.cc @@ -225,8 +225,8 @@ struct GitArchiveInputScheme : InputScheme auto cache = getCache(); - Attrs treeHashKey{{"_what", "gitRevToTreeHash"}, {"rev", rev->gitRev()}}; - Attrs lastModifiedKey{{"_what", "gitRevToLastModified"}, {"rev", rev->gitRev()}}; + Cache::Key treeHashKey{"gitRevToTreeHash", {{"rev", rev->gitRev()}}}; + Cache::Key lastModifiedKey{"gitRevToLastModified", {{"rev", rev->gitRev()}}}; if (auto treeHashAttrs = cache->lookup(treeHashKey)) { if (auto lastModifiedAttrs = cache->lookup(lastModifiedKey)) { diff --git a/src/libfetchers/tarball.cc b/src/libfetchers/tarball.cc index 33c8f3c0c..e19b18505 100644 --- a/src/libfetchers/tarball.cc +++ b/src/libfetchers/tarball.cc @@ -22,21 +22,20 @@ DownloadFileResult downloadFile( { // FIXME: check store - Attrs inAttrs({ - {"type", "file"}, + Cache::Key key{"file", {{ {"url", url}, {"name", name}, - }); + }}}; - auto cached = getCache()->lookupExpired(*store, inAttrs); + auto cached = getCache()->lookupStorePath(key, *store); auto useCached = [&]() -> DownloadFileResult { return { .storePath = std::move(cached->storePath), - .etag = getStrAttr(cached->infoAttrs, "etag"), - .effectiveUrl = getStrAttr(cached->infoAttrs, "url"), - .immutableUrl = maybeGetStrAttr(cached->infoAttrs, "immutableUrl"), + .etag = getStrAttr(cached->value, "etag"), + .effectiveUrl = getStrAttr(cached->value, "url"), + .immutableUrl = maybeGetStrAttr(cached->value, "immutableUrl"), }; }; @@ -46,7 +45,7 @@ DownloadFileResult downloadFile( FileTransferRequest request(url); request.headers = headers; if (cached) - request.expectedETag = getStrAttr(cached->infoAttrs, "etag"); + request.expectedETag = getStrAttr(cached->value, "etag"); FileTransferResult res; try { res = getFileTransfer()->download(request); @@ -92,14 +91,9 @@ DownloadFileResult downloadFile( /* Cache metadata for all URLs in the redirect chain. */ for (auto & url : res.urls) { - inAttrs.insert_or_assign("url", url); + key.second.insert_or_assign("url", url); infoAttrs.insert_or_assign("url", *res.urls.rbegin()); - getCache()->add( - *store, - inAttrs, - infoAttrs, - *storePath, - false); + getCache()->upsert(key, *store, infoAttrs, *storePath); } return { @@ -114,12 +108,9 @@ DownloadTarballResult downloadTarball( const std::string & url, const Headers & headers) { - Attrs inAttrs({ - {"_what", "tarballCache"}, - {"url", url}, - }); + Cache::Key cacheKey{"tarball", {{"url", url}}}; - auto cached = getCache()->lookupExpired(inAttrs); + auto cached = getCache()->lookupExpired(cacheKey); auto attrsToResult = [&](const Attrs & infoAttrs) { @@ -132,19 +123,19 @@ DownloadTarballResult downloadTarball( }; }; - if (cached && !getTarballCache()->hasObject(getRevAttr(cached->infoAttrs, "treeHash"))) + if (cached && !getTarballCache()->hasObject(getRevAttr(cached->value, "treeHash"))) cached.reset(); if (cached && !cached->expired) /* We previously downloaded this tarball and it's younger than `tarballTtl`, so no need to check the server. */ - return attrsToResult(cached->infoAttrs); + return attrsToResult(cached->value); auto _res = std::make_shared>(); auto source = sinkToSource([&](Sink & sink) { FileTransferRequest req(url); - req.expectedETag = cached ? getStrAttr(cached->infoAttrs, "etag") : ""; + req.expectedETag = cached ? getStrAttr(cached->value, "etag") : ""; getFileTransfer()->download(std::move(req), sink, [_res](FileTransferResult r) { @@ -167,7 +158,7 @@ DownloadTarballResult downloadTarball( if (res->cached) { /* The server says that the previously downloaded version is still current. */ - infoAttrs = cached->infoAttrs; + infoAttrs = cached->value; } else { infoAttrs.insert_or_assign("etag", res->etag); infoAttrs.insert_or_assign("treeHash", parseSink->sync().gitRev()); @@ -178,8 +169,8 @@ DownloadTarballResult downloadTarball( /* Insert a cache entry for every URL in the redirect chain. */ for (auto & url : res->urls) { - inAttrs.insert_or_assign("url", url); - getCache()->upsert(inAttrs, infoAttrs); + cacheKey.second.insert_or_assign("url", url); + getCache()->upsert(cacheKey, infoAttrs); } // FIXME: add a cache entry for immutableUrl? That could allow diff --git a/src/libfetchers/unix/git.cc b/src/libfetchers/unix/git.cc index 46263c872..fbdac1fe6 100644 --- a/src/libfetchers/unix/git.cc +++ b/src/libfetchers/unix/git.cc @@ -426,7 +426,7 @@ struct GitInputScheme : InputScheme uint64_t getLastModified(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const { - Attrs key{{"_what", "gitLastModified"}, {"rev", rev.gitRev()}}; + Cache::Key key{"gitLastModified", {{"rev", rev.gitRev()}}}; auto cache = getCache(); @@ -435,14 +435,14 @@ struct GitInputScheme : InputScheme auto lastModified = GitRepo::openRepo(repoDir)->getLastModified(rev); - cache->upsert(key, Attrs{{"lastModified", lastModified}}); + cache->upsert(key, {{"lastModified", lastModified}}); return lastModified; } uint64_t getRevCount(const RepoInfo & repoInfo, const std::string & repoDir, const Hash & rev) const { - Attrs key{{"_what", "gitRevCount"}, {"rev", rev.gitRev()}}; + Cache::Key key{"gitRevCount", {{"rev", rev.gitRev()}}}; auto cache = getCache(); diff --git a/src/libfetchers/unix/mercurial.cc b/src/libfetchers/unix/mercurial.cc index 049faffa8..7bdf1e937 100644 --- a/src/libfetchers/unix/mercurial.cc +++ b/src/libfetchers/unix/mercurial.cc @@ -222,22 +222,16 @@ struct MercurialInputScheme : InputScheme if (!input.getRef()) input.attrs.insert_or_assign("ref", "default"); - auto checkHashAlgorithm = [&](const std::optional & hash) + auto revInfoKey = [&](const Hash & rev) { - if (hash.has_value() && hash->algo != HashAlgorithm::SHA1) - throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", hash->to_string(HashFormat::Base16, true)); - }; + if (rev.algo != HashAlgorithm::SHA1) + throw Error("Hash '%s' is not supported by Mercurial. Only sha1 is supported.", rev.to_string(HashFormat::Base16, true)); - - auto getLockedAttrs = [&]() - { - checkHashAlgorithm(input.getRev()); - - return Attrs({ - {"type", "hg"}, + return Cache::Key{"hgRev", { + {"store", store->storeDir}, {"name", name}, - {"rev", input.getRev()->gitRev()}, - }); + {"rev", input.getRev()->gitRev()} + }}; }; auto makeResult = [&](const Attrs & infoAttrs, const StorePath & storePath) -> StorePath @@ -248,26 +242,21 @@ struct MercurialInputScheme : InputScheme return storePath; }; - if (input.getRev()) { - if (auto res = getCache()->lookup(*store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); + /* Check the cache for the most recent rev for this URL/ref. */ + Cache::Key refToRevKey{"hgRefToRev", { + {"url", actualUrl}, + {"ref", *input.getRef()} + }}; + + if (!input.getRev()) { + if (auto res = getCache()->lookupWithTTL(refToRevKey)) + input.attrs.insert_or_assign("rev", getRevAttr(*res, "rev").gitRev()); } - auto revOrRef = input.getRev() ? input.getRev()->gitRev() : *input.getRef(); - - Attrs unlockedAttrs({ - {"type", "hg"}, - {"name", name}, - {"url", actualUrl}, - {"ref", *input.getRef()}, - }); - - if (auto res = getCache()->lookup(*store, unlockedAttrs)) { - auto rev2 = Hash::parseAny(getStrAttr(res->first, "rev"), HashAlgorithm::SHA1); - if (!input.getRev() || input.getRev() == rev2) { - input.attrs.insert_or_assign("rev", rev2.gitRev()); - return makeResult(res->first, std::move(res->second)); - } + /* If we have a rev, check if we have a cached store path. */ + if (auto rev = input.getRev()) { + if (auto res = getCache()->lookupStorePath(revInfoKey(*rev), *store)) + return makeResult(res->value, res->storePath); } Path cacheDir = fmt("%s/nix/hg/%s", getCacheDir(), hashString(HashAlgorithm::SHA256, actualUrl).to_string(HashFormat::Nix32, false)); @@ -300,45 +289,42 @@ struct MercurialInputScheme : InputScheme } } + /* Fetch the remote rev or ref. */ auto tokens = tokenizeString>( - runHg({ "log", "-R", cacheDir, "-r", revOrRef, "--template", "{node} {rev} {branch}" })); + runHg({ + "log", "-R", cacheDir, + "-r", input.getRev() ? input.getRev()->gitRev() : *input.getRef(), + "--template", "{node} {rev} {branch}" + })); assert(tokens.size() == 3); - input.attrs.insert_or_assign("rev", Hash::parseAny(tokens[0], HashAlgorithm::SHA1).gitRev()); + auto rev = Hash::parseAny(tokens[0], HashAlgorithm::SHA1); + input.attrs.insert_or_assign("rev", rev.gitRev()); auto revCount = std::stoull(tokens[1]); input.attrs.insert_or_assign("ref", tokens[2]); - if (auto res = getCache()->lookup(*store, getLockedAttrs())) - return makeResult(res->first, std::move(res->second)); + /* Now that we have the rev, check the cache again for a + cached store path. */ + if (auto res = getCache()->lookupStorePath(revInfoKey(rev), *store)) + return makeResult(res->value, res->storePath); Path tmpDir = createTempDir(); AutoDelete delTmpDir(tmpDir, true); - runHg({ "archive", "-R", cacheDir, "-r", input.getRev()->gitRev(), tmpDir }); + runHg({ "archive", "-R", cacheDir, "-r", rev.gitRev(), tmpDir }); deletePath(tmpDir + "/.hg_archival.txt"); auto storePath = store->addToStore(name, {getFSSourceAccessor(), CanonPath(tmpDir)}); Attrs infoAttrs({ - {"rev", input.getRev()->gitRev()}, {"revCount", (uint64_t) revCount}, }); if (!origRev) - getCache()->add( - *store, - unlockedAttrs, - infoAttrs, - storePath, - false); + getCache()->upsert(refToRevKey, {{"rev", rev.gitRev()}}); - getCache()->add( - *store, - getLockedAttrs(), - infoAttrs, - storePath, - true); + getCache()->upsert(revInfoKey(rev), *store, infoAttrs, storePath); return makeResult(infoAttrs, std::move(storePath)); } diff --git a/tests/functional/fetchMercurial.sh b/tests/functional/fetchMercurial.sh index e133df1f8..9f7cef7b2 100644 --- a/tests/functional/fetchMercurial.sh +++ b/tests/functional/fetchMercurial.sh @@ -101,6 +101,7 @@ path4=$(nix eval --impure --refresh --raw --expr "(builtins.fetchMercurial file: [[ $path2 = $path4 ]] echo paris > $repo/hello + # Passing a `name` argument should be reflected in the output path path5=$(nix eval -vvvvv --impure --refresh --raw --expr "(builtins.fetchMercurial { url = \"file://$repo\"; name = \"foo\"; } ).outPath") [[ $path5 =~ -foo$ ]]