From ced8d311a593fcf9c3823e4e118474ac132d8e60 Mon Sep 17 00:00:00 2001 From: Picnoir Date: Wed, 20 Nov 2024 13:33:00 +0100 Subject: [PATCH] gc: resume GC after a pathinuse error First the motivation: I recently faced a bug that I assume is coming from the topoSortPaths function where the GC was trying to delete a path having some alive referrers. I resolved this by manually deleting the faulty path referrers using nix-store --query --referrers. I sadly did not manage to reproduce this bug. This bug alone is not a big deal. However, this bug is triggering a cascading failure: invalidatePathChecked is throwing a PathInUse exception. This exception is not catched and fails the whole GC run. From there, the machine (a builder machine) was unable to GC its Nix store, which led to an almost full disk with no way to automatically delete the dead Nix paths. Instead, I think we should log the error for the specific store path we're trying to delete, specifying we can't delete this path because it still has referrers. Once we're done with logging that, the GC run should continue to delete the dead store paths it can delete. --- src/libstore/gc.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index 73195794a..45dfe4ad8 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -4,6 +4,7 @@ #include "finally.hh" #include "unix-domain-socket.hh" #include "signals.hh" +#include "posix-fs-canonicalise.hh" #if !defined(__linux__) // For shelling out to lsof @@ -763,13 +764,18 @@ void LocalStore::collectGarbage(const GCOptions & options, GCResults & results) } } } - for (auto & path : topoSortPaths(visited)) { if (!dead.insert(path).second) continue; if (shouldDelete) { - invalidatePathChecked(path); - deleteFromStore(path.to_string()); - referrersCache.erase(path); + try { + invalidatePathChecked(path); + deleteFromStore(path.to_string()); + referrersCache.erase(path); + } catch (PathInUse &e) { + // If we end up here, it's likely a new occurence + // of https://github.com/NixOS/nix/issues/11923 + printError("BUG: %s", e.what()); + } } } };