From 49f486d8e088d4633872dfef342fe9fac4f83b6d Mon Sep 17 00:00:00 2001 From: John Ericson Date: Sun, 23 Mar 2025 19:11:17 -0400 Subject: [PATCH] `MonitorFdHup`: Don't sleep anymore After the previous commit it should not be necessary. Furthermore, if we *do* sleep, we'll exacerbate a race condition (in conjunction with getting rid of the thread cancellation) that will cause test failures. --- src/libutil/unix/monitor-fd.hh | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/src/libutil/unix/monitor-fd.hh b/src/libutil/unix/monitor-fd.hh index 334506146..d59832452 100644 --- a/src/libutil/unix/monitor-fd.hh +++ b/src/libutil/unix/monitor-fd.hh @@ -98,11 +98,24 @@ public: if (fds[1].revents & POLLHUP) { break; } - // On macOS, it is possible (although not observed on macOS - // 14.5) that in some limited cases on buggy kernel versions, - // all the non-POLLHUP events for the socket get delivered. - // Sleeping avoids pointlessly spinning a thread on those. - sleep(1); + // On macOS, (jade thinks that) it is possible (although not + // observed on macOS 14.5) that in some limited cases on buggy + // kernel versions, all the non-POLLHUP events for the socket + // get delivered. + // + // We could sleep to avoid pointlessly spinning a thread on + // those, but this opens up a different problem, which is that + // if do sleep, it will be longer before the daemon fork for a + // client exits. Imagine a sequential shell script, running Nix + // commands, each of which talk to the daemon. If the previous + // command registered a temp root, exits, and then the next + // command issues a delete request before the temp root is + // cleaned up, that delete request might fail. + // + // Not sleeping doesn't actually fix the race condition --- we + // would need to block on the old connections' tempt roots being + // cleaned up in in the new connection --- but it does make it + // much less likely. } }); };