diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index f18e9b41e..4c75df246 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -396,7 +396,6 @@ ''^src/libutil/types\.hh$'' ''^src/libutil/unix/file-descriptor\.cc$'' ''^src/libutil/unix/file-path\.cc$'' - ''^src/libutil/unix/monitor-fd\.hh$'' ''^src/libutil/unix/processes\.cc$'' ''^src/libutil/unix/signals-impl\.hh$'' ''^src/libutil/unix/signals\.cc$'' diff --git a/src/libutil-tests/monitorfdhup.cc b/src/libutil-tests/monitorfdhup.cc new file mode 100644 index 000000000..01ecb92d9 --- /dev/null +++ b/src/libutil-tests/monitorfdhup.cc @@ -0,0 +1,18 @@ +#include "util.hh" +#include "monitor-fd.hh" + +#include +#include + +namespace nix { +TEST(MonitorFdHup, shouldNotBlock) +{ + Pipe p; + p.create(); + { + // when monitor gets destroyed it should cancel the + // background thread and do not block + MonitorFdHup monitor(p.readSide.get()); + } +} +} diff --git a/src/libutil/unix/monitor-fd.hh b/src/libutil/unix/monitor-fd.hh index b6610feff..d59832452 100644 --- a/src/libutil/unix/monitor-fd.hh +++ b/src/libutil/unix/monitor-fd.hh @@ -14,35 +14,74 @@ namespace nix { - class MonitorFdHup { private: std::thread thread; + Pipe notifyPipe; public: MonitorFdHup(int fd) { - thread = std::thread([fd]() { + notifyPipe.create(); + thread = std::thread([this, fd]() { while (true) { - /* Wait indefinitely until a POLLHUP occurs. */ - struct pollfd fds[1]; - fds[0].fd = fd; - /* Polling for no specific events (i.e. just waiting - for an error/hangup) doesn't work on macOS - anymore. So wait for read events and ignore - them. */ - fds[0].events = - #ifdef __APPLE__ - POLLRDNORM - #else + // There is a POSIX violation on macOS: you have to listen for + // at least POLLHUP to receive HUP events for a FD. POSIX says + // this is not so, and you should just receive them regardless. + // However, as of our testing on macOS 14.5, the events do not + // get delivered if in the all-bits-unset case, but do get + // delivered if `POLLHUP` is set. + // + // This bug filed as rdar://37537852 + // (https://openradar.appspot.com/37537852). + // + // macOS's own man page + // (https://developer.apple.com/library/archive/documentation/System/Conceptual/ManPages_iPhoneOS/man2/poll.2.html) + // additionally says that `POLLHUP` is ignored as an input. It + // seems the likely order of events here was + // + // 1. macOS did not follow the POSIX spec + // + // 2. Somebody ninja-fixed this other spec violation to make + // sure `POLLHUP` was not forgotten about, even though they + // "fixed" this issue in a spec-non-compliant way. Whatever, + // we'll use the fix. + // + // Relevant code, current version, which shows the : + // https://github.com/apple-oss-distributions/xnu/blob/94d3b452840153a99b38a3a9659680b2a006908e/bsd/kern/sys_generic.c#L1751-L1758 + // + // The `POLLHUP` detection was added in + // https://github.com/apple-oss-distributions/xnu/commit/e13b1fa57645afc8a7b2e7d868fe9845c6b08c40#diff-a5aa0b0e7f4d866ca417f60702689fc797e9cdfe33b601b05ccf43086c35d395R1468 + // That means added in 2007 or earlier. Should be good enough + // for us. + short hangup_events = +#ifdef __APPLE__ + POLLHUP +#else 0 - #endif +#endif ; - auto count = poll(fds, 1, -1); - if (count == -1) - unreachable(); + /* Wait indefinitely until a POLLHUP occurs. */ + constexpr size_t num_fds = 2; + struct pollfd fds[num_fds] = { + { + .fd = fd, + .events = hangup_events, + }, + { + .fd = notifyPipe.readSide.get(), + .events = hangup_events, + }, + }; + + auto count = poll(fds, num_fds, -1); + if (count == -1) { + if (errno == EINTR || errno == EAGAIN) + continue; + throw SysError("failed to poll() in MonitorFdHup"); + } /* This shouldn't happen, but can on macOS due to a bug. See rdar://37550628. @@ -50,25 +89,42 @@ public: coordination with the main thread if spinning proves too harmful. */ - if (count == 0) continue; + if (count == 0) + continue; if (fds[0].revents & POLLHUP) { unix::triggerInterrupt(); break; } - /* This will only happen on macOS. We sleep a bit to - avoid waking up too often if the client is sending - input. */ - sleep(1); + if (fds[1].revents & POLLHUP) { + break; + } + // On macOS, (jade thinks that) it is possible (although not + // observed on macOS 14.5) that in some limited cases on buggy + // kernel versions, all the non-POLLHUP events for the socket + // get delivered. + // + // We could sleep to avoid pointlessly spinning a thread on + // those, but this opens up a different problem, which is that + // if do sleep, it will be longer before the daemon fork for a + // client exits. Imagine a sequential shell script, running Nix + // commands, each of which talk to the daemon. If the previous + // command registered a temp root, exits, and then the next + // command issues a delete request before the temp root is + // cleaned up, that delete request might fail. + // + // Not sleeping doesn't actually fix the race condition --- we + // would need to block on the old connections' tempt roots being + // cleaned up in in the new connection --- but it does make it + // much less likely. } }); }; ~MonitorFdHup() { - pthread_cancel(thread.native_handle()); + close(notifyPipe.writeSide.get()); thread.join(); } }; - }