From ea19cb2f5002449ae9fa4dfbbafe722bf5577646 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?F=C3=A9lix=20Baylac=20Jacqu=C3=A9?= Date: Tue, 12 Sep 2023 13:38:29 +0200 Subject: [PATCH] `MonitorFdHup`: replace `pthread_cancel` trick with a notification pipe On https://github.com/NixOS/nix/issues/8946, we faced a surprising behaviour wrt. exception when using pthread_cancel. In a nutshell when a thread is inside a catch block and it's getting pthread_cancel by another one, then the original exception is bubbled up and crashes the process. We now poll on the notification pipe from the thread and exit when the main thread closes its end. This solution does not exhibit surprising behaviour wrt. exceptions. Co-authored-by: Mic92 Fixes https://github.com/NixOS/nix/issues/8946 See also Lix https://gerrit.lix.systems/c/lix/+/1605 which is very similar by coincidence. Pulled a comment from that. (cherry picked from commit 1c636284a3f4c39dcab88c804a2c96a729c47b85) --- src/libutil-tests/monitorfdhup.cc | 18 +++++++++++++ src/libutil/unix/monitor-fd.hh | 42 +++++++++++++++++++++++-------- 2 files changed, 49 insertions(+), 11 deletions(-) create mode 100644 src/libutil-tests/monitorfdhup.cc diff --git a/src/libutil-tests/monitorfdhup.cc b/src/libutil-tests/monitorfdhup.cc new file mode 100644 index 000000000..01ecb92d9 --- /dev/null +++ b/src/libutil-tests/monitorfdhup.cc @@ -0,0 +1,18 @@ +#include "util.hh" +#include "monitor-fd.hh" + +#include +#include + +namespace nix { +TEST(MonitorFdHup, shouldNotBlock) +{ + Pipe p; + p.create(); + { + // when monitor gets destroyed it should cancel the + // background thread and do not block + MonitorFdHup monitor(p.readSide.get()); + } +} +} diff --git a/src/libutil/unix/monitor-fd.hh b/src/libutil/unix/monitor-fd.hh index ca1770342..d6ec47f49 100644 --- a/src/libutil/unix/monitor-fd.hh +++ b/src/libutil/unix/monitor-fd.hh @@ -18,27 +18,45 @@ class MonitorFdHup { private: std::thread thread; + Pipe notifyPipe; public: MonitorFdHup(int fd) { - thread = std::thread([fd]() { + notifyPipe.create(); + thread = std::thread([this, fd]() { while (true) { - /* Wait indefinitely until a POLLHUP occurs. */ - constexpr size_t num_fds = 1; - struct pollfd fds[num_fds] = { - { - .fd = fd, - .events = /* Polling for no specific events (i.e. just waiting for an error/hangup) doesn't work on macOS anymore. So wait for read events and ignore them. */ + // FIXME(jade): we have looked at the XNU kernel code and as + // far as we can tell, the above is bogus. It should be the + // case that the previous version of this and the current + // version are identical: waiting for POLLHUP and POLLRDNORM in + // the kernel *should* be identical. + // https://github.com/apple-oss-distributions/xnu/blob/94d3b452840153a99b38a3a9659680b2a006908e/bsd/kern/sys_generic.c#L1751-L1758 + // + // So, this needs actual testing and we need to figure out if + // this is actually bogus. + short hangup_events = #ifdef __APPLE__ - POLLRDNORM, + POLLRDNORM #else - 0, + 0 #endif + ; + + /* Wait indefinitely until a POLLHUP occurs. */ + constexpr size_t num_fds = 2; + struct pollfd fds[num_fds] = { + { + .fd = fd, + .events = hangup_events, + }, + { + .fd = notifyPipe.readSide.get(), + .events = hangup_events, }, }; @@ -48,7 +66,6 @@ public: continue; throw SysError("failed to poll() in MonitorFdHup"); } - /* This shouldn't happen, but can on macOS due to a bug. See rdar://37550628. @@ -62,6 +79,9 @@ public: unix::triggerInterrupt(); break; } + if (fds[1].revents & POLLHUP) { + break; + } /* This will only happen on macOS. We sleep a bit to avoid waking up too often if the client is sending input. */ @@ -72,7 +92,7 @@ public: ~MonitorFdHup() { - pthread_cancel(thread.native_handle()); + close(notifyPipe.writeSide.get()); thread.join(); } };