From 32ab3ef598a18c6257badb17ecd53c67e7c35689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B6rg=20Thalheim?= Date: Wed, 12 Mar 2025 08:51:01 +0000 Subject: [PATCH] libutil/file-descriptor: handle EAGAIN in read/write operations We now see an exception being thrown when remote building in master because of writing to a non-blocking file descriptor from our json logger. > #0 0x00007f2ea97aea9c in __pthread_kill_implementation () from /nix/store/wn7v2vhyyyi6clcyn0s9ixvl7d4d87ic-glibc-2.40-36/lib/libc.so.6 > #1 0x00007f2ea975c576 in raise () from /nix/store/wn7v2vhyyyi6clcyn0s9ixvl7d4d87ic-glibc-2.40-36/lib/libc.so.6 > #2 0x00007f2ea9744935 in abort () from /nix/store/wn7v2vhyyyi6clcyn0s9ixvl7d4d87ic-glibc-2.40-36/lib/libc.so.6 > #3 0x00007f2ea99e8c2b in __gnu_cxx::__verbose_terminate_handler() [clone .cold] () from /nix/store/ybjcla5bhj8g1y84998pn4a2drfxybkv-gcc-13.3.0-lib/lib/libstdc++.so.6 > #4 0x00007f2ea99f820a in __cxxabiv1::__terminate(void (*)()) () from /nix/store/ybjcla5bhj8g1y84998pn4a2drfxybkv-gcc-13.3.0-lib/lib/libstdc++.so.6 > #5 0x00007f2ea99f8275 in std::terminate() () from /nix/store/ybjcla5bhj8g1y84998pn4a2drfxybkv-gcc-13.3.0-lib/lib/libstdc++.so.6 > #6 0x00007f2ea99f84c7 in __cxa_throw () from /nix/store/ybjcla5bhj8g1y84998pn4a2drfxybkv-gcc-13.3.0-lib/lib/libstdc++.so.6 > #7 0x00007f2eaa5035c2 in nix::writeFull (fd=2, s=..., allowInterrupts=true) at ../unix/file-descriptor.cc:43 > #8 0x00007f2eaa5633c4 in nix::JSONLogger::write (this=this@entry=0x249a7d40, json=...) at /nix/store/4krab2h0hd4wvxxmscxrw21pl77j4i7j-gcc-13.3.0/include/c++/13.3.0/bits/char_traits.h:358 > #9 0x00007f2eaa5658d7 in nix::JSONLogger::logEI (this=<optimized out>, ei=...) at ../logging.cc:242 > #10 0x00007f2ea9c5d048 in nix::Logger::logEI (ei=..., lvl=nix::lvlError, this=0x249a7d40) at /nix/store/a7cq5bqh0ryvnkv4m19ffchnvi8l9qx6-nix-util-2.27.0-dev/include/nix/logging.hh:108 > #11 nix::handleExceptions (programName="nix", fun=...) 
at ../shared.cc:343 > #12 0x0000000000465b1f in main (argc=<optimized out>, argv=<optimized out>) at /nix/store/4krab2h0hd4wvxxmscxrw21pl77j4i7j-gcc-13.3.0/include/c++/13.3.0/bits/allocator.h:163 > (gdb) frame 10 > #10 0x00007f2ea9c5d048 in nix::Logger::logEI (ei=..., lvl=nix::lvlError, this=0x249a7d40) at /nix/store/a7cq5bqh0ryvnkv4m19ffchnvi8l9qx6-nix-util-2.27.0-dev/include/nix/logging.hh:108 > 108 logEI(ei); So far only drainFD sets the non-blocking flag on a "readable" file descriptor, while this is a "writeable" file descriptor. It's not clear to me yet why we see logs after that point, but it's also not that bad to handle EAGAIN in read/write functions after all. (cherry picked from commit 2790f5f9aeac7cb4179918fac26f4fb74fe4f53d) --- src/libutil/unix/file-descriptor.cc | 44 ++++++++++++++++++++++++++--- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/src/libutil/unix/file-descriptor.cc b/src/libutil/unix/file-descriptor.cc index ac7c086af..a02a53b1e 100644 --- a/src/libutil/unix/file-descriptor.cc +++ b/src/libutil/unix/file-descriptor.cc @@ -5,9 +5,27 @@ #include <fcntl.h> #include <unistd.h> +#include <poll.h> namespace nix { +namespace { + +// This function is needed to handle non-blocking reads/writes. This is needed in the buildhook, because +// somehow the json logger file descriptor ends up being non-blocking and breaks remote-building. 
+// TODO: get rid of buildhook and remove this function again (https://github.com/NixOS/nix/issues/12688) +void pollFD(int fd, int events) +{ + struct pollfd pfd; + pfd.fd = fd; + pfd.events = events; + int ret = poll(&pfd, 1, -1); + if (ret == -1) { + throw SysError("poll on file descriptor failed"); + } +} +} + std::string readFile(int fd) { struct stat st; @@ -17,14 +35,18 @@ std::string readFile(int fd) return drainFD(fd, true, st.st_size); } - void readFull(int fd, char * buf, size_t count) { while (count) { checkInterrupt(); ssize_t res = read(fd, buf, count); if (res == -1) { - if (errno == EINTR) continue; + switch (errno) { + case EINTR: continue; + case EAGAIN: + pollFD(fd, POLLIN); + continue; + } throw SysError("reading from file"); } if (res == 0) throw EndOfFile("unexpected end-of-file"); @@ -39,8 +61,15 @@ void writeFull(int fd, std::string_view s, bool allowInterrupts) while (!s.empty()) { if (allowInterrupts) checkInterrupt(); ssize_t res = write(fd, s.data(), s.size()); - if (res == -1 && errno != EINTR) + if (res == -1) { + switch (errno) { + case EINTR: continue; + case EAGAIN: + pollFD(fd, POLLOUT); + continue; + } throw SysError("writing to file"); + } if (res > 0) s.remove_prefix(res); } @@ -56,8 +85,15 @@ std::string readLine(int fd, bool eofOk) // FIXME: inefficient ssize_t rd = read(fd, &ch, 1); if (rd == -1) { - if (errno != EINTR) + switch (errno) { + case EINTR: continue; + case EAGAIN: { + pollFD(fd, POLLIN); + continue; + } + default: throw SysError("reading a line"); + } } else if (rd == 0) { if (eofOk) return s;