From aaffbc220952adce73ca9278b73394f7a2eda78f Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 30 Jun 2025 23:29:07 +0300 Subject: [PATCH] libutil: Use caching `directory_entry` API in `PosixSourceAccessor::readDirectory` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous use of symlink_status() always translated into a stat call, leading to huge performance penalties for by-name-overlay in nixpkgs. The comment below references the possible caching, but that seemed to be erroneous, since the correct way to make use of the caching API is by calling a bunch of `is_*` functions [1]. For example, here's how libstdc++ does that [2], [3]. This translates to great nixpkgs eval performance improvements: ``` Benchmark 1: GC_INITIAL_HEAP_SIZE=4G result/bin/nix-instantiate ../nixpkgs -A hello --readonly-mode Time (mean ± σ): 186.7 ms ± 6.7 ms [User: 121.3 ms, System: 64.9 ms] Range (min … max): 179.4 ms … 201.6 ms 16 runs Benchmark 2: GC_INITIAL_HEAP_SIZE=4G nix-instantiate ../nixpkgs -A hello --readonly-mode Time (mean ± σ): 230.6 ms ± 5.0 ms [User: 126.9 ms, System: 103.1 ms] Range (min … max): 225.1 ms … 241.4 ms 13 runs ``` [1]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0317r1.html [2]: https://github.com/gcc-mirror/gcc/blob/8ea555b7b4725dbc5d9286f729166cd54ce5b615/libstdc%2B%2B-v3/include/bits/fs_dir.h#L341-L348 [3]: https://github.com/gcc-mirror/gcc/blob/8ea555b7b4725dbc5d9286f729166cd54ce5b615/libstdc%2B%2B-v3/include/bits/fs_dir.h#L161-L163 (cherry picked from commit 8708e9a52617a70ce1a85ff090e216e257c2ae93) --- src/libutil/posix-source-accessor.cc | 49 +++++++++++++++++----------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/src/libutil/posix-source-accessor.cc b/src/libutil/posix-source-accessor.cc index 773540e6a..2ce7c88e4 100644 --- a/src/libutil/posix-source-accessor.cc +++ b/src/libutil/posix-source-accessor.cc @@ -141,33 +141,44 @@ SourceAccessor::DirEntries PosixSourceAccessor::readDirectory(const CanonPath & for (auto & entry : DirectoryIterator{makeAbsPath(path)}) { checkInterrupt(); auto type = [&]() -> std::optional { - std::filesystem::file_type nativeType; try { - nativeType = entry.symlink_status().type(); + /* WARNING: We are specifically not calling symlink_status() + * here, because that always translates to `stat` call and + * doesn't make use of any caching. Instead, we have to + * rely on the myriad of `is_*` functions, which actually do + * the caching. If you are in doubt then take a look at the + * libstdc++ implementation [1] and the standard proposal + * about the caching variations of directory_entry [2]. + + * [1]: https://github.com/gcc-mirror/gcc/blob/8ea555b7b4725dbc5d9286f729166cd54ce5b615/libstdc%2B%2B-v3/include/bits/fs_dir.h#L341-L348 + * [2]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0317r1.html + */ + + /* Check for symlink first, because other getters follow symlinks. */ + if (entry.is_symlink()) + return tSymlink; + if (entry.is_regular_file()) + return tRegular; + if (entry.is_directory()) + return tDirectory; + if (entry.is_character_file()) + return tChar; + if (entry.is_block_file()) + return tBlock; + if (entry.is_fifo()) + return tFifo; + if (entry.is_socket()) + return tSocket; + return tUnknown; } catch (std::filesystem::filesystem_error & e) { // We cannot always stat the child. (Ideally there is no // stat because the native directory entry has the type // already, but this isn't always the case.) if (e.code() == std::errc::permission_denied || e.code() == std::errc::operation_not_permitted) return std::nullopt; - else throw; + else + throw; } - - // cannot exhaustively enumerate because implementation-specific - // additional file types are allowed. -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wswitch-enum" - switch (nativeType) { - case std::filesystem::file_type::regular: return Type::tRegular; break; - case std::filesystem::file_type::symlink: return Type::tSymlink; break; - case std::filesystem::file_type::directory: return Type::tDirectory; break; - case std::filesystem::file_type::character: return Type::tChar; break; - case std::filesystem::file_type::block: return Type::tBlock; break; - case std::filesystem::file_type::fifo: return Type::tFifo; break; - case std::filesystem::file_type::socket: return Type::tSocket; break; - default: return tUnknown; - } -#pragma GCC diagnostic pop }(); res.emplace(entry.path().filename().string(), type); }