1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-07-07 14:21:48 +02:00

libutil: Use caching directory_entry API in PosixSourceAccessor::readDirectory

Previous use of symlink_status() always translated into a stat call, leading
to huge performance penalties for by-name-overlay in nixpkgs. The comment
below references the possible caching, but that seemed to be erroneous, since
the correct way to make use of the caching API is by calling a bunch of `is_*`
functions [1]. For example, here's how libstdc++ does that [2], [3].

This translates to great nixpkgs eval performance improvements:

```
Benchmark 1: GC_INITIAL_HEAP_SIZE=4G result/bin/nix-instantiate ../nixpkgs -A hello --readonly-mode
  Time (mean ± σ):     186.7 ms ±   6.7 ms    [User: 121.3 ms, System: 64.9 ms]
  Range (min … max):   179.4 ms … 201.6 ms    16 runs

Benchmark 2: GC_INITIAL_HEAP_SIZE=4G nix-instantiate ../nixpkgs -A hello --readonly-mode
  Time (mean ± σ):     230.6 ms ±   5.0 ms    [User: 126.9 ms, System: 103.1 ms]
  Range (min … max):   225.1 ms … 241.4 ms    13 runs
```

[1]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0317r1.html
[2]: 8ea555b7b4/libstdc%2B%2B-v3/include/bits/fs_dir.h (L341-L348)
[3]: 8ea555b7b4/libstdc%2B%2B-v3/include/bits/fs_dir.h (L161-L163)
This commit is contained in:
Sergei Zimmerman 2025-06-30 23:29:07 +03:00
parent acfdacc971
commit 8708e9a526
No known key found for this signature in database
GPG key ID: A9B0B557CA632325

View file

@ -141,33 +141,44 @@ SourceAccessor::DirEntries PosixSourceAccessor::readDirectory(const CanonPath &
for (auto & entry : DirectoryIterator{makeAbsPath(path)}) {
checkInterrupt();
auto type = [&]() -> std::optional<Type> {
std::filesystem::file_type nativeType;
try {
nativeType = entry.symlink_status().type();
/* WARNING: We are specifically not calling symlink_status()
* here, because that always translates to `stat` call and
* doesn't make use of any caching. Instead, we have to
* rely on the myriad of `is_*` functions, which actually do
* the caching. If you are in doubt then take a look at the
* libstdc++ implementation [1] and the standard proposal
* about the caching variations of directory_entry [2].
* [1]: https://github.com/gcc-mirror/gcc/blob/8ea555b7b4725dbc5d9286f729166cd54ce5b615/libstdc%2B%2B-v3/include/bits/fs_dir.h#L341-L348
* [2]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2016/p0317r1.html
*/
/* Check for symlink first, because other getters follow symlinks. */
if (entry.is_symlink())
return tSymlink;
if (entry.is_regular_file())
return tRegular;
if (entry.is_directory())
return tDirectory;
if (entry.is_character_file())
return tChar;
if (entry.is_block_file())
return tBlock;
if (entry.is_fifo())
return tFifo;
if (entry.is_socket())
return tSocket;
return tUnknown;
} catch (std::filesystem::filesystem_error & e) {
// We cannot always stat the child. (Ideally there is no
// stat because the native directory entry has the type
// already, but this isn't always the case.)
if (e.code() == std::errc::permission_denied || e.code() == std::errc::operation_not_permitted)
return std::nullopt;
else throw;
else
throw;
}
// cannot exhaustively enumerate because implementation-specific
// additional file types are allowed.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wswitch-enum"
switch (nativeType) {
case std::filesystem::file_type::regular: return Type::tRegular; break;
case std::filesystem::file_type::symlink: return Type::tSymlink; break;
case std::filesystem::file_type::directory: return Type::tDirectory; break;
case std::filesystem::file_type::character: return Type::tChar; break;
case std::filesystem::file_type::block: return Type::tBlock; break;
case std::filesystem::file_type::fifo: return Type::tFifo; break;
case std::filesystem::file_type::socket: return Type::tSocket; break;
default: return tUnknown;
}
#pragma GCC diagnostic pop
}();
res.emplace(entry.path().filename().string(), type);
}