1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-24 22:11:15 +02:00

libstore: Use boost::regex for GC root discovery

As it turns out using `std::regex` is actually the bottleneck
for root discovery. Just substituting `std::` -> `boost::`
makes root discovery twice as fast (3x if counting only userspace time).

Some rather ad-hoc measurements to motivate the switch:

(On master)

```
nix build github:nixos/nix/1e822bd4149a8bce1da81ee2ad9404986b07914c#nix-cli --out-link result-1e822bd4149a8bce1da81ee2ad9404986b07914c
taskset -c 2,3 hyperfine "result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0"
Benchmark 1: result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0
  Time (mean ± σ):     481.6 ms ±   3.9 ms    [User: 336.2 ms, System: 142.0 ms]
  Range (min … max):   474.6 ms … 487.7 ms    10 runs
```

(After this patch)

```
taskset -c 2,3 hyperfine "result/bin/nix store gc --dry-run --max 0"
Benchmark 1: result/bin/nix store gc --dry-run --max 0
  Time (mean ± σ):     254.7 ms ±   9.7 ms    [User: 111.1 ms, System: 141.3 ms]
  Range (min … max):   246.5 ms … 281.3 ms    10 runs
```

`boost::regex` is a drop-in replacement for `std::regex`, but much faster.
Doing a simple before/after comparison doesn't surface any change in behavior:

```
result/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l
result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l
```
This commit is contained in:
Sergei Zimmerman 2025-05-06 21:58:52 +00:00
parent 1e822bd414
commit 3a1301cd6d
No known key found for this signature in database
GPG key ID: A9B0B557CA632325

View file

@ -13,10 +13,11 @@
# include "nix/util/processes.hh"
#endif
#include <boost/regex.hpp>
#include <functional>
#include <queue>
#include <algorithm>
#include <regex>
#include <random>
#include <climits>
@ -331,8 +332,8 @@ static void readProcLink(const std::filesystem::path & file, UncheckedRoots & ro
static std::string quoteRegexChars(const std::string & raw)
{
static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
return std::regex_replace(raw, specialRegex, R"(\$&)");
static auto specialRegex = boost::regex(R"([.^$\\*+?()\[\]{}|])");
return boost::regex_replace(raw, specialRegex, R"(\$&)");
}
#ifdef __linux__
@ -354,12 +355,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
auto procDir = AutoCloseDir{opendir("/proc")};
if (procDir) {
struct dirent * ent;
auto digitsRegex = std::regex(R"(^\d+$)");
auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
auto storePathRegex = std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
static const auto digitsRegex = boost::regex(R"(^\d+$)");
static const auto mapRegex = boost::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
auto storePathRegex = boost::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
while (errno = 0, ent = readdir(procDir.get())) {
checkInterrupt();
if (std::regex_match(ent->d_name, digitsRegex)) {
if (boost::regex_match(ent->d_name, digitsRegex)) {
try {
readProcLink(fmt("/proc/%s/exe" ,ent->d_name), unchecked);
readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked);
@ -386,15 +387,15 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
std::filesystem::path mapFile = fmt("/proc/%s/maps", ent->d_name);
auto mapLines = tokenizeString<std::vector<std::string>>(readFile(mapFile.string()), "\n");
for (const auto & line : mapLines) {
auto match = std::smatch{};
if (std::regex_match(line, match, mapRegex))
auto match = boost::smatch{};
if (boost::regex_match(line, match, mapRegex))
unchecked[match[1]].emplace(mapFile.string());
}
auto envFile = fmt("/proc/%s/environ", ent->d_name);
auto envString = readFile(envFile);
auto env_end = std::sregex_iterator{};
for (auto i = std::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i)
auto env_end = boost::sregex_iterator{};
for (auto i = boost::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i)
unchecked[i->str()].emplace(envFile);
} catch (SystemError & e) {
if (errno == ENOENT || errno == EACCES || errno == ESRCH)
@ -413,12 +414,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
// Because of this we disable lsof when running the tests.
if (getEnv("_NIX_TEST_NO_LSOF") != "1") {
try {
std::regex lsofRegex(R"(^n(/.*)$)");
boost::regex lsofRegex(R"(^n(/.*)$)");
auto lsofLines =
tokenizeString<std::vector<std::string>>(runProgram(LSOF, true, { "-n", "-w", "-F", "n" }), "\n");
for (const auto & line : lsofLines) {
std::smatch match;
if (std::regex_match(line, match, lsofRegex))
boost::smatch match;
if (boost::regex_match(line, match, lsofRegex))
unchecked[match[1].str()].emplace("{lsof}");
}
} catch (ExecError & e) {