From 3a1301cd6db698a212a0c036e40ad402bd8a2a12 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Tue, 6 May 2025 21:58:52 +0000 Subject: [PATCH 1/3] libstore: Use `boost::regex` for GC root discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As it turns out using `std::regex` is actually the bottleneck for root discovery. Just substituting `std::` -> `boost::` makes root discovery twice as fast (3x if counting only userspace time). Some rather ad-hoc measurements to motivate the switch: (On master) ``` nix build github:nixos/nix/1e822bd4149a8bce1da81ee2ad9404986b07914c#nix-cli --out-link result-1e822bd4149a8bce1da81ee2ad9404986b07914c taskset -c 2,3 hyperfine "result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0" Benchmark 1: result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0 Time (mean ± σ): 481.6 ms ± 3.9 ms [User: 336.2 ms, System: 142.0 ms] Range (min … max): 474.6 ms … 487.7 ms 10 runs ``` (After this patch) ``` taskset -c 2,3 hyperfine "result/bin/nix store gc --dry-run --max 0" Benchmark 1: result/bin/nix store gc --dry-run --max 0 Time (mean ± σ): 254.7 ms ± 9.7 ms [User: 111.1 ms, System: 141.3 ms] Range (min … max): 246.5 ms … 281.3 ms 10 runs ``` `boost::regex` is a drop-in replacement for `std::regex`, but much faster. Doing a simple before/after comparison doesn't surface any change in behavior: ``` result/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l ``` --- src/libstore/gc.cc | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/libstore/gc.cc b/src/libstore/gc.cc index f6a4124ff..e9fe72afe 100644 --- a/src/libstore/gc.cc +++ b/src/libstore/gc.cc @@ -13,10 +13,11 @@ # include "nix/util/processes.hh" #endif +#include + #include #include #include -#include #include #include @@ -331,8 +332,8 @@ static void readProcLink(const std::filesystem::path & file, UncheckedRoots & ro static std::string quoteRegexChars(const std::string & raw) { - static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])"); - return std::regex_replace(raw, specialRegex, R"(\$&)"); + static auto specialRegex = boost::regex(R"([.^$\\*+?()\[\]{}|])"); + return boost::regex_replace(raw, specialRegex, R"(\$&)"); } #ifdef __linux__ @@ -354,12 +355,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor) auto procDir = AutoCloseDir{opendir("/proc")}; if (procDir) { struct dirent * ent; - auto digitsRegex = std::regex(R"(^\d+$)"); - auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)"); - auto storePathRegex = std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); + static const auto digitsRegex = boost::regex(R"(^\d+$)"); + static const auto mapRegex = boost::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)"); + auto storePathRegex = boost::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)"); while (errno = 0, ent = readdir(procDir.get())) { checkInterrupt(); - if (std::regex_match(ent->d_name, digitsRegex)) { + if (boost::regex_match(ent->d_name, digitsRegex)) { try { readProcLink(fmt("/proc/%s/exe" ,ent->d_name), unchecked); readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked); @@ -386,15 +387,15 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor) std::filesystem::path mapFile = fmt("/proc/%s/maps", ent->d_name); auto mapLines = tokenizeString>(readFile(mapFile.string()), "\n"); for (const auto & line : mapLines) { - auto match = std::smatch{}; - if (std::regex_match(line, match, mapRegex)) + auto match = boost::smatch{}; + if (boost::regex_match(line, match, mapRegex)) unchecked[match[1]].emplace(mapFile.string()); } auto envFile = fmt("/proc/%s/environ", ent->d_name); auto envString = readFile(envFile); - auto env_end = std::sregex_iterator{}; - for (auto i = std::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i) + auto env_end = boost::sregex_iterator{}; + for (auto i = boost::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i) unchecked[i->str()].emplace(envFile); } catch (SystemError & e) { if (errno == ENOENT || errno == EACCES || errno == ESRCH) @@ -413,12 +414,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor) // Because of this we disable lsof when running the tests. if (getEnv("_NIX_TEST_NO_LSOF") != "1") { try { - std::regex lsofRegex(R"(^n(/.*)$)"); + boost::regex lsofRegex(R"(^n(/.*)$)"); auto lsofLines = tokenizeString>(runProgram(LSOF, true, { "-n", "-w", "-F", "n" }), "\n"); for (const auto & line : lsofLines) { - std::smatch match; - if (std::regex_match(line, match, lsofRegex)) + boost::smatch match; + if (boost::regex_match(line, match, lsofRegex)) unchecked[match[1].str()].emplace("{lsof}"); } } catch (ExecError & e) { From f3090ef7033c9bdc04beacfbb128c688cfa40fee Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Tue, 13 May 2025 08:47:24 +0000 Subject: [PATCH 2/3] packaging/dependencies: Use boost without enableIcu This reduces the closure size on master by 40MiB. ``` $ nix build github:nixos/nix/1e822bd4149a8bce1da81ee2ad9404986b07914c#nix-store --out-link closure-on-master $ nix build .#nix-store -L --out-link closure-without-icu $ nix path-info --closure-size -h ./closure-on-master /nix/store/8gwr38m5h6p7245ji9jv28a2a11w1isx-nix-store-2.29.0pre 124.4 MiB $ nix path-info --closure-size -h ./closure-without-icu /nix/store/k0gwfykjqpnmaqbwh23nk55lhanc9g24-nix-store-2.29.0pre 86.6 MiB ``` --- packaging/dependencies.nix | 1 + 1 file changed, 1 insertion(+) diff --git a/packaging/dependencies.nix b/packaging/dependencies.nix index a90ef1b4a..7ce3bf125 100644 --- a/packaging/dependencies.nix +++ b/packaging/dependencies.nix @@ -63,6 +63,7 @@ scope: { "--with-coroutine" "--with-iostreams" ]; + enableIcu = false; }).overrideAttrs (old: { # Need to remove `--with-*` to use `--with-libraries=...` From 18a5589f9a6d710fe1f70e694cee513589c1c11c Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Tue, 13 May 2025 08:51:46 +0000 Subject: [PATCH 3/3] libstore: Depend on boost_regex explicitly --- src/libstore/meson.build | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libstore/meson.build b/src/libstore/meson.build index 255f83f74..a8781b457 100644 --- a/src/libstore/meson.build +++ b/src/libstore/meson.build @@ -94,7 +94,7 @@ subdir('nix-meson-build-support/libatomic') boost = dependency( 'boost', - modules : ['container'], + modules : ['container', 'regex'], include_type: 'system', ) # boost is a public dependency, but not a pkg-config dependency unfortunately, so we