mirror of
https://github.com/NixOS/nix
synced 2025-06-25 02:21:16 +02:00
libstore: Use boost::regex for GC root discovery
As it turns out, using `std::regex` is actually the bottleneck for root discovery. Just substituting `std::` -> `boost::` makes root discovery twice as fast (3x if counting only userspace time).

Some rather ad-hoc measurements to motivate the switch:

(On master)

```
nix build github:nixos/nix/1e822bd4149a8bce1da81ee2ad9404986b07914c#nix-cli --out-link result-1e822bd4149a8bce1da81ee2ad9404986b07914c
taskset -c 2,3 hyperfine "result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0"
Benchmark 1: result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run --max 0
  Time (mean ± σ): 481.6 ms ± 3.9 ms [User: 336.2 ms, System: 142.0 ms]
  Range (min … max): 474.6 ms … 487.7 ms 10 runs
```

(After this patch)

```
taskset -c 2,3 hyperfine "result/bin/nix store gc --dry-run --max 0"
Benchmark 1: result/bin/nix store gc --dry-run --max 0
  Time (mean ± σ): 254.7 ms ± 9.7 ms [User: 111.1 ms, System: 141.3 ms]
  Range (min … max): 246.5 ms … 281.3 ms 10 runs
```

`boost::regex` is a drop-in replacement for `std::regex`, but much faster. Doing a simple before/after comparison doesn't surface any change in behavior:

```
result/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l
result-1e822bd4149a8bce1da81ee2ad9404986b07914c/bin/nix store gc --dry-run -vvvvv --max 0 |& grep "got additional" | wc -l
```
This commit is contained in:
parent
1e822bd414
commit
3a1301cd6d
1 changed files with 15 additions and 14 deletions
|
@ -13,10 +13,11 @@
|
||||||
# include "nix/util/processes.hh"
|
# include "nix/util/processes.hh"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include <boost/regex.hpp>
|
||||||
|
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <queue>
|
#include <queue>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <regex>
|
|
||||||
#include <random>
|
#include <random>
|
||||||
|
|
||||||
#include <climits>
|
#include <climits>
|
||||||
|
@ -331,8 +332,8 @@ static void readProcLink(const std::filesystem::path & file, UncheckedRoots & ro
|
||||||
|
|
||||||
static std::string quoteRegexChars(const std::string & raw)
|
static std::string quoteRegexChars(const std::string & raw)
|
||||||
{
|
{
|
||||||
static auto specialRegex = std::regex(R"([.^$\\*+?()\[\]{}|])");
|
static auto specialRegex = boost::regex(R"([.^$\\*+?()\[\]{}|])");
|
||||||
return std::regex_replace(raw, specialRegex, R"(\$&)");
|
return boost::regex_replace(raw, specialRegex, R"(\$&)");
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
|
@ -354,12 +355,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
|
||||||
auto procDir = AutoCloseDir{opendir("/proc")};
|
auto procDir = AutoCloseDir{opendir("/proc")};
|
||||||
if (procDir) {
|
if (procDir) {
|
||||||
struct dirent * ent;
|
struct dirent * ent;
|
||||||
auto digitsRegex = std::regex(R"(^\d+$)");
|
static const auto digitsRegex = boost::regex(R"(^\d+$)");
|
||||||
auto mapRegex = std::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
|
static const auto mapRegex = boost::regex(R"(^\s*\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+(/\S+)\s*$)");
|
||||||
auto storePathRegex = std::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
|
auto storePathRegex = boost::regex(quoteRegexChars(storeDir) + R"(/[0-9a-z]+[0-9a-zA-Z\+\-\._\?=]*)");
|
||||||
while (errno = 0, ent = readdir(procDir.get())) {
|
while (errno = 0, ent = readdir(procDir.get())) {
|
||||||
checkInterrupt();
|
checkInterrupt();
|
||||||
if (std::regex_match(ent->d_name, digitsRegex)) {
|
if (boost::regex_match(ent->d_name, digitsRegex)) {
|
||||||
try {
|
try {
|
||||||
readProcLink(fmt("/proc/%s/exe" ,ent->d_name), unchecked);
|
readProcLink(fmt("/proc/%s/exe" ,ent->d_name), unchecked);
|
||||||
readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked);
|
readProcLink(fmt("/proc/%s/cwd", ent->d_name), unchecked);
|
||||||
|
@ -386,15 +387,15 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
|
||||||
std::filesystem::path mapFile = fmt("/proc/%s/maps", ent->d_name);
|
std::filesystem::path mapFile = fmt("/proc/%s/maps", ent->d_name);
|
||||||
auto mapLines = tokenizeString<std::vector<std::string>>(readFile(mapFile.string()), "\n");
|
auto mapLines = tokenizeString<std::vector<std::string>>(readFile(mapFile.string()), "\n");
|
||||||
for (const auto & line : mapLines) {
|
for (const auto & line : mapLines) {
|
||||||
auto match = std::smatch{};
|
auto match = boost::smatch{};
|
||||||
if (std::regex_match(line, match, mapRegex))
|
if (boost::regex_match(line, match, mapRegex))
|
||||||
unchecked[match[1]].emplace(mapFile.string());
|
unchecked[match[1]].emplace(mapFile.string());
|
||||||
}
|
}
|
||||||
|
|
||||||
auto envFile = fmt("/proc/%s/environ", ent->d_name);
|
auto envFile = fmt("/proc/%s/environ", ent->d_name);
|
||||||
auto envString = readFile(envFile);
|
auto envString = readFile(envFile);
|
||||||
auto env_end = std::sregex_iterator{};
|
auto env_end = boost::sregex_iterator{};
|
||||||
for (auto i = std::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i)
|
for (auto i = boost::sregex_iterator{envString.begin(), envString.end(), storePathRegex}; i != env_end; ++i)
|
||||||
unchecked[i->str()].emplace(envFile);
|
unchecked[i->str()].emplace(envFile);
|
||||||
} catch (SystemError & e) {
|
} catch (SystemError & e) {
|
||||||
if (errno == ENOENT || errno == EACCES || errno == ESRCH)
|
if (errno == ENOENT || errno == EACCES || errno == ESRCH)
|
||||||
|
@ -413,12 +414,12 @@ void LocalStore::findRuntimeRoots(Roots & roots, bool censor)
|
||||||
// Because of this we disable lsof when running the tests.
|
// Because of this we disable lsof when running the tests.
|
||||||
if (getEnv("_NIX_TEST_NO_LSOF") != "1") {
|
if (getEnv("_NIX_TEST_NO_LSOF") != "1") {
|
||||||
try {
|
try {
|
||||||
std::regex lsofRegex(R"(^n(/.*)$)");
|
boost::regex lsofRegex(R"(^n(/.*)$)");
|
||||||
auto lsofLines =
|
auto lsofLines =
|
||||||
tokenizeString<std::vector<std::string>>(runProgram(LSOF, true, { "-n", "-w", "-F", "n" }), "\n");
|
tokenizeString<std::vector<std::string>>(runProgram(LSOF, true, { "-n", "-w", "-F", "n" }), "\n");
|
||||||
for (const auto & line : lsofLines) {
|
for (const auto & line : lsofLines) {
|
||||||
std::smatch match;
|
boost::smatch match;
|
||||||
if (std::regex_match(line, match, lsofRegex))
|
if (boost::regex_match(line, match, lsofRegex))
|
||||||
unchecked[match[1].str()].emplace("{lsof}");
|
unchecked[match[1].str()].emplace("{lsof}");
|
||||||
}
|
}
|
||||||
} catch (ExecError & e) {
|
} catch (ExecError & e) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue