From 6384a65baf21b811fc1c02a7752544c47181e111 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:39 +0000 Subject: [PATCH 1/4] {libutil,libexpr}: Move pos-idx,pos-table code to libutil All of this code doesn't actually depend on anything from libexpr. Because Pos is so tightly coupled with Error, it makes sense to have them in the same library. (cherry picked from commit a53b184e63114ec390e3a1b1f7cd45b8a012ab04) --- maintainers/flake-module.nix | 1 - src/libexpr/meson.build | 2 -- src/libexpr/nixexpr.cc | 35 ------------------------- src/libutil/meson.build | 3 +++ src/{libexpr => libutil}/pos-idx.hh | 1 + src/libutil/pos-table.cc | 37 +++++++++++++++++++++++++++ src/{libexpr => libutil}/pos-table.hh | 10 +++++--- 7 files changed, 48 insertions(+), 41 deletions(-) rename src/{libexpr => libutil}/pos-idx.hh (98%) create mode 100644 src/libutil/pos-table.cc rename src/{libexpr => libutil}/pos-table.hh (94%) diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index 2f19072ee..f89e24b8a 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -99,7 +99,6 @@ ''^src/libexpr/nixexpr\.cc$'' ''^src/libexpr/nixexpr\.hh$'' ''^src/libexpr/parser-state\.hh$'' - ''^src/libexpr/pos-table\.hh$'' ''^src/libexpr/primops\.cc$'' ''^src/libexpr/primops\.hh$'' ''^src/libexpr/primops/context\.cc$'' diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index b33aebc86..ec2a44cd1 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -171,8 +171,6 @@ headers = [config_h] + files( # internal: 'lexer-helpers.hh', 'nixexpr.hh', 'parser-state.hh', - 'pos-idx.hh', - 'pos-table.hh', 'primops.hh', 'print-ambiguous.hh', 'print-options.hh', diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index 063ff0753..5312b8d30 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -601,41 +601,6 @@ void ExprLambda::setDocComment(DocComment docComment) { } }; - - -/* Position table. 
*/ - -Pos PosTable::operator[](PosIdx p) const -{ - auto origin = resolve(p); - if (!origin) - return {}; - - const auto offset = origin->offsetOf(p); - - Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; - - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); - } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev( - std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - - result.line = 1 + (lineStartOffset - linesForInput.begin()); - result.column = 1 + (offset - *lineStartOffset); - return result; -} - - - /* Symbol table. */ size_t SymbolTable::totalSize() const diff --git a/src/libutil/meson.build b/src/libutil/meson.build index ac701d8fd..33de2f882 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -148,6 +148,7 @@ sources = files( 'logging.cc', 'memory-source-accessor.cc', 'position.cc', + 'pos-table.cc', 'posix-source-accessor.cc', 'references.cc', 'serialise.cc', @@ -217,6 +218,8 @@ headers = [config_h] + files( 'muxable-pipe.hh', 'os-string.hh', 'pool.hh', + 'pos-idx.hh', + 'pos-table.hh', 'position.hh', 'posix-source-accessor.hh', 'processes.hh', diff --git a/src/libexpr/pos-idx.hh b/src/libutil/pos-idx.hh similarity index 98% rename from src/libexpr/pos-idx.hh rename to src/libutil/pos-idx.hh index 2faa6b7fe..c1749ba69 100644 --- a/src/libexpr/pos-idx.hh +++ b/src/libutil/pos-idx.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc new file mode 100644 index 000000000..8178beb90 --- /dev/null +++ b/src/libutil/pos-table.cc @@ -0,0 +1,37 @@ +#include "pos-table.hh" + +#include + +namespace nix { + +/* 
Position table. */ + +Pos PosTable::operator[](PosIdx p) const +{ + auto origin = resolve(p); + if (!origin) + return {}; + + const auto offset = origin->offsetOf(p); + + Pos result{0, 0, origin->origin}; + auto lines = this->lines.lock(); + auto linesForInput = (*lines)[origin->offset]; + + if (linesForInput.empty()) { + auto source = result.getSource().value_or(""); + const char * begin = source.data(); + for (Pos::LinesIterator it(source), end; it != end; it++) + linesForInput.push_back(it->data() - begin); + if (linesForInput.empty()) + linesForInput.push_back(0); + } + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); + + result.line = 1 + (lineStartOffset - linesForInput.begin()); + result.column = 1 + (offset - *lineStartOffset); + return result; +} + +} diff --git a/src/libexpr/pos-table.hh b/src/libutil/pos-table.hh similarity index 94% rename from src/libexpr/pos-table.hh rename to src/libutil/pos-table.hh index ba2b91cf3..673cf62ae 100644 --- a/src/libexpr/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include @@ -18,9 +19,12 @@ public: private: uint32_t offset; - Origin(Pos::Origin origin, uint32_t offset, size_t size): - offset(offset), origin(origin), size(size) - {} + Origin(Pos::Origin origin, uint32_t offset, size_t size) + : offset(offset) + , origin(origin) + , size(size) + { + } public: const Pos::Origin origin; From 5a3b0d9c7a23dfc840b3e13cc84eddcf9e2bab7d Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:42 +0000 Subject: [PATCH 2/4] libutil: Document hacks and problems around Pos class This should provide context for follow-up commits in the patch series. 
(cherry picked from commit bf12aedf2edb10feb4605ebcde395e3b418ec58a) --- src/libutil/error.hh | 8 ++++++++ src/libutil/pos-table.hh | 11 +++++++++++ src/libutil/position.hh | 1 + 3 files changed, 20 insertions(+) diff --git a/src/libutil/error.hh b/src/libutil/error.hh index 58d902622..04fa18e35 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -50,6 +50,14 @@ struct LinesOfCode { std::optional nextLineOfCode; }; +/* NOTE: position.hh recursively depends on source-path.hh -> source-accessor.hh + -> hash.hh -> config.hh -> experimental-features.hh -> error.hh -> Pos. + There are other such cycles. + Thus, Pos has to be an incomplete type in this header. But since ErrorInfo/Trace + have to refer to Pos, they have to use pointer indirection via std::shared_ptr + to break the recursive header dependency. + FIXME: Untangle this mess. Should there be AbstractPos as there used to be before + 4feb7d9f71? */ struct Pos; void printCodeLines(std::ostream & out, diff --git a/src/libutil/pos-table.hh b/src/libutil/pos-table.hh index 673cf62ae..a6fe09d79 100644 --- a/src/libutil/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -76,6 +76,17 @@ public: return PosIdx(1 + origin.offset + offset); } + /** + * Convert a byte-offset PosIdx into a Pos with line/column information. + * + * @param p Byte offset into the virtual concatenation of all parsed contents + * @return Position + * + * @warning Very expensive to call, as this has to read the entire source + * into memory each time. Call this only if absolutely necessary. Prefer + * to keep PosIdx around instead of needlessly converting it into Pos by + * using this lookup method. 
+ */ Pos operator[](PosIdx p) const; Pos::Origin originOf(PosIdx p) const diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 25217069c..2ac68d15a 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -50,6 +50,7 @@ struct Pos explicit operator bool() const { return line > 0; } + /* TODO: Why std::shared_ptr<Pos> and not std::shared_ptr<const Pos>? */ operator std::shared_ptr() const; /** From bee21af2a16707183752cbda5aa0c29ed80e7ddc Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:45 +0000 Subject: [PATCH 3/4] libutil: Fix Pos::getSourcePath Previous implementation didn't actually check if std::get_if returned a nullptr: std::optional getSourcePath() const { return *std::get_if(&origin); } (cherry picked from commit 50123f2a566bd9157ef6ed64d95799473e5d8670) --- src/libutil/position.cc | 7 +++++++ src/libutil/position.hh | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/libutil/position.cc b/src/libutil/position.cc index 946f167b6..275985c8c 100644 --- a/src/libutil/position.cc +++ b/src/libutil/position.cc @@ -66,6 +66,13 @@ std::optional Pos::getSource() const }, origin); } +std::optional Pos::getSourcePath() const +{ + if (auto * path = std::get_if(&origin)) + return *path; + return std::nullopt; +} + void Pos::print(std::ostream & out, bool showOrigin) const { if (showOrigin) { diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 2ac68d15a..07e261c4c 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -70,9 +70,7 @@ struct Pos /** * Get the SourcePath, if the source was loaded from a file. 
*/ - std::optional getSourcePath() const { - return *std::get_if(&origin); - } + std::optional getSourcePath() const; struct LinesIterator { using difference_type = size_t; From 9d54f34ed0bee8fb6a7038ec58b129e67968de77 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 16:24:30 +0000 Subject: [PATCH 4/4] {libexpr,libcmd}: Make debugger significantly faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The underlying issue is that debugger code path was calling PosTable::operator[] in each eval method. This has become incredibly expensive since 5d9fdab3de. While we are at it, I've reworked the code to not use std::shared_ptr where it really isn't necessary. As I've documented in previous commits, this is actually more a workaround for recursive header dependencies now and is only necessary in `error.hh` code. Some ad-hoc benchmarking: After this commit: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 784.2 ms ± 7.1 ms [User: 561.4 ms, System: 147.7 ms] Range (min … max): 773.5 ms … 792.6 ms 10 runs ``` On master 3604c7c51: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 22.914 s ± 0.178 s [User: 18.524 s, System: 4.151 s] Range (min … max): 22.738 s … 23.290 s 10 runs ``` (cherry picked from commit adbd08399c1817bc4dc5a1a3a32b160eaed49c6f) --- src/libcmd/repl.cc | 11 ++++------ src/libexpr/eval-error.cc | 2 +- src/libexpr/eval.cc | 44 +++++++++++++++++++++------------------ src/libexpr/eval.hh | 19 ++++++++++++++++- 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc index f292f06bb..1e984201c 100644 --- a/src/libcmd/repl.cc +++ b/src/libcmd/repl.cc @@ -138,16 +138,13 @@ static std::ostream & showDebugTrace(std::ostream & out, const PosTable & positi out << ANSI_RED "error: " << ANSI_NORMAL; out << dt.hint.str() << "\n"; - // prefer 
direct pos, but if noPos then try the expr. - auto pos = dt.pos - ? dt.pos - : positions[dt.expr.getPos() ? dt.expr.getPos() : noPos]; + auto pos = dt.getPos(positions); if (pos) { - out << *pos; - if (auto loc = pos->getCodeLines()) { + out << pos; + if (auto loc = pos.getCodeLines()) { out << "\n"; - printCodeLines(out, "", *pos, *loc); + printCodeLines(out, "", pos, *loc); out << "\n"; } } diff --git a/src/libexpr/eval-error.cc b/src/libexpr/eval-error.cc index cdb0b4772..b9742d3ea 100644 --- a/src/libexpr/eval-error.cc +++ b/src/libexpr/eval-error.cc @@ -45,7 +45,7 @@ EvalErrorBuilder & EvalErrorBuilder::withFrame(const Env & env, const Expr // TODO: check compatibility with nested debugger calls. // TODO: What side-effects?? error.state.debugTraces.push_front(DebugTrace{ - .pos = error.state.positions[expr.getPos()], + .pos = expr.getPos(), .expr = expr, .env = env, .hint = HintFmt("Fake frame for debugging purposes"), diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 8aef85dc5..ba67d0679 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -754,18 +754,26 @@ void EvalState::runDebugRepl(const Error * error, const Env & env, const Expr & if (!debugRepl || inDebugger) return; - auto dts = - error && expr.getPos() - ? std::make_unique( - *this, - DebugTrace { - .pos = error->info().pos ? 
error->info().pos : positions[expr.getPos()], + auto dts = [&]() -> std::unique_ptr { + if (error && expr.getPos()) { + auto trace = DebugTrace{ + .pos = [&]() -> std::variant { + if (error->info().pos) { + if (auto * pos = error->info().pos.get()) + return *pos; + return noPos; + } + return expr.getPos(); + }(), .expr = expr, .env = env, .hint = error->info().msg, - .isError = true - }) - : nullptr; + .isError = true}; + + return std::make_unique(*this, std::move(trace)); + } + return nullptr; + }(); if (error) { @@ -810,7 +818,7 @@ static std::unique_ptr makeDebugTraceStacker( EvalState & state, Expr & expr, Env & env, - std::shared_ptr && pos, + std::variant pos, const Args & ... formatArgs) { return std::make_unique(state, @@ -1087,7 +1095,7 @@ void EvalState::evalFile(const SourcePath & path, Value & v, bool mustBeTrivial) *this, *e, this->baseEnv, - e->getPos() ? std::make_shared(positions[e->getPos()]) : nullptr, + e->getPos(), "while evaluating the file '%1%':", resolvedPath.to_string()) : nullptr; @@ -1313,9 +1321,7 @@ void ExprLet::eval(EvalState & state, Env & env, Value & v) state, *this, env2, - getPos() - ? std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while evaluating a '%1%' expression", "let" ) @@ -1384,7 +1390,7 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) state, *this, env, - state.positions[getPos()], + getPos(), "while evaluating the attribute '%1%'", showAttrPath(state, env, attrPath)) : nullptr; @@ -1585,7 +1591,7 @@ void EvalState::callFunction(Value & fun, std::span args, Value & vRes, try { auto dts = debugRepl ? makeDebugTraceStacker( - *this, *lambda.body, env2, positions[lambda.pos], + *this, *lambda.body, env2, lambda.pos, "while calling %s", lambda.name ? concatStrings("'", symbols[lambda.name], "'") @@ -1720,9 +1726,7 @@ void ExprCall::eval(EvalState & state, Env & env, Value & v) state, *this, env, - getPos() - ? 
std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while calling a function" ) : nullptr; @@ -2106,7 +2110,7 @@ void EvalState::forceValueDeep(Value & v) try { // If the value is a thunk, we're evaling. Otherwise no trace necessary. auto dts = debugRepl && i.value->isThunk() - ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, positions[i.pos], + ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, i.pos, "while evaluating the attribute '%1%'", symbols[i.name]) : nullptr; diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 767578343..e2f7513de 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -171,11 +171,28 @@ struct RegexCache; std::shared_ptr makeRegexCache(); struct DebugTrace { - std::shared_ptr pos; + /* WARNING: Converting PosIdx -> Pos should be done with extra care. This is + due to the fact that operator[] of PosTable is incredibly expensive. */ + std::variant pos; const Expr & expr; const Env & env; HintFmt hint; bool isError; + + Pos getPos(const PosTable & table) const + { + return std::visit( + overloaded{ + [&](PosIdx idx) { + // Prefer direct pos, but if noPos then try the expr. + if (!idx) + idx = expr.getPos(); + return table[idx]; + }, + [&](Pos pos) { return pos; }, + }, + pos); + } }; class EvalState : public std::enable_shared_from_this