From 0033cf4270706cd9bcbe8210399715777d6e63b5 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:39 +0000 Subject: [PATCH 1/4] {libutil,libexpr}: Move pos-idx,pos-table code to libutil All of this code doesn't actually depend on anything from libexpr. Because Pos is so tigtly coupled with Error, it makes sense to have in the same library. (cherry picked from commit a53b184e63114ec390e3a1b1f7cd45b8a012ab04) --- maintainers/flake-module.nix | 1 - src/libexpr/meson.build | 2 -- src/libexpr/nixexpr.cc | 35 ------------------------- src/libutil/meson.build | 3 +++ src/{libexpr => libutil}/pos-idx.hh | 1 + src/libutil/pos-table.cc | 37 +++++++++++++++++++++++++++ src/{libexpr => libutil}/pos-table.hh | 10 +++++--- 7 files changed, 48 insertions(+), 41 deletions(-) rename src/{libexpr => libutil}/pos-idx.hh (98%) create mode 100644 src/libutil/pos-table.cc rename src/{libexpr => libutil}/pos-table.hh (94%) diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index f18a06f9d..bdc0d49a9 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -99,7 +99,6 @@ ''^src/libexpr/nixexpr\.cc$'' ''^src/libexpr/nixexpr\.hh$'' ''^src/libexpr/parser-state\.hh$'' - ''^src/libexpr/pos-table\.hh$'' ''^src/libexpr/primops\.cc$'' ''^src/libexpr/primops\.hh$'' ''^src/libexpr/primops/context\.cc$'' diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index 4d8a38b43..7b5557bd1 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -175,8 +175,6 @@ headers = [config_h] + files( # internal: 'lexer-helpers.hh', 'nixexpr.hh', 'parser-state.hh', - 'pos-idx.hh', - 'pos-table.hh', 'primops.hh', 'print-ambiguous.hh', 'print-options.hh', diff --git a/src/libexpr/nixexpr.cc b/src/libexpr/nixexpr.cc index dbc74faf9..5e93b2a57 100644 --- a/src/libexpr/nixexpr.cc +++ b/src/libexpr/nixexpr.cc @@ -601,41 +601,6 @@ void ExprLambda::setDocComment(DocComment docComment) { } }; - - -/* Position table. */ - -Pos PosTable::operator[](PosIdx p) const -{ - auto origin = resolve(p); - if (!origin) - return {}; - - const auto offset = origin->offsetOf(p); - - Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; - - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); - } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev( - std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - - result.line = 1 + (lineStartOffset - linesForInput.begin()); - result.column = 1 + (offset - *lineStartOffset); - return result; -} - - - /* Symbol table. */ size_t SymbolTable::totalSize() const diff --git a/src/libutil/meson.build b/src/libutil/meson.build index cba5a5288..ea38d6de4 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -146,6 +146,7 @@ sources = files( 'logging.cc', 'memory-source-accessor.cc', 'position.cc', + 'pos-table.cc', 'posix-source-accessor.cc', 'references.cc', 'serialise.cc', @@ -207,6 +208,8 @@ headers = [config_h] + files( 'memory-source-accessor.hh', 'muxable-pipe.hh', 'pool.hh', + 'pos-idx.hh', + 'pos-table.hh', 'position.hh', 'posix-source-accessor.hh', 'processes.hh', diff --git a/src/libexpr/pos-idx.hh b/src/libutil/pos-idx.hh similarity index 98% rename from src/libexpr/pos-idx.hh rename to src/libutil/pos-idx.hh index 2faa6b7fe..c1749ba69 100644 --- a/src/libexpr/pos-idx.hh +++ b/src/libutil/pos-idx.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc new file mode 100644 index 000000000..8178beb90 --- /dev/null +++ b/src/libutil/pos-table.cc @@ -0,0 +1,37 @@ +#include "pos-table.hh" + +#include + +namespace nix { + +/* Position table. */ + +Pos PosTable::operator[](PosIdx p) const +{ + auto origin = resolve(p); + if (!origin) + return {}; + + const auto offset = origin->offsetOf(p); + + Pos result{0, 0, origin->origin}; + auto lines = this->lines.lock(); + auto linesForInput = (*lines)[origin->offset]; + + if (linesForInput.empty()) { + auto source = result.getSource().value_or(""); + const char * begin = source.data(); + for (Pos::LinesIterator it(source), end; it != end; it++) + linesForInput.push_back(it->data() - begin); + if (linesForInput.empty()) + linesForInput.push_back(0); + } + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); + + result.line = 1 + (lineStartOffset - linesForInput.begin()); + result.column = 1 + (offset - *lineStartOffset); + return result; +} + +} diff --git a/src/libexpr/pos-table.hh b/src/libutil/pos-table.hh similarity index 94% rename from src/libexpr/pos-table.hh rename to src/libutil/pos-table.hh index ba2b91cf3..673cf62ae 100644 --- a/src/libexpr/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -1,4 +1,5 @@ #pragma once +///@file #include #include @@ -18,9 +19,12 @@ public: private: uint32_t offset; - Origin(Pos::Origin origin, uint32_t offset, size_t size): - offset(offset), origin(origin), size(size) - {} + Origin(Pos::Origin origin, uint32_t offset, size_t size) + : offset(offset) + , origin(origin) + , size(size) + { + } public: const Pos::Origin origin; From 3ab83f507c49325be25c65d138487aa008569c13 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:42 +0000 Subject: [PATCH 2/4] libutil: Document hacks and problems around Pos class This should provide context for follow-up commits in the patch series. (cherry picked from commit bf12aedf2edb10feb4605ebcde395e3b418ec58a) --- src/libutil/error.hh | 8 ++++++++ src/libutil/pos-table.hh | 11 +++++++++++ src/libutil/position.hh | 1 + 3 files changed, 20 insertions(+) diff --git a/src/libutil/error.hh b/src/libutil/error.hh index 58d902622..04fa18e35 100644 --- a/src/libutil/error.hh +++ b/src/libutil/error.hh @@ -50,6 +50,14 @@ struct LinesOfCode { std::optional nextLineOfCode; }; +/* NOTE: position.hh recursively depends on source-path.hh -> source-accessor.hh + -> hash.hh -> config.hh -> experimental-features.hh -> error.hh -> Pos. + There are other such cycles. + Thus, Pos has to be an incomplete type in this header. But since ErrorInfo/Trace + have to refer to Pos, they have to use pointer indirection via std::shared_ptr + to break the recursive header dependency. + FIXME: Untangle this mess. Should there be AbstractPos as there used to be before + 4feb7d9f71? */ struct Pos; void printCodeLines(std::ostream & out, diff --git a/src/libutil/pos-table.hh b/src/libutil/pos-table.hh index 673cf62ae..a6fe09d79 100644 --- a/src/libutil/pos-table.hh +++ b/src/libutil/pos-table.hh @@ -76,6 +76,17 @@ public: return PosIdx(1 + origin.offset + offset); } + /** + * Convert a byte-offset PosIdx into a Pos with line/column information. + * + * @param p Byte offset into the virtual concatenation of all parsed contents + * @return Position + * + * @warning Very expensive to call, as this has to read the entire source + * into memory each time. Call this only if absolutely necessary. Prefer + * to keep PosIdx around instead of needlessly converting it into Pos by + * using this lookup method. + */ Pos operator[](PosIdx p) const; Pos::Origin originOf(PosIdx p) const diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 25217069c..2ac68d15a 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -50,6 +50,7 @@ struct Pos explicit operator bool() const { return line > 0; } + /* TODO: Why std::shared_ptr and not std::shared_ptr? */ operator std::shared_ptr() const; /** From 6faf66d2f72b0bc503ab750f15684403482be607 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 12:55:45 +0000 Subject: [PATCH 3/4] libutil: Fix Pos::getSourcePath Previous implementation didn't actually check if std::get_if returned a nullptr: std::optional getSourcePath() const { return *std::get_if(&origin); } (cherry picked from commit 50123f2a566bd9157ef6ed64d95799473e5d8670) --- src/libutil/position.cc | 7 +++++++ src/libutil/position.hh | 4 +--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/libutil/position.cc b/src/libutil/position.cc index 5a2529262..a8424d259 100644 --- a/src/libutil/position.cc +++ b/src/libutil/position.cc @@ -66,6 +66,13 @@ std::optional Pos::getSource() const }, origin); } +std::optional Pos::getSourcePath() const +{ + if (auto * path = std::get_if(&origin)) + return *path; + return std::nullopt; +} + void Pos::print(std::ostream & out, bool showOrigin) const { if (showOrigin) { diff --git a/src/libutil/position.hh b/src/libutil/position.hh index 2ac68d15a..07e261c4c 100644 --- a/src/libutil/position.hh +++ b/src/libutil/position.hh @@ -70,9 +70,7 @@ struct Pos /** * Get the SourcePath, if the source was loaded from a file. */ - std::optional getSourcePath() const { - return *std::get_if(&origin); - } + std::optional getSourcePath() const; struct LinesIterator { using difference_type = size_t; From aaf86cc0d57a36788360ada533f1bf040e11e667 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 13 Mar 2025 16:24:30 +0000 Subject: [PATCH 4/4] {libexpr,libcmd}: Make debugger significantly faster MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The underlying issue is that debugger code path was calling PosTable::operator[] in each eval method. This has become incredibly expensive since 5d9fdab3de. While we are it it, I've reworked the code to not use std::shared_ptr where it really isn't necessary. As I've documented in previous commits, this is actually more a workaround for recursive header dependencies now and is only necessary in `error.hh` code. Some ad-hoc benchmarking: After this commit: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 784.2 ms ± 7.1 ms [User: 561.4 ms, System: 147.7 ms] Range (min … max): 773.5 ms … 792.6 ms 10 runs ``` On master 3604c7c51: ``` Benchmark 1: nix eval nixpkgs#hello --impure --ignore-try --no-eval-cache --debugger Time (mean ± σ): 22.914 s ± 0.178 s [User: 18.524 s, System: 4.151 s] Range (min … max): 22.738 s … 23.290 s 10 runs ``` (cherry picked from commit adbd08399c1817bc4dc5a1a3a32b160eaed49c6f) --- src/libcmd/repl.cc | 11 ++++------ src/libexpr/eval-error.cc | 2 +- src/libexpr/eval.cc | 44 +++++++++++++++++++++------------------ src/libexpr/eval.hh | 19 ++++++++++++++++- 4 files changed, 47 insertions(+), 29 deletions(-) diff --git a/src/libcmd/repl.cc b/src/libcmd/repl.cc index efc04b029..a3132b2d3 100644 --- a/src/libcmd/repl.cc +++ b/src/libcmd/repl.cc @@ -157,16 +157,13 @@ static std::ostream & showDebugTrace(std::ostream & out, const PosTable & positi out << ANSI_RED "error: " << ANSI_NORMAL; out << dt.hint.str() << "\n"; - // prefer direct pos, but if noPos then try the expr. - auto pos = dt.pos - ? dt.pos - : positions[dt.expr.getPos() ? dt.expr.getPos() : noPos]; + auto pos = dt.getPos(positions); if (pos) { - out << *pos; - if (auto loc = pos->getCodeLines()) { + out << pos; + if (auto loc = pos.getCodeLines()) { out << "\n"; - printCodeLines(out, "", *pos, *loc); + printCodeLines(out, "", pos, *loc); out << "\n"; } } diff --git a/src/libexpr/eval-error.cc b/src/libexpr/eval-error.cc index cdb0b4772..b9742d3ea 100644 --- a/src/libexpr/eval-error.cc +++ b/src/libexpr/eval-error.cc @@ -45,7 +45,7 @@ EvalErrorBuilder & EvalErrorBuilder::withFrame(const Env & env, const Expr // TODO: check compatibility with nested debugger calls. // TODO: What side-effects?? error.state.debugTraces.push_front(DebugTrace{ - .pos = error.state.positions[expr.getPos()], + .pos = expr.getPos(), .expr = expr, .env = env, .hint = HintFmt("Fake frame for debugging purposes"), diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 463f361d3..1ed85d761 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -756,18 +756,26 @@ void EvalState::runDebugRepl(const Error * error, const Env & env, const Expr & if (!debugRepl || inDebugger) return; - auto dts = - error && expr.getPos() - ? std::make_unique( - *this, - DebugTrace { - .pos = error->info().pos ? error->info().pos : positions[expr.getPos()], + auto dts = [&]() -> std::unique_ptr { + if (error && expr.getPos()) { + auto trace = DebugTrace{ + .pos = [&]() -> std::variant { + if (error->info().pos) { + if (auto * pos = error->info().pos.get()) + return *pos; + return noPos; + } + return expr.getPos(); + }(), .expr = expr, .env = env, .hint = error->info().msg, - .isError = true - }) - : nullptr; + .isError = true}; + + return std::make_unique(*this, std::move(trace)); + } + return nullptr; + }(); if (error) { @@ -812,7 +820,7 @@ static std::unique_ptr makeDebugTraceStacker( EvalState & state, Expr & expr, Env & env, - std::shared_ptr && pos, + std::variant pos, const Args & ... formatArgs) { return std::make_unique(state, @@ -1088,7 +1096,7 @@ void EvalState::evalFile(const SourcePath & path, Value & v, bool mustBeTrivial) *this, *e, this->baseEnv, - e->getPos() ? std::make_shared(positions[e->getPos()]) : nullptr, + e->getPos(), "while evaluating the file '%1%':", resolvedPath.to_string()) : nullptr; @@ -1314,9 +1322,7 @@ void ExprLet::eval(EvalState & state, Env & env, Value & v) state, *this, env2, - getPos() - ? std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while evaluating a '%1%' expression", "let" ) @@ -1385,7 +1391,7 @@ void ExprSelect::eval(EvalState & state, Env & env, Value & v) state, *this, env, - state.positions[getPos()], + getPos(), "while evaluating the attribute '%1%'", showAttrPath(state, env, attrPath)) : nullptr; @@ -1603,7 +1609,7 @@ void EvalState::callFunction(Value & fun, size_t nrArgs, Value * * args, Value & try { auto dts = debugRepl ? makeDebugTraceStacker( - *this, *lambda.body, env2, positions[lambda.pos], + *this, *lambda.body, env2, lambda.pos, "while calling %s", lambda.name ? concatStrings("'", symbols[lambda.name], "'") @@ -1742,9 +1748,7 @@ void ExprCall::eval(EvalState & state, Env & env, Value & v) state, *this, env, - getPos() - ? std::make_shared(state.positions[getPos()]) - : nullptr, + getPos(), "while calling a function" ) : nullptr; @@ -2122,7 +2126,7 @@ void EvalState::forceValueDeep(Value & v) try { // If the value is a thunk, we're evaling. Otherwise no trace necessary. auto dts = debugRepl && i.value->isThunk() - ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, positions[i.pos], + ? makeDebugTraceStacker(*this, *i.value->payload.thunk.expr, *i.value->payload.thunk.env, i.pos, "while evaluating the attribute '%1%'", symbols[i.name]) : nullptr; diff --git a/src/libexpr/eval.hh b/src/libexpr/eval.hh index 27fb336ce..4ef2869c1 100644 --- a/src/libexpr/eval.hh +++ b/src/libexpr/eval.hh @@ -155,11 +155,28 @@ struct RegexCache; std::shared_ptr makeRegexCache(); struct DebugTrace { - std::shared_ptr pos; + /* WARNING: Converting PosIdx -> Pos should be done with extra care. This is + due to the fact that operator[] of PosTable is incredibly expensive. */ + std::variant pos; const Expr & expr; const Env & env; HintFmt hint; bool isError; + + Pos getPos(const PosTable & table) const + { + return std::visit( + overloaded{ + [&](PosIdx idx) { + // Prefer direct pos, but if noPos then try the expr. + if (!idx) + idx = expr.getPos(); + return table[idx]; + }, + [&](Pos pos) { return pos; }, + }, + pos); + } }; class EvalState : public std::enable_shared_from_this