From 5e74c0e4d6825d3531e33424bf7182a37901ef5d Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 18 May 2025 22:24:14 +0000 Subject: [PATCH] libexpr: Add `SampleStack` stack-sampling profiler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds support for a native stack sampling profiler to the evaluator, which saves a collapsed stack profile information to a configurable location. Introduced options (in `EvalSettings`): - `eval-profile-file` - path to the collected profile file. - `eval-profiler-frequency` - sampling frequency. - `eval-profiler` - enumeration option for enabling the profiler. Currently only `flamegraph` is supported, but having this an enumeration rather than a boolean switch leaves the door open for other profiler variants (e.g. tracy). Profile includes the following information on best-effort basis (e.g. some lambdas might have an undefined name). Callstack information contains: - Call site location (where the function gets called). - Primop/lambda name of the function being called. - Functors/partial applications don't have a name attached to them unlike special-cased primops and lambads. For cases where callsite location isn't available we have to resort to providing the location where the lambda itself is defined. This removes some of the confusing `«none»:0` locations in the profile from previous attempts. Example usage with piping directly into zstd for compression: ``` nix eval --no-eval-cache nixpkgs#nixosTests.gnome \ --eval-profiler flamegraph \ --eval-profile-file >(zstd -of nix.profile.zstd) ``` Co-authored-by: Jörg Thalheim --- src/libexpr/eval-profiler-settings.cc | 49 ++++ src/libexpr/eval-profiler.cc | 265 ++++++++++++++++++ src/libexpr/eval.cc | 12 +- .../nix/expr/eval-profiler-settings.hh | 16 ++ src/libexpr/include/nix/expr/eval-profiler.hh | 4 + src/libexpr/include/nix/expr/eval-settings.hh | 22 ++ src/libexpr/include/nix/expr/eval.hh | 2 +- src/libexpr/include/nix/expr/meson.build | 1 + src/libexpr/meson.build | 1 + 9 files changed, 369 insertions(+), 3 deletions(-) create mode 100644 src/libexpr/eval-profiler-settings.cc create mode 100644 src/libexpr/include/nix/expr/eval-profiler-settings.hh diff --git a/src/libexpr/eval-profiler-settings.cc b/src/libexpr/eval-profiler-settings.cc new file mode 100644 index 000000000..1a35d4a2d --- /dev/null +++ b/src/libexpr/eval-profiler-settings.cc @@ -0,0 +1,49 @@ +#include "nix/expr/eval-profiler-settings.hh" +#include "nix/util/configuration.hh" +#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */ +#include "nix/util/config-impl.hh" +#include "nix/util/abstract-setting-to-json.hh" + +#include + +namespace nix { + +template<> +EvalProfilerMode BaseSetting::parse(const std::string & str) const +{ + if (str == "disabled") + return EvalProfilerMode::disabled; + else if (str == "flamegraph") + return EvalProfilerMode::flamegraph; + else + throw UsageError("option '%s' has invalid value '%s'", name, str); +} + +template<> +struct BaseSetting::trait +{ + static constexpr bool appendable = false; +}; + +template<> +std::string BaseSetting::to_string() const +{ + if (value == EvalProfilerMode::disabled) + return "disabled"; + else if (value == EvalProfilerMode::flamegraph) + return "flamegraph"; + else + unreachable(); +} + +NLOHMANN_JSON_SERIALIZE_ENUM( + EvalProfilerMode, + { + {EvalProfilerMode::disabled, "disabled"}, + {EvalProfilerMode::flamegraph, "flamegraph"}, + }); + +/* Explicit instantiation of templates */ +template class BaseSetting; + +} diff --git a/src/libexpr/eval-profiler.cc b/src/libexpr/eval-profiler.cc index 5374d7d99..f08b46e6b 100644 --- a/src/libexpr/eval-profiler.cc +++ b/src/libexpr/eval-profiler.cc @@ -1,5 +1,7 @@ #include "nix/expr/eval-profiler.hh" #include "nix/expr/nixexpr.hh" +#include "nix/expr/eval.hh" +#include "nix/util/lru-cache.hh" namespace nix { @@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref profiler) invalidateNeededHooks(); } +namespace { + +class PosCache : private LRUCache +{ + const EvalState & state; + +public: + PosCache(const EvalState & state) + : LRUCache(524288) /* ~40MiB */ + , state(state) + { + } + + Pos lookup(PosIdx posIdx) + { + auto posOrNone = LRUCache::get(posIdx); + if (posOrNone) + return *posOrNone; + + auto pos = state.positions[posIdx]; + upsert(posIdx, pos); + return pos; + } +}; + +struct LambdaFrameInfo +{ + ExprLambda * expr; + /** Position where the lambda has been called. */ + PosIdx callPos = noPos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const LambdaFrameInfo & rhs) const = default; +}; + +/** Primop call. */ +struct PrimOpFrameInfo +{ + const PrimOp * expr; + /** Position where the primop has been called. */ + PosIdx callPos = noPos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const PrimOpFrameInfo & rhs) const = default; +}; + +/** Used for functor calls (attrset with __functor attr). */ +struct FunctorFrameInfo +{ + PosIdx pos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const FunctorFrameInfo & rhs) const = default; +}; + +/** Fallback frame info. */ +struct GenericFrameInfo +{ + PosIdx pos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const GenericFrameInfo & rhs) const = default; +}; + +using FrameInfo = std::variant; +using FrameStack = std::vector; + +/** + * Stack sampling profiler. + */ +class SampleStack : public EvalProfiler +{ + /* How often stack profiles should be flushed to file. This avoids the need + to persist stack samples across the whole evaluation at the cost + of periodically flushing data to disk. */ + static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000); + + Hooks getNeededHooksImpl() const override + { + return Hooks().set(preFunctionCall).set(postFunctionCall); + } + +public: + SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period) + : state(state) + , sampleInterval(period) + , profileFd([&]() { + AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660)); + if (!fd) + throw SysError("opening file %s", profileFile); + return fd; + }()) + , posCache(state) + { + } + + [[gnu::noinline]] void + preFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; + [[gnu::noinline]] void + postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; + + void maybeSaveProfile(std::chrono::time_point now); + void saveProfile(); + FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos); + + SampleStack(SampleStack &&) = default; + SampleStack & operator=(SampleStack &&) = delete; + SampleStack(const SampleStack &) = delete; + SampleStack & operator=(const SampleStack &) = delete; + ~SampleStack(); + +private: + /** Hold on to an instance of EvalState for symbolizing positions. */ + const EvalState & state; + std::chrono::nanoseconds sampleInterval; + AutoCloseFD profileFd; + FrameStack stack; + std::map callCount; + std::chrono::time_point lastStackSample = + std::chrono::high_resolution_clock::now(); + std::chrono::time_point lastDump = std::chrono::high_resolution_clock::now(); + PosCache posCache; +}; + +FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos) +{ + /* NOTE: No actual references to garbage collected values are not held in + the profiler. */ + if (v.isLambda()) + return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos}; + else if (v.isPrimOp()) + return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos}; + else if (v.isPrimOpApp()) + /* Resolve primOp eagerly. Must not hold on to a reference to a Value. */ + return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos}; + else if (state.isFunctor(v)) { + const auto functor = v.attrs()->get(state.sFunctor); + if (auto pos_ = posCache.lookup(pos); std::holds_alternative(pos_.origin)) + /* HACK: In case callsite position is unresolved. */ + return FunctorFrameInfo{.pos = functor->pos}; + return FunctorFrameInfo{.pos = pos}; + } else + /* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what + * trace-function-calls does. */ + return GenericFrameInfo{.pos = pos}; +} + +[[gnu::noinline]] void SampleStack::preFunctionCallHook( + const EvalState & state, const Value & v, [[maybe_unused]] std::span args, const PosIdx pos) +{ + stack.push_back(getFrameInfoFromValueAndPos(v, pos)); + + auto now = std::chrono::high_resolution_clock::now(); + + if (now - lastStackSample > sampleInterval) { + callCount[stack] += 1; + lastStackSample = now; + } + + /* Do this in preFunctionCallHook because we might throw an exception, but + callFunction uses Finally, which doesn't play well with exceptions. */ + maybeSaveProfile(now); +} + +[[gnu::noinline]] void +SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) +{ + + if (!stack.empty()) + stack.pop_back(); +} + +std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + if (auto pos = posCache.lookup(callPos); std::holds_alternative(pos.origin)) + /* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved + resort to printing the lambda location instead of the callsite position. */ + os << posCache.lookup(expr->getPos()); + else + os << pos; + if (expr->name) + os << ":" << state.symbols[expr->name]; + return os; +} + +std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + os << posCache.lookup(pos); + return os; +} + +std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + os << posCache.lookup(pos) << ":functor"; + return os; +} + +std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + /* Sometimes callsite position can have an unresolved origin, which + leads to confusing «none»:0 locations in the profile. */ + auto pos = posCache.lookup(callPos); + if (!std::holds_alternative(pos.origin)) + os << posCache.lookup(callPos) << ":"; + os << *expr; + return os; +} + +void SampleStack::maybeSaveProfile(std::chrono::time_point now) +{ + if (now - lastDump >= profileDumpInterval) + saveProfile(); + else + return; + + /* Save the last dump timepoint. Do this after actually saving data to file + to not account for the time doing the flushing to disk. */ + lastDump = std::chrono::high_resolution_clock::now(); + + /* Free up memory used for stack sampling. This might be very significant for + long-running evaluations, so we shouldn't hog too much memory. */ + callCount.clear(); +} + +void SampleStack::saveProfile() +{ + auto os = std::ostringstream{}; + for (auto & [stack, count] : callCount) { + auto first = true; + for (auto & pos : stack) { + if (first) + first = false; + else + os << ";"; + + std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos); + } + os << " " << count; + writeLine(profileFd.get(), std::move(os).str()); + /* Clear ostringstream. */ + os.str(""); + os.clear(); + } +} + +SampleStack::~SampleStack() +{ + /* Guard against cases when we are already unwinding the stack. */ + try { + saveProfile(); + } catch (...) { + ignoreExceptionInDestructor(); + } +} + +} // namespace + +ref +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency) +{ + /* 0 is a special value for sampling stack after each call. */ + std::chrono::nanoseconds period = frequency == 0 + ? std::chrono::nanoseconds{0} + : std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num}; + return make_ref(state, profileFile, period); +} + } diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 1b87cf42f..ff7df249a 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -376,8 +376,16 @@ EvalState::EvalState( /* Register function call tracer. */ if (settings.traceFunctionCalls) profiler.addProfiler(make_ref()); -} + switch (settings.evalProfilerMode) { + case EvalProfilerMode::flamegraph: + profiler.addProfiler(makeSampleStackProfiler( + *this, settings.evalProfileFile.get(), settings.evalProfilerFrequency)); + break; + case EvalProfilerMode::disabled: + break; + } +} EvalState::~EvalState() { @@ -2236,7 +2244,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx } -bool EvalState::isFunctor(Value & fun) +bool EvalState::isFunctor(const Value & fun) const { return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end(); } diff --git a/src/libexpr/include/nix/expr/eval-profiler-settings.hh b/src/libexpr/include/nix/expr/eval-profiler-settings.hh new file mode 100644 index 000000000..a94cde042 --- /dev/null +++ b/src/libexpr/include/nix/expr/eval-profiler-settings.hh @@ -0,0 +1,16 @@ +#pragma once +///@file + +#include "nix/util/configuration.hh" + +namespace nix { + +enum struct EvalProfilerMode { disabled, flamegraph }; + +template<> +EvalProfilerMode BaseSetting::parse(const std::string & str) const; + +template<> +std::string BaseSetting::to_string() const; + +} diff --git a/src/libexpr/include/nix/expr/eval-profiler.hh b/src/libexpr/include/nix/expr/eval-profiler.hh index 763b737f7..9fee4ef35 100644 --- a/src/libexpr/include/nix/expr/eval-profiler.hh +++ b/src/libexpr/include/nix/expr/eval-profiler.hh @@ -11,6 +11,7 @@ #include #include #include +#include namespace nix { @@ -110,4 +111,7 @@ public: postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; }; +ref +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency); + } diff --git a/src/libexpr/include/nix/expr/eval-settings.hh b/src/libexpr/include/nix/expr/eval-settings.hh index 8d3db59b3..f1367b9bb 100644 --- a/src/libexpr/include/nix/expr/eval-settings.hh +++ b/src/libexpr/include/nix/expr/eval-settings.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "nix/expr/eval-profiler-settings.hh" #include "nix/util/configuration.hh" #include "nix/util/source-path.hh" @@ -193,6 +194,27 @@ struct EvalSettings : Config `flamegraph.pl`. )"}; + Setting evalProfilerMode{this, EvalProfilerMode::disabled, "eval-profiler", + R"( + Enables evaluation profiling. The following modes are supported: + + * `flamegraph` stack sampling profiler. Outputs folded format, one line per stack (suitable for `flamegraph.pl` and compatible tools). + + Use [`eval-profile-file`](#conf-eval-profile-file) to specify where the profile is saved. + )"}; + + Setting evalProfileFile{this, "nix.profile", "eval-profile-file", + R"( + Specifies the file where [evaluation profile](#conf-eval-profiler) is saved. + )"}; + + Setting evalProfilerFrequency{this, 99, "eval-profiler-frequency", + R"( + Specifies the sampling rate in hertz for sampling evaluation profilers. + Use `0` to sample the stack after each function call. + See [`eval-profiler`](#conf-eval-profiler). + )"}; + Setting useEvalCache{this, true, "eval-cache", R"( Whether to use the flake evaluation cache. diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index ffbc84bcd..de865ae7b 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -731,7 +731,7 @@ public: */ void assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::string_view errorCtx); - bool isFunctor(Value & fun); + bool isFunctor(const Value & fun) const; void callFunction(Value & fun, std::span args, Value & vRes, const PosIdx pos); diff --git a/src/libexpr/include/nix/expr/meson.build b/src/libexpr/include/nix/expr/meson.build index db902a616..333490ee4 100644 --- a/src/libexpr/include/nix/expr/meson.build +++ b/src/libexpr/include/nix/expr/meson.build @@ -14,6 +14,7 @@ headers = [config_pub_h] + files( 'eval-error.hh', 'eval-gc.hh', 'eval-inline.hh', + 'eval-profiler-settings.hh', 'eval-profiler.hh', 'eval-settings.hh', 'eval.hh', diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index dc50d2b19..f5adafae0 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -140,6 +140,7 @@ sources = files( 'eval-cache.cc', 'eval-error.cc', 'eval-gc.cc', + 'eval-profiler-settings.cc', 'eval-profiler.cc', 'eval-settings.cc', 'eval.cc',