diff --git a/doc/manual/rl-next/eval-profiler.md b/doc/manual/rl-next/eval-profiler.md new file mode 100644 index 000000000..6b2bf2ce7 --- /dev/null +++ b/doc/manual/rl-next/eval-profiler.md @@ -0,0 +1,13 @@ +--- +synopsis: Add stack sampling evaluation profiler +prs: [13220] +--- + +Nix evaluator now supports stack sampling evaluation profiling via `--eval-profiler flamegraph` setting. +It collects collapsed call stack information to output file specified by +`--eval-profile-file` (`nix.profile` by default) in a format directly consumable +by `flamegraph.pl` and compatible tools like [speedscope](https://speedscope.app/). +Sampling frequency can be configured via `--eval-profiler-frequency` (99 Hz by default). + +Unlike existing `--trace-function-calls` this profiler includes the name of the function +being called when it's available. diff --git a/doc/manual/source/SUMMARY.md.in b/doc/manual/source/SUMMARY.md.in index 6711bd4be..8326a96e3 100644 --- a/doc/manual/source/SUMMARY.md.in +++ b/doc/manual/source/SUMMARY.md.in @@ -57,6 +57,7 @@ - [Tuning Cores and Jobs](advanced-topics/cores-vs-jobs.md) - [Verifying Build Reproducibility](advanced-topics/diff-hook.md) - [Using the `post-build-hook`](advanced-topics/post-build-hook.md) + - [Evaluation profiler](advanced-topics/eval-profiler.md) - [Command Reference](command-ref/index.md) - [Common Options](command-ref/opt-common.md) - [Common Environment Variables](command-ref/env-common.md) diff --git a/doc/manual/source/advanced-topics/eval-profiler.md b/doc/manual/source/advanced-topics/eval-profiler.md new file mode 100644 index 000000000..ed3848bb2 --- /dev/null +++ b/doc/manual/source/advanced-topics/eval-profiler.md @@ -0,0 +1,33 @@ +# Using the `eval-profiler` + +Nix evaluator supports [evaluation](@docroot@/language/evaluation.md) +[profiling]() +compatible with `flamegraph.pl`. The profiler samples the nix +function call stack at regular intervals. It can be enabled with the +[`eval-profiler`](@docroot@/command-ref/conf-file.md#conf-eval-profiler) +setting: + +```console +$ nix-instantiate "" -A hello --eval-profiler flamegraph +``` + +Stack sampling frequency and the output file path can be configured with +[`eval-profile-file`](@docroot@/command-ref/conf-file.md#conf-eval-profile-file) +and [`eval-profiler-frequency`](@docroot@/command-ref/conf-file.md#conf-eval-profiler-frequency). +By default the collected profile is saved to `nix.profile` file in the current working directory. + +The collected profile can be directly consumed by `flamegraph.pl`: + +```console +$ flamegraph.pl nix.profile > flamegraph.svg +``` + +The line information in the profile contains the location of the [call +site](https://en.wikipedia.org/wiki/Call_site) position and the name of the +function being called (when available). For example: + +``` +/nix/store/x9wnkly3k1gkq580m90jjn32q9f05q2v-source/pkgs/top-level/default.nix:167:5:primop import +``` + +Here `import` primop is called at `/nix/store/x9wnkly3k1gkq580m90jjn32q9f05q2v-source/pkgs/top-level/default.nix:167:5`. diff --git a/src/libexpr/eval-profiler-settings.cc b/src/libexpr/eval-profiler-settings.cc new file mode 100644 index 000000000..1a35d4a2d --- /dev/null +++ b/src/libexpr/eval-profiler-settings.cc @@ -0,0 +1,49 @@ +#include "nix/expr/eval-profiler-settings.hh" +#include "nix/util/configuration.hh" +#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */ +#include "nix/util/config-impl.hh" +#include "nix/util/abstract-setting-to-json.hh" + +#include + +namespace nix { + +template<> +EvalProfilerMode BaseSetting::parse(const std::string & str) const +{ + if (str == "disabled") + return EvalProfilerMode::disabled; + else if (str == "flamegraph") + return EvalProfilerMode::flamegraph; + else + throw UsageError("option '%s' has invalid value '%s'", name, str); +} + +template<> +struct BaseSetting::trait +{ + static constexpr bool appendable = false; +}; + +template<> +std::string BaseSetting::to_string() const +{ + if (value == EvalProfilerMode::disabled) + return "disabled"; + else if (value == EvalProfilerMode::flamegraph) + return "flamegraph"; + else + unreachable(); +} + +NLOHMANN_JSON_SERIALIZE_ENUM( + EvalProfilerMode, + { + {EvalProfilerMode::disabled, "disabled"}, + {EvalProfilerMode::flamegraph, "flamegraph"}, + }); + +/* Explicit instantiation of templates */ +template class BaseSetting; + +} diff --git a/src/libexpr/eval-profiler.cc b/src/libexpr/eval-profiler.cc index 5374d7d99..f08b46e6b 100644 --- a/src/libexpr/eval-profiler.cc +++ b/src/libexpr/eval-profiler.cc @@ -1,5 +1,7 @@ #include "nix/expr/eval-profiler.hh" #include "nix/expr/nixexpr.hh" +#include "nix/expr/eval.hh" +#include "nix/util/lru-cache.hh" namespace nix { @@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref profiler) invalidateNeededHooks(); } +namespace { + +class PosCache : private LRUCache +{ + const EvalState & state; + +public: + PosCache(const EvalState & state) + : LRUCache(524288) /* ~40MiB */ + , state(state) + { + } + + Pos lookup(PosIdx posIdx) + { + auto posOrNone = LRUCache::get(posIdx); + if (posOrNone) + return *posOrNone; + + auto pos = state.positions[posIdx]; + upsert(posIdx, pos); + return pos; + } +}; + +struct LambdaFrameInfo +{ + ExprLambda * expr; + /** Position where the lambda has been called. */ + PosIdx callPos = noPos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const LambdaFrameInfo & rhs) const = default; +}; + +/** Primop call. */ +struct PrimOpFrameInfo +{ + const PrimOp * expr; + /** Position where the primop has been called. */ + PosIdx callPos = noPos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const PrimOpFrameInfo & rhs) const = default; +}; + +/** Used for functor calls (attrset with __functor attr). */ +struct FunctorFrameInfo +{ + PosIdx pos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const FunctorFrameInfo & rhs) const = default; +}; + +/** Fallback frame info. */ +struct GenericFrameInfo +{ + PosIdx pos; + std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const; + auto operator<=>(const GenericFrameInfo & rhs) const = default; +}; + +using FrameInfo = std::variant; +using FrameStack = std::vector; + +/** + * Stack sampling profiler. + */ +class SampleStack : public EvalProfiler +{ + /* How often stack profiles should be flushed to file. This avoids the need + to persist stack samples across the whole evaluation at the cost + of periodically flushing data to disk. */ + static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000); + + Hooks getNeededHooksImpl() const override + { + return Hooks().set(preFunctionCall).set(postFunctionCall); + } + +public: + SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period) + : state(state) + , sampleInterval(period) + , profileFd([&]() { + AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660)); + if (!fd) + throw SysError("opening file %s", profileFile); + return fd; + }()) + , posCache(state) + { + } + + [[gnu::noinline]] void + preFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; + [[gnu::noinline]] void + postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; + + void maybeSaveProfile(std::chrono::time_point now); + void saveProfile(); + FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos); + + SampleStack(SampleStack &&) = default; + SampleStack & operator=(SampleStack &&) = delete; + SampleStack(const SampleStack &) = delete; + SampleStack & operator=(const SampleStack &) = delete; + ~SampleStack(); + +private: + /** Hold on to an instance of EvalState for symbolizing positions. */ + const EvalState & state; + std::chrono::nanoseconds sampleInterval; + AutoCloseFD profileFd; + FrameStack stack; + std::map callCount; + std::chrono::time_point lastStackSample = + std::chrono::high_resolution_clock::now(); + std::chrono::time_point lastDump = std::chrono::high_resolution_clock::now(); + PosCache posCache; +}; + +FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos) +{ + /* NOTE: No actual references to garbage collected values are not held in + the profiler. */ + if (v.isLambda()) + return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos}; + else if (v.isPrimOp()) + return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos}; + else if (v.isPrimOpApp()) + /* Resolve primOp eagerly. Must not hold on to a reference to a Value. */ + return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos}; + else if (state.isFunctor(v)) { + const auto functor = v.attrs()->get(state.sFunctor); + if (auto pos_ = posCache.lookup(pos); std::holds_alternative(pos_.origin)) + /* HACK: In case callsite position is unresolved. */ + return FunctorFrameInfo{.pos = functor->pos}; + return FunctorFrameInfo{.pos = pos}; + } else + /* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what + * trace-function-calls does. */ + return GenericFrameInfo{.pos = pos}; +} + +[[gnu::noinline]] void SampleStack::preFunctionCallHook( + const EvalState & state, const Value & v, [[maybe_unused]] std::span args, const PosIdx pos) +{ + stack.push_back(getFrameInfoFromValueAndPos(v, pos)); + + auto now = std::chrono::high_resolution_clock::now(); + + if (now - lastStackSample > sampleInterval) { + callCount[stack] += 1; + lastStackSample = now; + } + + /* Do this in preFunctionCallHook because we might throw an exception, but + callFunction uses Finally, which doesn't play well with exceptions. */ + maybeSaveProfile(now); +} + +[[gnu::noinline]] void +SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) +{ + + if (!stack.empty()) + stack.pop_back(); +} + +std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + if (auto pos = posCache.lookup(callPos); std::holds_alternative(pos.origin)) + /* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved + resort to printing the lambda location instead of the callsite position. */ + os << posCache.lookup(expr->getPos()); + else + os << pos; + if (expr->name) + os << ":" << state.symbols[expr->name]; + return os; +} + +std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + os << posCache.lookup(pos); + return os; +} + +std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + os << posCache.lookup(pos) << ":functor"; + return os; +} + +std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const +{ + /* Sometimes callsite position can have an unresolved origin, which + leads to confusing «none»:0 locations in the profile. */ + auto pos = posCache.lookup(callPos); + if (!std::holds_alternative(pos.origin)) + os << posCache.lookup(callPos) << ":"; + os << *expr; + return os; +} + +void SampleStack::maybeSaveProfile(std::chrono::time_point now) +{ + if (now - lastDump >= profileDumpInterval) + saveProfile(); + else + return; + + /* Save the last dump timepoint. Do this after actually saving data to file + to not account for the time doing the flushing to disk. */ + lastDump = std::chrono::high_resolution_clock::now(); + + /* Free up memory used for stack sampling. This might be very significant for + long-running evaluations, so we shouldn't hog too much memory. */ + callCount.clear(); +} + +void SampleStack::saveProfile() +{ + auto os = std::ostringstream{}; + for (auto & [stack, count] : callCount) { + auto first = true; + for (auto & pos : stack) { + if (first) + first = false; + else + os << ";"; + + std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos); + } + os << " " << count; + writeLine(profileFd.get(), std::move(os).str()); + /* Clear ostringstream. */ + os.str(""); + os.clear(); + } +} + +SampleStack::~SampleStack() +{ + /* Guard against cases when we are already unwinding the stack. */ + try { + saveProfile(); + } catch (...) { + ignoreExceptionInDestructor(); + } +} + +} // namespace + +ref +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency) +{ + /* 0 is a special value for sampling stack after each call. */ + std::chrono::nanoseconds period = frequency == 0 + ? std::chrono::nanoseconds{0} + : std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num}; + return make_ref(state, profileFile, period); +} + } diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 1b87cf42f..ff7df249a 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -376,8 +376,16 @@ EvalState::EvalState( /* Register function call tracer. */ if (settings.traceFunctionCalls) profiler.addProfiler(make_ref()); -} + switch (settings.evalProfilerMode) { + case EvalProfilerMode::flamegraph: + profiler.addProfiler(makeSampleStackProfiler( + *this, settings.evalProfileFile.get(), settings.evalProfilerFrequency)); + break; + case EvalProfilerMode::disabled: + break; + } +} EvalState::~EvalState() { @@ -2236,7 +2244,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx } -bool EvalState::isFunctor(Value & fun) +bool EvalState::isFunctor(const Value & fun) const { return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end(); } diff --git a/src/libexpr/include/nix/expr/eval-profiler-settings.hh b/src/libexpr/include/nix/expr/eval-profiler-settings.hh new file mode 100644 index 000000000..a94cde042 --- /dev/null +++ b/src/libexpr/include/nix/expr/eval-profiler-settings.hh @@ -0,0 +1,16 @@ +#pragma once +///@file + +#include "nix/util/configuration.hh" + +namespace nix { + +enum struct EvalProfilerMode { disabled, flamegraph }; + +template<> +EvalProfilerMode BaseSetting::parse(const std::string & str) const; + +template<> +std::string BaseSetting::to_string() const; + +} diff --git a/src/libexpr/include/nix/expr/eval-profiler.hh b/src/libexpr/include/nix/expr/eval-profiler.hh index 763b737f7..9fee4ef35 100644 --- a/src/libexpr/include/nix/expr/eval-profiler.hh +++ b/src/libexpr/include/nix/expr/eval-profiler.hh @@ -11,6 +11,7 @@ #include #include #include +#include namespace nix { @@ -110,4 +111,7 @@ public: postFunctionCallHook(const EvalState & state, const Value & v, std::span args, const PosIdx pos) override; }; +ref +makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency); + } diff --git a/src/libexpr/include/nix/expr/eval-settings.hh b/src/libexpr/include/nix/expr/eval-settings.hh index 8d3db59b3..f1367b9bb 100644 --- a/src/libexpr/include/nix/expr/eval-settings.hh +++ b/src/libexpr/include/nix/expr/eval-settings.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "nix/expr/eval-profiler-settings.hh" #include "nix/util/configuration.hh" #include "nix/util/source-path.hh" @@ -193,6 +194,27 @@ struct EvalSettings : Config `flamegraph.pl`. )"}; + Setting evalProfilerMode{this, EvalProfilerMode::disabled, "eval-profiler", + R"( + Enables evaluation profiling. The following modes are supported: + + * `flamegraph` stack sampling profiler. Outputs folded format, one line per stack (suitable for `flamegraph.pl` and compatible tools). + + Use [`eval-profile-file`](#conf-eval-profile-file) to specify where the profile is saved. + )"}; + + Setting evalProfileFile{this, "nix.profile", "eval-profile-file", + R"( + Specifies the file where [evaluation profile](#conf-eval-profiler) is saved. + )"}; + + Setting evalProfilerFrequency{this, 99, "eval-profiler-frequency", + R"( + Specifies the sampling rate in hertz for sampling evaluation profilers. + Use `0` to sample the stack after each function call. + See [`eval-profiler`](#conf-eval-profiler). + )"}; + Setting useEvalCache{this, true, "eval-cache", R"( Whether to use the flake evaluation cache. diff --git a/src/libexpr/include/nix/expr/eval.hh b/src/libexpr/include/nix/expr/eval.hh index ffbc84bcd..de865ae7b 100644 --- a/src/libexpr/include/nix/expr/eval.hh +++ b/src/libexpr/include/nix/expr/eval.hh @@ -731,7 +731,7 @@ public: */ void assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::string_view errorCtx); - bool isFunctor(Value & fun); + bool isFunctor(const Value & fun) const; void callFunction(Value & fun, std::span args, Value & vRes, const PosIdx pos); diff --git a/src/libexpr/include/nix/expr/meson.build b/src/libexpr/include/nix/expr/meson.build index db902a616..333490ee4 100644 --- a/src/libexpr/include/nix/expr/meson.build +++ b/src/libexpr/include/nix/expr/meson.build @@ -14,6 +14,7 @@ headers = [config_pub_h] + files( 'eval-error.hh', 'eval-gc.hh', 'eval-inline.hh', + 'eval-profiler-settings.hh', 'eval-profiler.hh', 'eval-settings.hh', 'eval.hh', diff --git a/src/libexpr/meson.build b/src/libexpr/meson.build index dc50d2b19..f5adafae0 100644 --- a/src/libexpr/meson.build +++ b/src/libexpr/meson.build @@ -140,6 +140,7 @@ sources = files( 'eval-cache.cc', 'eval-error.cc', 'eval-gc.cc', + 'eval-profiler-settings.cc', 'eval-profiler.cc', 'eval-settings.cc', 'eval.cc', diff --git a/tests/functional/flamegraph-profiler.sh b/tests/functional/flamegraph-profiler.sh new file mode 100755 index 000000000..0c35037a8 --- /dev/null +++ b/tests/functional/flamegraph-profiler.sh @@ -0,0 +1,91 @@ +#!/usr/bin/env bash + +source common.sh + +set +x + +expect_trace() { + expr="$1" + expect="$2" + actual=$( + nix-instantiate \ + --eval-profiler flamegraph \ + --eval-profiler-frequency 0 \ + --eval-profile-file /dev/stdout \ + --expr "$expr" | + grep "«string»" || true + ) + + echo -n "Tracing expression '$expr'" + msg=$( + diff -swB \ + <(echo "$expect") \ + <(echo "$actual") + ) && result=0 || result=$? + if [ "$result" -eq 0 ]; then + echo " ok." + else + echo " failed. difference:" + echo "$msg" + return "$result" + fi +} + +# lambda +expect_trace 'let f = arg: arg; in f 1' " +«string»:1:22:f 1 +" + +# unnamed lambda +expect_trace '(arg: arg) 1' " +«string»:1:1 1 +" + +# primop +expect_trace 'builtins.head [0 1]' " +«string»:1:1:primop head 1 +" + +# primop application +expect_trace 'let a = builtins.all (let f = x: x; in f); in a [1]' " +«string»:1:9:primop all 1 +«string»:1:47:primop all 1 +«string»:1:47:primop all;«string»:1:31:f 1 +" + +# functor +expect_trace '{__functor = x: arg: arg;} 1' " +«string»:1:1:functor 1 +«string»:1:1:functor;«string»:1:2 1 +" + +# failure inside a tryEval +expect_trace 'builtins.tryEval (throw "example")' " +«string»:1:1:primop tryEval 1 +«string»:1:1:primop tryEval;«string»:1:19:primop throw 1 +" + +# Missing argument to a formal function +expect_trace 'let f = ({ x }: x); in f { }' " +«string»:1:24:f 1 +" + +# Too many arguments to a formal function +expect_trace 'let f = ({ x }: x); in f { x = "x"; y = "y"; }' " +«string»:1:24:f 1 +" + +# Not enough arguments to a lambda +expect_trace 'let f = (x: y: x + y); in f 1' " +«string»:1:27:f 1 +" + +# Too many arguments to a lambda +expect_trace 'let f2 = (x: x); in f2 1 2' " +«string»:1:21:f2 1 +" + +# Not a function +expect_trace '1 2' " +«string»:1:1 1 +" diff --git a/tests/functional/meson.build b/tests/functional/meson.build index b2005d9d9..f5a19ac64 100644 --- a/tests/functional/meson.build +++ b/tests/functional/meson.build @@ -133,6 +133,7 @@ suites = [ 'post-hook.sh', 'function-trace.sh', 'formatter.sh', + 'flamegraph-profiler.sh', 'eval-store.sh', 'why-depends.sh', 'derivation-json.sh',