1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-24 22:11:15 +02:00

Merge pull request #13220 from xokdvium/flamegraph

Stack sampling flamegraph profiler
This commit is contained in:
Jörg Thalheim 2025-05-23 10:42:49 +02:00 committed by GitHub
commit 906cc88f4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
14 changed files with 508 additions and 3 deletions

View file

@ -0,0 +1,13 @@
---
synopsis: Add stack sampling evaluation profiler
prs: [13220]
---
Nix evaluator now supports stack sampling evaluation profiling via `--eval-profiler flamegraph` setting.
It collects collapsed call stack information to output file specified by
`--eval-profile-file` (`nix.profile` by default) in a format directly consumable
by `flamegraph.pl` and compatible tools like [speedscope](https://speedscope.app/).
Sampling frequency can be configured via `--eval-profiler-frequency` (99 Hz by default).
Unlike existing `--trace-function-calls` this profiler includes the name of the function
being called when it's available.

View file

@ -57,6 +57,7 @@
- [Tuning Cores and Jobs](advanced-topics/cores-vs-jobs.md)
- [Verifying Build Reproducibility](advanced-topics/diff-hook.md)
- [Using the `post-build-hook`](advanced-topics/post-build-hook.md)
- [Evaluation profiler](advanced-topics/eval-profiler.md)
- [Command Reference](command-ref/index.md)
- [Common Options](command-ref/opt-common.md)
- [Common Environment Variables](command-ref/env-common.md)

View file

@ -0,0 +1,33 @@
# Using the `eval-profiler`
Nix evaluator supports [evaluation](@docroot@/language/evaluation.md)
[profiling](<https://en.wikipedia.org/wiki/Profiling_(computer_programming)>)
compatible with `flamegraph.pl`. The profiler samples the nix
function call stack at regular intervals. It can be enabled with the
[`eval-profiler`](@docroot@/command-ref/conf-file.md#conf-eval-profiler)
setting:
```console
$ nix-instantiate "<nixpkgs>" -A hello --eval-profiler flamegraph
```
Stack sampling frequency and the output file path can be configured with
[`eval-profile-file`](@docroot@/command-ref/conf-file.md#conf-eval-profile-file)
and [`eval-profiler-frequency`](@docroot@/command-ref/conf-file.md#conf-eval-profiler-frequency).
By default the collected profile is saved to `nix.profile` file in the current working directory.
The collected profile can be directly consumed by `flamegraph.pl`:
```console
$ flamegraph.pl nix.profile > flamegraph.svg
```
The line information in the profile contains the location of the [call
site](https://en.wikipedia.org/wiki/Call_site) position and the name of the
function being called (when available). For example:
```
/nix/store/x9wnkly3k1gkq580m90jjn32q9f05q2v-source/pkgs/top-level/default.nix:167:5:primop import
```
Here `import` primop is called at `/nix/store/x9wnkly3k1gkq580m90jjn32q9f05q2v-source/pkgs/top-level/default.nix:167:5`.

View file

@ -0,0 +1,49 @@
#include "nix/expr/eval-profiler-settings.hh"
#include "nix/util/configuration.hh"
#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */
#include "nix/util/config-impl.hh"
#include "nix/util/abstract-setting-to-json.hh"
#include <nlohmann/json.hpp>
namespace nix {
template<>
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const
{
if (str == "disabled")
return EvalProfilerMode::disabled;
else if (str == "flamegraph")
return EvalProfilerMode::flamegraph;
else
throw UsageError("option '%s' has invalid value '%s'", name, str);
}
template<>
struct BaseSetting<EvalProfilerMode>::trait
{
static constexpr bool appendable = false;
};
template<>
std::string BaseSetting<EvalProfilerMode>::to_string() const
{
if (value == EvalProfilerMode::disabled)
return "disabled";
else if (value == EvalProfilerMode::flamegraph)
return "flamegraph";
else
unreachable();
}
NLOHMANN_JSON_SERIALIZE_ENUM(
EvalProfilerMode,
{
{EvalProfilerMode::disabled, "disabled"},
{EvalProfilerMode::flamegraph, "flamegraph"},
});
/* Explicit instantiation of templates */
template class BaseSetting<EvalProfilerMode>;
}

View file

@ -1,5 +1,7 @@
#include "nix/expr/eval-profiler.hh"
#include "nix/expr/nixexpr.hh"
#include "nix/expr/eval.hh"
#include "nix/util/lru-cache.hh"
namespace nix {
@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
invalidateNeededHooks();
}
namespace {
class PosCache : private LRUCache<PosIdx, Pos>
{
const EvalState & state;
public:
PosCache(const EvalState & state)
: LRUCache(524288) /* ~40MiB */
, state(state)
{
}
Pos lookup(PosIdx posIdx)
{
auto posOrNone = LRUCache::get(posIdx);
if (posOrNone)
return *posOrNone;
auto pos = state.positions[posIdx];
upsert(posIdx, pos);
return pos;
}
};
struct LambdaFrameInfo
{
ExprLambda * expr;
/** Position where the lambda has been called. */
PosIdx callPos = noPos;
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
auto operator<=>(const LambdaFrameInfo & rhs) const = default;
};
/** Primop call. */
struct PrimOpFrameInfo
{
const PrimOp * expr;
/** Position where the primop has been called. */
PosIdx callPos = noPos;
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
auto operator<=>(const PrimOpFrameInfo & rhs) const = default;
};
/** Used for functor calls (attrset with __functor attr). */
struct FunctorFrameInfo
{
PosIdx pos;
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
auto operator<=>(const FunctorFrameInfo & rhs) const = default;
};
/** Fallback frame info. */
struct GenericFrameInfo
{
PosIdx pos;
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
auto operator<=>(const GenericFrameInfo & rhs) const = default;
};
using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>;
using FrameStack = std::vector<FrameInfo>;
/**
* Stack sampling profiler.
*/
class SampleStack : public EvalProfiler
{
/* How often stack profiles should be flushed to file. This avoids the need
to persist stack samples across the whole evaluation at the cost
of periodically flushing data to disk. */
static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000);
Hooks getNeededHooksImpl() const override
{
return Hooks().set(preFunctionCall).set(postFunctionCall);
}
public:
SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period)
: state(state)
, sampleInterval(period)
, profileFd([&]() {
AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660));
if (!fd)
throw SysError("opening file %s", profileFile);
return fd;
}())
, posCache(state)
{
}
[[gnu::noinline]] void
preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
[[gnu::noinline]] void
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now);
void saveProfile();
FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos);
SampleStack(SampleStack &&) = default;
SampleStack & operator=(SampleStack &&) = delete;
SampleStack(const SampleStack &) = delete;
SampleStack & operator=(const SampleStack &) = delete;
~SampleStack();
private:
/** Hold on to an instance of EvalState for symbolizing positions. */
const EvalState & state;
std::chrono::nanoseconds sampleInterval;
AutoCloseFD profileFd;
FrameStack stack;
std::map<FrameStack, uint32_t> callCount;
std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample =
std::chrono::high_resolution_clock::now();
std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now();
PosCache posCache;
};
FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos)
{
/* NOTE: No actual references to garbage collected values are not held in
the profiler. */
if (v.isLambda())
return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos};
else if (v.isPrimOp())
return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos};
else if (v.isPrimOpApp())
/* Resolve primOp eagerly. Must not hold on to a reference to a Value. */
return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos};
else if (state.isFunctor(v)) {
const auto functor = v.attrs()->get(state.sFunctor);
if (auto pos_ = posCache.lookup(pos); std::holds_alternative<std::monostate>(pos_.origin))
/* HACK: In case callsite position is unresolved. */
return FunctorFrameInfo{.pos = functor->pos};
return FunctorFrameInfo{.pos = pos};
} else
/* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what
* trace-function-calls does. */
return GenericFrameInfo{.pos = pos};
}
[[gnu::noinline]] void SampleStack::preFunctionCallHook(
const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos)
{
stack.push_back(getFrameInfoFromValueAndPos(v, pos));
auto now = std::chrono::high_resolution_clock::now();
if (now - lastStackSample > sampleInterval) {
callCount[stack] += 1;
lastStackSample = now;
}
/* Do this in preFunctionCallHook because we might throw an exception, but
callFunction uses Finally, which doesn't play well with exceptions. */
maybeSaveProfile(now);
}
[[gnu::noinline]] void
SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos)
{
if (!stack.empty())
stack.pop_back();
}
std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
if (auto pos = posCache.lookup(callPos); std::holds_alternative<std::monostate>(pos.origin))
/* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved
resort to printing the lambda location instead of the callsite position. */
os << posCache.lookup(expr->getPos());
else
os << pos;
if (expr->name)
os << ":" << state.symbols[expr->name];
return os;
}
std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
os << posCache.lookup(pos);
return os;
}
std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
os << posCache.lookup(pos) << ":functor";
return os;
}
std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
/* Sometimes callsite position can have an unresolved origin, which
leads to confusing «none»:0 locations in the profile. */
auto pos = posCache.lookup(callPos);
if (!std::holds_alternative<std::monostate>(pos.origin))
os << posCache.lookup(callPos) << ":";
os << *expr;
return os;
}
void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now)
{
if (now - lastDump >= profileDumpInterval)
saveProfile();
else
return;
/* Save the last dump timepoint. Do this after actually saving data to file
to not account for the time doing the flushing to disk. */
lastDump = std::chrono::high_resolution_clock::now();
/* Free up memory used for stack sampling. This might be very significant for
long-running evaluations, so we shouldn't hog too much memory. */
callCount.clear();
}
void SampleStack::saveProfile()
{
auto os = std::ostringstream{};
for (auto & [stack, count] : callCount) {
auto first = true;
for (auto & pos : stack) {
if (first)
first = false;
else
os << ";";
std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos);
}
os << " " << count;
writeLine(profileFd.get(), std::move(os).str());
/* Clear ostringstream. */
os.str("");
os.clear();
}
}
SampleStack::~SampleStack()
{
/* Guard against cases when we are already unwinding the stack. */
try {
saveProfile();
} catch (...) {
ignoreExceptionInDestructor();
}
}
} // namespace
ref<EvalProfiler>
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency)
{
/* 0 is a special value for sampling stack after each call. */
std::chrono::nanoseconds period = frequency == 0
? std::chrono::nanoseconds{0}
: std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num};
return make_ref<SampleStack>(state, profileFile, period);
}
}

View file

@ -376,8 +376,16 @@ EvalState::EvalState(
/* Register function call tracer. */
if (settings.traceFunctionCalls)
profiler.addProfiler(make_ref<FunctionCallTrace>());
}
switch (settings.evalProfilerMode) {
case EvalProfilerMode::flamegraph:
profiler.addProfiler(makeSampleStackProfiler(
*this, settings.evalProfileFile.get(), settings.evalProfilerFrequency));
break;
case EvalProfilerMode::disabled:
break;
}
}
EvalState::~EvalState()
{
@ -2236,7 +2244,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx
}
bool EvalState::isFunctor(Value & fun)
bool EvalState::isFunctor(const Value & fun) const
{
return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end();
}

View file

@ -0,0 +1,16 @@
#pragma once
///@file
#include "nix/util/configuration.hh"
namespace nix {
enum struct EvalProfilerMode { disabled, flamegraph };
template<>
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const;
template<>
std::string BaseSetting<EvalProfilerMode>::to_string() const;
}

View file

@ -11,6 +11,7 @@
#include <span>
#include <bitset>
#include <optional>
#include <filesystem>
namespace nix {
@ -110,4 +111,7 @@ public:
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
};
ref<EvalProfiler>
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency);
}

View file

@ -1,6 +1,7 @@
#pragma once
///@file
#include "nix/expr/eval-profiler-settings.hh"
#include "nix/util/configuration.hh"
#include "nix/util/source-path.hh"
@ -193,6 +194,27 @@ struct EvalSettings : Config
`flamegraph.pl`.
)"};
Setting<EvalProfilerMode> evalProfilerMode{this, EvalProfilerMode::disabled, "eval-profiler",
R"(
Enables evaluation profiling. The following modes are supported:
* `flamegraph` stack sampling profiler. Outputs folded format, one line per stack (suitable for `flamegraph.pl` and compatible tools).
Use [`eval-profile-file`](#conf-eval-profile-file) to specify where the profile is saved.
)"};
Setting<Path> evalProfileFile{this, "nix.profile", "eval-profile-file",
R"(
Specifies the file where [evaluation profile](#conf-eval-profiler) is saved.
)"};
Setting<uint32_t> evalProfilerFrequency{this, 99, "eval-profiler-frequency",
R"(
Specifies the sampling rate in hertz for sampling evaluation profilers.
Use `0` to sample the stack after each function call.
See [`eval-profiler`](#conf-eval-profiler).
)"};
Setting<bool> useEvalCache{this, true, "eval-cache",
R"(
Whether to use the flake evaluation cache.

View file

@ -731,7 +731,7 @@ public:
*/
void assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::string_view errorCtx);
bool isFunctor(Value & fun);
bool isFunctor(const Value & fun) const;
void callFunction(Value & fun, std::span<Value *> args, Value & vRes, const PosIdx pos);

View file

@ -14,6 +14,7 @@ headers = [config_pub_h] + files(
'eval-error.hh',
'eval-gc.hh',
'eval-inline.hh',
'eval-profiler-settings.hh',
'eval-profiler.hh',
'eval-settings.hh',
'eval.hh',

View file

@ -140,6 +140,7 @@ sources = files(
'eval-cache.cc',
'eval-error.cc',
'eval-gc.cc',
'eval-profiler-settings.cc',
'eval-profiler.cc',
'eval-settings.cc',
'eval.cc',

View file

@ -0,0 +1,91 @@
#!/usr/bin/env bash
source common.sh
set +x
expect_trace() {
expr="$1"
expect="$2"
actual=$(
nix-instantiate \
--eval-profiler flamegraph \
--eval-profiler-frequency 0 \
--eval-profile-file /dev/stdout \
--expr "$expr" |
grep "«string»" || true
)
echo -n "Tracing expression '$expr'"
msg=$(
diff -swB \
<(echo "$expect") \
<(echo "$actual")
) && result=0 || result=$?
if [ "$result" -eq 0 ]; then
echo " ok."
else
echo " failed. difference:"
echo "$msg"
return "$result"
fi
}
# lambda
expect_trace 'let f = arg: arg; in f 1' "
«string»:1:22:f 1
"
# unnamed lambda
expect_trace '(arg: arg) 1' "
«string»:1:1 1
"
# primop
expect_trace 'builtins.head [0 1]' "
«string»:1:1:primop head 1
"
# primop application
expect_trace 'let a = builtins.all (let f = x: x; in f); in a [1]' "
«string»:1:9:primop all 1
«string»:1:47:primop all 1
«string»:1:47:primop all;«string»:1:31:f 1
"
# functor
expect_trace '{__functor = x: arg: arg;} 1' "
«string»:1:1:functor 1
«string»:1:1:functor;«string»:1:2 1
"
# failure inside a tryEval
expect_trace 'builtins.tryEval (throw "example")' "
«string»:1:1:primop tryEval 1
«string»:1:1:primop tryEval;«string»:1:19:primop throw 1
"
# Missing argument to a formal function
expect_trace 'let f = ({ x }: x); in f { }' "
«string»:1:24:f 1
"
# Too many arguments to a formal function
expect_trace 'let f = ({ x }: x); in f { x = "x"; y = "y"; }' "
«string»:1:24:f 1
"
# Not enough arguments to a lambda
expect_trace 'let f = (x: y: x + y); in f 1' "
«string»:1:27:f 1
"
# Too many arguments to a lambda
expect_trace 'let f2 = (x: x); in f2 1 2' "
«string»:1:21:f2 1
"
# Not a function
expect_trace '1 2' "
«string»:1:1 1
"

View file

@ -133,6 +133,7 @@ suites = [
'post-hook.sh',
'function-trace.sh',
'formatter.sh',
'flamegraph-profiler.sh',
'eval-store.sh',
'why-depends.sh',
'derivation-json.sh',