1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-24 22:11:15 +02:00

libexpr: Add SampleStack stack-sampling profiler

This patch adds support for a native stack-sampling
profiler to the evaluator, which saves collapsed stack
profile information to a configurable location.

Introduced options (in `EvalSettings`):

- `eval-profile-file` - path to the collected profile file.
- `eval-profiler-frequency` - sampling frequency.
- `eval-profiler` - enumeration option for enabling the profiler.

  Currently only `flamegraph` is supported, but making this an
  enumeration rather than a boolean switch leaves the door open
  for other profiler variants (e.g. tracy).

The profile includes the following information on a best-effort basis (e.g. some lambdas might
have an undefined name). Callstack information contains:

- Call site location (where the function gets called).
- Primop/lambda name of the function being called.
- Functors/partial applications don't have a name attached to them, unlike special-cased primops and lambdas.

For cases where callsite location isn't available we have to resort to providing
the location where the lambda itself is defined. This removes some of the confusing
`«none»:0` locations in the profile from previous attempts.

Example usage with piping directly into zstd for compression:

```
nix eval --no-eval-cache nixpkgs#nixosTests.gnome \
  --eval-profiler flamegraph \
  --eval-profile-file >(zstd -of nix.profile.zstd)
```

Co-authored-by: Jörg Thalheim <joerg@thalheim.io>
This commit is contained in:
Sergei Zimmerman 2025-05-18 22:24:14 +00:00
parent e22142e11a
commit 5e74c0e4d6
No known key found for this signature in database
GPG key ID: A9B0B557CA632325
9 changed files with 369 additions and 3 deletions

View file

@ -0,0 +1,49 @@
#include "nix/expr/eval-profiler-settings.hh"
#include "nix/util/configuration.hh"
#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */
#include "nix/util/config-impl.hh"
#include "nix/util/abstract-setting-to-json.hh"
#include <nlohmann/json.hpp>
namespace nix {
/**
 * Parse the textual value of an `EvalProfilerMode` setting.
 *
 * Accepts exactly "disabled" and "flamegraph"; any other string raises a
 * `UsageError` that mentions the setting name and the offending value.
 */
template<>
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const
{
    if (str == "disabled")
        return EvalProfilerMode::disabled;

    if (str == "flamegraph")
        return EvalProfilerMode::flamegraph;

    throw UsageError("option '%s' has invalid value '%s'", name, str);
}
/* Setting trait for EvalProfilerMode: the setting is marked as not
   appendable. */
template<>
struct BaseSetting<EvalProfilerMode>::trait
{
static constexpr bool appendable = false;
};
/**
 * Render the current `EvalProfilerMode` value as the same string that
 * `parse()` accepts ("disabled" or "flamegraph").
 *
 * Any other enumerator value ends up in `unreachable()`.
 */
template<>
std::string BaseSetting<EvalProfilerMode>::to_string() const
{
    switch (value) {
    case EvalProfilerMode::disabled:
        return "disabled";
    case EvalProfilerMode::flamegraph:
        return "flamegraph";
    }
    unreachable();
}
/* JSON mapping for EvalProfilerMode. The string names are the same ones
   used by parse() and to_string() above. */
NLOHMANN_JSON_SERIALIZE_ENUM(
EvalProfilerMode,
{
{EvalProfilerMode::disabled, "disabled"},
{EvalProfilerMode::flamegraph, "flamegraph"},
});
/* Explicit instantiation of the BaseSetting template for EvalProfilerMode,
   so its members are emitted in this translation unit. */
template class BaseSetting<EvalProfilerMode>;
}

View file

@ -1,5 +1,7 @@
#include "nix/expr/eval-profiler.hh"
#include "nix/expr/nixexpr.hh"
#include "nix/expr/eval.hh"
#include "nix/util/lru-cache.hh"
namespace nix {
@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
invalidateNeededHooks();
}
namespace {
/**
 * Cache of resolved positions, keyed by `PosIdx`.
 *
 * Built on top of `LRUCache` (inherited privately, so only `lookup()` is
 * exposed). Positions that are not cached yet are resolved through
 * `EvalState::positions` and remembered for subsequent lookups.
 */
class PosCache : private LRUCache<PosIdx, Pos>
{
    /** Evaluator state whose position table is consulted on a cache miss. */
    const EvalState & state;

public:
    PosCache(const EvalState & state)
        : LRUCache(524288) /* ~40MiB */
        , state(state)
    {
    }

    /**
     * Return the resolved position for `posIdx`, resolving and caching it
     * first if it is not in the cache yet.
     */
    Pos lookup(PosIdx posIdx)
    {
        if (auto cached = LRUCache::get(posIdx))
            return *cached;

        /* Cache miss: resolve via the position table and remember the result. */
        auto resolved = state.positions[posIdx];
        upsert(posIdx, resolved);
        return resolved;
    }
};
/** Lambda call. */
struct LambdaFrameInfo
{
/** Lambda expression being called. */
ExprLambda * expr;
/** Position where the lambda has been called. */
PosIdx callPos = noPos;
/** Writes a textual description of this frame to `os` and returns `os`. */
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
/* Defaulted comparison; stacks of frames are used as keys of an ordered map. */
auto operator<=>(const LambdaFrameInfo & rhs) const = default;
};
/** Primop call (also used for partially applied primops). */
struct PrimOpFrameInfo
{
/** Primop being called. */
const PrimOp * expr;
/** Position where the primop has been called. */
PosIdx callPos = noPos;
/** Writes a textual description of this frame to `os` and returns `os`. */
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
/* Defaulted comparison; stacks of frames are used as keys of an ordered map. */
auto operator<=>(const PrimOpFrameInfo & rhs) const = default;
};
/** Used for functor calls (attrset with __functor attr). */
struct FunctorFrameInfo
{
/** Call site position, or the position of the `__functor` attribute when
    the origin of the call site is unresolved. */
PosIdx pos;
/** Writes the position followed by `:functor` to `os` and returns `os`. */
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
/* Defaulted comparison; stacks of frames are used as keys of an ordered map. */
auto operator<=>(const FunctorFrameInfo & rhs) const = default;
};
/** Fallback frame info, used when the called value is neither a lambda,
    a primop (application) nor a functor. */
struct GenericFrameInfo
{
/** Position of the call. */
PosIdx pos;
/** Writes the position to `os` and returns `os`. */
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
/* Defaulted comparison; stacks of frames are used as keys of an ordered map. */
auto operator<=>(const GenericFrameInfo & rhs) const = default;
};
/** A single entry of the profiler's shadow call stack. */
using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>;
/** Call stack as tracked by the profiler; frames are appended as calls are entered. */
using FrameStack = std::vector<FrameInfo>;
/**
 * Stack sampling profiler.
 *
 * Tracks a shadow call stack through the pre/post function call hooks,
 * records the current stack into `callCount` whenever a sample is due, and
 * writes the aggregated samples to `profileFd` (one line per distinct stack,
 * see saveProfile()).
 */
class SampleStack : public EvalProfiler
{
/* How often stack profiles should be flushed to file. This avoids the need
to persist stack samples across the whole evaluation at the cost
of periodically flushing data to disk. */
static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000);
/* Only the function call hooks are requested by this profiler. */
Hooks getNeededHooksImpl() const override
{
return Hooks().set(preFunctionCall).set(postFunctionCall);
}
public:
/**
 * Opens `profileFile` for writing (created if missing, truncated otherwise,
 * mode 0660) and throws `SysError` if that fails. `period` is stored as the
 * sampling interval.
 *
 * NOTE(review): the file is opened without O_CLOEXEC — confirm whether the
 * descriptor is meant to be inherited by child processes.
 */
SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period)
: state(state)
, sampleInterval(period)
, profileFd([&]() {
AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660));
if (!fd)
throw SysError("opening file %s", profileFile);
return fd;
}())
, posCache(state)
{
}
/** Pushes a frame for the call and possibly records a sample / flushes the profile. */
[[gnu::noinline]] void
preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
/** Pops the most recently pushed frame, if any. */
[[gnu::noinline]] void
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
/** Writes the collected samples to the profile file if `profileDumpInterval`
    has elapsed since the last dump, then discards them. */
void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now);
/** Writes every entry of `callCount` to the profile file. */
void saveProfile();
/** Builds the frame description for a call of `v` at `pos`. */
FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos);
SampleStack(SampleStack &&) = default;
SampleStack & operator=(SampleStack &&) = delete;
SampleStack(const SampleStack &) = delete;
SampleStack & operator=(const SampleStack &) = delete;
/** Saves the remaining samples; exceptions are not propagated. */
~SampleStack();
private:
/** Hold on to an instance of EvalState for symbolizing positions. */
const EvalState & state;
/** Minimum time between two recorded stack samples. */
std::chrono::nanoseconds sampleInterval;
/** File descriptor the profile is written to. */
AutoCloseFD profileFd;
/** Current shadow call stack. */
FrameStack stack;
/** Number of samples recorded per distinct stack since the last dump. */
std::map<FrameStack, uint32_t> callCount;
/** Time at which the last stack sample was recorded. */
std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample =
std::chrono::high_resolution_clock::now();
/** Time at which the profile was last written to the file. */
std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now();
/** Cache of resolved positions used while building and symbolizing frames. */
PosCache posCache;
};
/**
 * Classify the value being called and build the matching frame description.
 *
 * Lambdas and primops (including partially applied primops) keep a pointer
 * to their expression/primop together with the call position. Functors only
 * record a position. Anything else gets a generic frame.
 */
FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos)
{
    /* NOTE: The profiler does not hold on to references to garbage collected
       values; only what is needed for symbolization is extracted here. */
    if (v.isLambda())
        return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos};

    if (v.isPrimOp())
        return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos};

    /* Resolve primOp eagerly. Must not hold on to a reference to a Value. */
    if (v.isPrimOpApp())
        return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos};

    /* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what
     * trace-function-calls does. */
    if (!state.isFunctor(v))
        return GenericFrameInfo{.pos = pos};

    const auto callSite = posCache.lookup(pos);
    if (std::holds_alternative<std::monostate>(callSite.origin))
        /* HACK: In case callsite position is unresolved, fall back to the
           position of the __functor attribute. */
        return FunctorFrameInfo{.pos = v.attrs()->get(state.sFunctor)->pos};

    return FunctorFrameInfo{.pos = pos};
}
/**
 * Hook invoked before a function call.
 *
 * Pushes a frame describing the call of `v` at `pos` onto the shadow stack.
 * If at least `sampleInterval` has elapsed since the previous sample, the
 * current stack is counted in `callCount`. Finally gives the profiler a
 * chance to flush collected samples to the profile file.
 */
[[gnu::noinline]] void SampleStack::preFunctionCallHook(
    const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos)
{
    stack.push_back(getFrameInfoFromValueAndPos(v, pos));

    auto now = std::chrono::high_resolution_clock::now();

    /* Use `>=` rather than `>`: with a zero interval (frequency 0, i.e.
       "sample after each call") a strict comparison would skip the sample
       whenever two consecutive calls observe the same clock value. */
    if (now - lastStackSample >= sampleInterval) {
        callCount[stack] += 1;
        lastStackSample = now;
    }

    /* Do this in preFunctionCallHook because we might throw an exception, but
       callFunction uses Finally, which doesn't play well with exceptions. */
    maybeSaveProfile(now);
}
/**
 * Hook invoked after a function call: drops the frame pushed by the
 * matching preFunctionCallHook. Does nothing if the shadow stack is empty.
 */
[[gnu::noinline]] void
SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos)
{
    if (stack.empty())
        return;

    stack.pop_back();
}
/**
 * Write `<position>[:<lambda name>]` for this frame to `os`.
 *
 * The position is the call site, unless its origin is unresolved, in which
 * case the position of the lambda itself is printed instead. The name is
 * appended only when the lambda has one.
 */
std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
    const auto callSite = posCache.lookup(callPos);
    const bool callSiteUnresolved = std::holds_alternative<std::monostate>(callSite.origin);

    if (callSiteUnresolved) {
        /* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved
           resort to printing the lambda location instead of the callsite position. */
        os << posCache.lookup(expr->getPos());
    } else {
        os << callSite;
    }

    if (expr->name)
        os << ":" << state.symbols[expr->name];

    return os;
}
/** Write the resolved position of this frame to `os`. */
std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
    const auto resolved = posCache.lookup(pos);
    os << resolved;
    return os;
}
/** Write the resolved position of this frame followed by `:functor` to `os`. */
std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
    const auto resolved = posCache.lookup(pos);
    os << resolved;
    os << ":functor";
    return os;
}
/**
 * Write `[<call position>:]<primop>` for this frame to `os`.
 *
 * The call position prefix is omitted when its origin is unresolved.
 */
std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
{
    /* Sometimes callsite position can have an unresolved origin, which
       leads to confusing «none»:0 locations in the profile. */
    const auto pos = posCache.lookup(callPos);
    if (!std::holds_alternative<std::monostate>(pos.origin))
        /* Reuse the position resolved above instead of looking it up (and
           copying it) a second time. */
        os << pos << ":";
    os << *expr;
    return os;
}
/**
 * Flush the collected samples to the profile file if at least
 * `profileDumpInterval` has elapsed since the previous dump; otherwise do
 * nothing.
 */
void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now)
{
    const bool dumpIsDue = now - lastDump >= profileDumpInterval;
    if (!dumpIsDue)
        return;

    saveProfile();

    /* Save the last dump timepoint. Do this after actually saving data to file
       to not account for the time doing the flushing to disk. */
    lastDump = std::chrono::high_resolution_clock::now();

    /* Free up memory used for stack sampling. This might be very significant for
       long-running evaluations, so we shouldn't hog too much memory. */
    callCount.clear();
}
/**
 * Write every recorded stack to the profile file, one line per stack:
 * the symbolized frames joined by `;`, then a space and the sample count.
 *
 * `callCount` is left untouched; clearing it is up to the caller.
 */
void SampleStack::saveProfile()
{
    std::ostringstream line;

    for (const auto & [frames, samples] : callCount) {
        /* Empty before the first frame, ";" before every following one. */
        const char * separator = "";
        for (const auto & frame : frames) {
            line << separator;
            std::visit([&](const auto & info) { info.symbolize(state, line, posCache); }, frame);
            separator = ";";
        }
        line << " " << samples;

        writeLine(profileFd.get(), std::move(line).str());

        /* Reset the stream so it can be reused for the next stack. */
        line.str("");
        line.clear();
    }
}
/* Write out whatever samples are still pending when the profiler is
   destroyed. Exceptions from saveProfile() are not allowed to escape the
   destructor; they are handed to ignoreExceptionInDestructor(). */
SampleStack::~SampleStack()
{
/* Guard against cases when we are already unwinding the stack. */
try {
saveProfile();
} catch (...) {
ignoreExceptionInDestructor();
}
}
} // namespace
/**
 * Create a stack sampling profiler that writes to `profileFile`.
 *
 * `frequency` is the sampling rate in Hz and is converted to a sampling
 * period in nanoseconds. A frequency of 0 yields a zero period, i.e. the
 * stack is sampled after each call.
 */
ref<EvalProfiler>
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency)
{
    /* 0 is a special value for sampling stack after each call. */
    std::chrono::nanoseconds period{0};
    if (frequency != 0)
        period = std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num};

    return make_ref<SampleStack>(state, profileFile, period);
}
}

View file

@ -376,8 +376,16 @@ EvalState::EvalState(
/* Register function call tracer. */
if (settings.traceFunctionCalls)
profiler.addProfiler(make_ref<FunctionCallTrace>());
}
switch (settings.evalProfilerMode) {
case EvalProfilerMode::flamegraph:
profiler.addProfiler(makeSampleStackProfiler(
*this, settings.evalProfileFile.get(), settings.evalProfilerFrequency));
break;
case EvalProfilerMode::disabled:
break;
}
}
EvalState::~EvalState()
{
@ -2236,7 +2244,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx
}
bool EvalState::isFunctor(Value & fun)
bool EvalState::isFunctor(const Value & fun) const
{
return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end();
}

View file

@ -0,0 +1,16 @@
#pragma once
///@file
#include "nix/util/configuration.hh"
namespace nix {
/** Selects which evaluation profiler (if any) is enabled. */
enum struct EvalProfilerMode { disabled, flamegraph };
/* Declarations of the BaseSetting specializations for EvalProfilerMode:
   conversion from and to the textual setting value. */
template<>
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const;
template<>
std::string BaseSetting<EvalProfilerMode>::to_string() const;
}

View file

@ -11,6 +11,7 @@
#include <span>
#include <bitset>
#include <optional>
#include <filesystem>
namespace nix {
@ -110,4 +111,7 @@ public:
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
};
/**
 * Create a stack sampling profiler.
 *
 * @param state Evaluator state the profiler is attached to.
 * @param profileFile Path of the file the profile is written to.
 * @param frequency Sampling frequency in Hz; 0 samples the stack after each call.
 */
ref<EvalProfiler>
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency);
}

View file

@ -1,6 +1,7 @@
#pragma once
///@file
#include "nix/expr/eval-profiler-settings.hh"
#include "nix/util/configuration.hh"
#include "nix/util/source-path.hh"
@ -193,6 +194,27 @@ struct EvalSettings : Config
`flamegraph.pl`.
)"};
/* `eval-profiler`: which evaluation profiler to enable; disabled by default. */
Setting<EvalProfilerMode> evalProfilerMode{this, EvalProfilerMode::disabled, "eval-profiler",
R"(
Enables evaluation profiling. The following modes are supported:
* `flamegraph` stack sampling profiler. Outputs folded format, one line per stack (suitable for `flamegraph.pl` and compatible tools).
Use [`eval-profile-file`](#conf-eval-profile-file) to specify where the profile is saved.
)"};
/* `eval-profile-file`: where the evaluation profile is written; defaults to
   `nix.profile`. */
Setting<Path> evalProfileFile{this, "nix.profile", "eval-profile-file",
R"(
Specifies the file where [evaluation profile](#conf-eval-profiler) is saved.
)"};
/* `eval-profiler-frequency`: sampling rate in Hz (default 99); 0 means the
   stack is sampled after each function call. */
Setting<uint32_t> evalProfilerFrequency{this, 99, "eval-profiler-frequency",
R"(
Specifies the sampling rate in hertz for sampling evaluation profilers.
Use `0` to sample the stack after each function call.
See [`eval-profiler`](#conf-eval-profiler).
)"};
Setting<bool> useEvalCache{this, true, "eval-cache",
R"(
Whether to use the flake evaluation cache.

View file

@ -731,7 +731,7 @@ public:
*/
void assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::string_view errorCtx);
bool isFunctor(Value & fun);
bool isFunctor(const Value & fun) const;
void callFunction(Value & fun, std::span<Value *> args, Value & vRes, const PosIdx pos);

View file

@ -14,6 +14,7 @@ headers = [config_pub_h] + files(
'eval-error.hh',
'eval-gc.hh',
'eval-inline.hh',
'eval-profiler-settings.hh',
'eval-profiler.hh',
'eval-settings.hh',
'eval.hh',

View file

@ -140,6 +140,7 @@ sources = files(
'eval-cache.cc',
'eval-error.cc',
'eval-gc.cc',
'eval-profiler-settings.cc',
'eval-profiler.cc',
'eval-settings.cc',
'eval.cc',