mirror of
https://github.com/NixOS/nix
synced 2025-06-24 22:11:15 +02:00
libexpr: Add SampleStack
stack-sampling profiler
This patch adds support for a native stack sampling profiler to the evaluator, which saves a collapsed stack profile information to a configurable location. Introduced options (in `EvalSettings`): - `eval-profile-file` - path to the collected profile file. - `eval-profiler-frequency` - sampling frequency. - `eval-profiler` - enumeration option for enabling the profiler. Currently only `flamegraph` is supported, but having this an enumeration rather than a boolean switch leaves the door open for other profiler variants (e.g. tracy). Profile includes the following information on best-effort basis (e.g. some lambdas might have an undefined name). Callstack information contains: - Call site location (where the function gets called). - Primop/lambda name of the function being called. - Functors/partial applications don't have a name attached to them unlike special-cased primops and lambads. For cases where callsite location isn't available we have to resort to providing the location where the lambda itself is defined. This removes some of the confusing `«none»:0` locations in the profile from previous attempts. Example usage with piping directly into zstd for compression: ``` nix eval --no-eval-cache nixpkgs#nixosTests.gnome \ --eval-profiler flamegraph \ --eval-profile-file >(zstd -of nix.profile.zstd) ``` Co-authored-by: Jörg Thalheim <joerg@thalheim.io>
This commit is contained in:
parent
e22142e11a
commit
5e74c0e4d6
9 changed files with 369 additions and 3 deletions
49
src/libexpr/eval-profiler-settings.cc
Normal file
49
src/libexpr/eval-profiler-settings.cc
Normal file
|
@ -0,0 +1,49 @@
|
|||
#include "nix/expr/eval-profiler-settings.hh"
|
||||
#include "nix/util/configuration.hh"
|
||||
#include "nix/util/logging.hh" /* Needs to be included before config-impl.hh */
|
||||
#include "nix/util/config-impl.hh"
|
||||
#include "nix/util/abstract-setting-to-json.hh"
|
||||
|
||||
#include <nlohmann/json.hpp>
|
||||
|
||||
namespace nix {
|
||||
|
||||
template<>
|
||||
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const
|
||||
{
|
||||
if (str == "disabled")
|
||||
return EvalProfilerMode::disabled;
|
||||
else if (str == "flamegraph")
|
||||
return EvalProfilerMode::flamegraph;
|
||||
else
|
||||
throw UsageError("option '%s' has invalid value '%s'", name, str);
|
||||
}
|
||||
|
||||
template<>
|
||||
struct BaseSetting<EvalProfilerMode>::trait
|
||||
{
|
||||
static constexpr bool appendable = false;
|
||||
};
|
||||
|
||||
template<>
|
||||
std::string BaseSetting<EvalProfilerMode>::to_string() const
|
||||
{
|
||||
if (value == EvalProfilerMode::disabled)
|
||||
return "disabled";
|
||||
else if (value == EvalProfilerMode::flamegraph)
|
||||
return "flamegraph";
|
||||
else
|
||||
unreachable();
|
||||
}
|
||||
|
||||
NLOHMANN_JSON_SERIALIZE_ENUM(
|
||||
EvalProfilerMode,
|
||||
{
|
||||
{EvalProfilerMode::disabled, "disabled"},
|
||||
{EvalProfilerMode::flamegraph, "flamegraph"},
|
||||
});
|
||||
|
||||
/* Explicit instantiation of templates */
|
||||
template class BaseSetting<EvalProfilerMode>;
|
||||
|
||||
}
|
|
@ -1,5 +1,7 @@
|
|||
#include "nix/expr/eval-profiler.hh"
|
||||
#include "nix/expr/nixexpr.hh"
|
||||
#include "nix/expr/eval.hh"
|
||||
#include "nix/util/lru-cache.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
@ -45,4 +47,267 @@ void MultiEvalProfiler::addProfiler(ref<EvalProfiler> profiler)
|
|||
invalidateNeededHooks();
|
||||
}
|
||||
|
||||
namespace {
|
||||
|
||||
class PosCache : private LRUCache<PosIdx, Pos>
|
||||
{
|
||||
const EvalState & state;
|
||||
|
||||
public:
|
||||
PosCache(const EvalState & state)
|
||||
: LRUCache(524288) /* ~40MiB */
|
||||
, state(state)
|
||||
{
|
||||
}
|
||||
|
||||
Pos lookup(PosIdx posIdx)
|
||||
{
|
||||
auto posOrNone = LRUCache::get(posIdx);
|
||||
if (posOrNone)
|
||||
return *posOrNone;
|
||||
|
||||
auto pos = state.positions[posIdx];
|
||||
upsert(posIdx, pos);
|
||||
return pos;
|
||||
}
|
||||
};
|
||||
|
||||
struct LambdaFrameInfo
|
||||
{
|
||||
ExprLambda * expr;
|
||||
/** Position where the lambda has been called. */
|
||||
PosIdx callPos = noPos;
|
||||
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
|
||||
auto operator<=>(const LambdaFrameInfo & rhs) const = default;
|
||||
};
|
||||
|
||||
/** Primop call. */
|
||||
struct PrimOpFrameInfo
|
||||
{
|
||||
const PrimOp * expr;
|
||||
/** Position where the primop has been called. */
|
||||
PosIdx callPos = noPos;
|
||||
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
|
||||
auto operator<=>(const PrimOpFrameInfo & rhs) const = default;
|
||||
};
|
||||
|
||||
/** Used for functor calls (attrset with __functor attr). */
|
||||
struct FunctorFrameInfo
|
||||
{
|
||||
PosIdx pos;
|
||||
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
|
||||
auto operator<=>(const FunctorFrameInfo & rhs) const = default;
|
||||
};
|
||||
|
||||
/** Fallback frame info. */
|
||||
struct GenericFrameInfo
|
||||
{
|
||||
PosIdx pos;
|
||||
std::ostream & symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const;
|
||||
auto operator<=>(const GenericFrameInfo & rhs) const = default;
|
||||
};
|
||||
|
||||
using FrameInfo = std::variant<LambdaFrameInfo, PrimOpFrameInfo, FunctorFrameInfo, GenericFrameInfo>;
|
||||
using FrameStack = std::vector<FrameInfo>;
|
||||
|
||||
/**
|
||||
* Stack sampling profiler.
|
||||
*/
|
||||
class SampleStack : public EvalProfiler
|
||||
{
|
||||
/* How often stack profiles should be flushed to file. This avoids the need
|
||||
to persist stack samples across the whole evaluation at the cost
|
||||
of periodically flushing data to disk. */
|
||||
static constexpr std::chrono::microseconds profileDumpInterval = std::chrono::milliseconds(2000);
|
||||
|
||||
Hooks getNeededHooksImpl() const override
|
||||
{
|
||||
return Hooks().set(preFunctionCall).set(postFunctionCall);
|
||||
}
|
||||
|
||||
public:
|
||||
SampleStack(const EvalState & state, std::filesystem::path profileFile, std::chrono::nanoseconds period)
|
||||
: state(state)
|
||||
, sampleInterval(period)
|
||||
, profileFd([&]() {
|
||||
AutoCloseFD fd = toDescriptor(open(profileFile.c_str(), O_WRONLY | O_CREAT | O_TRUNC, 0660));
|
||||
if (!fd)
|
||||
throw SysError("opening file %s", profileFile);
|
||||
return fd;
|
||||
}())
|
||||
, posCache(state)
|
||||
{
|
||||
}
|
||||
|
||||
[[gnu::noinline]] void
|
||||
preFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
|
||||
[[gnu::noinline]] void
|
||||
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
|
||||
|
||||
void maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now);
|
||||
void saveProfile();
|
||||
FrameInfo getFrameInfoFromValueAndPos(const Value & v, PosIdx pos);
|
||||
|
||||
SampleStack(SampleStack &&) = default;
|
||||
SampleStack & operator=(SampleStack &&) = delete;
|
||||
SampleStack(const SampleStack &) = delete;
|
||||
SampleStack & operator=(const SampleStack &) = delete;
|
||||
~SampleStack();
|
||||
|
||||
private:
|
||||
/** Hold on to an instance of EvalState for symbolizing positions. */
|
||||
const EvalState & state;
|
||||
std::chrono::nanoseconds sampleInterval;
|
||||
AutoCloseFD profileFd;
|
||||
FrameStack stack;
|
||||
std::map<FrameStack, uint32_t> callCount;
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> lastStackSample =
|
||||
std::chrono::high_resolution_clock::now();
|
||||
std::chrono::time_point<std::chrono::high_resolution_clock> lastDump = std::chrono::high_resolution_clock::now();
|
||||
PosCache posCache;
|
||||
};
|
||||
|
||||
FrameInfo SampleStack::getFrameInfoFromValueAndPos(const Value & v, PosIdx pos)
|
||||
{
|
||||
/* NOTE: No actual references to garbage collected values are not held in
|
||||
the profiler. */
|
||||
if (v.isLambda())
|
||||
return LambdaFrameInfo{.expr = v.payload.lambda.fun, .callPos = pos};
|
||||
else if (v.isPrimOp())
|
||||
return PrimOpFrameInfo{.expr = v.primOp(), .callPos = pos};
|
||||
else if (v.isPrimOpApp())
|
||||
/* Resolve primOp eagerly. Must not hold on to a reference to a Value. */
|
||||
return PrimOpFrameInfo{.expr = v.primOpAppPrimOp(), .callPos = pos};
|
||||
else if (state.isFunctor(v)) {
|
||||
const auto functor = v.attrs()->get(state.sFunctor);
|
||||
if (auto pos_ = posCache.lookup(pos); std::holds_alternative<std::monostate>(pos_.origin))
|
||||
/* HACK: In case callsite position is unresolved. */
|
||||
return FunctorFrameInfo{.pos = functor->pos};
|
||||
return FunctorFrameInfo{.pos = pos};
|
||||
} else
|
||||
/* NOTE: Add a stack frame even for invalid cases (e.g. when calling a non-function). This is what
|
||||
* trace-function-calls does. */
|
||||
return GenericFrameInfo{.pos = pos};
|
||||
}
|
||||
|
||||
[[gnu::noinline]] void SampleStack::preFunctionCallHook(
|
||||
const EvalState & state, const Value & v, [[maybe_unused]] std::span<Value *> args, const PosIdx pos)
|
||||
{
|
||||
stack.push_back(getFrameInfoFromValueAndPos(v, pos));
|
||||
|
||||
auto now = std::chrono::high_resolution_clock::now();
|
||||
|
||||
if (now - lastStackSample > sampleInterval) {
|
||||
callCount[stack] += 1;
|
||||
lastStackSample = now;
|
||||
}
|
||||
|
||||
/* Do this in preFunctionCallHook because we might throw an exception, but
|
||||
callFunction uses Finally, which doesn't play well with exceptions. */
|
||||
maybeSaveProfile(now);
|
||||
}
|
||||
|
||||
[[gnu::noinline]] void
|
||||
SampleStack::postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos)
|
||||
{
|
||||
|
||||
if (!stack.empty())
|
||||
stack.pop_back();
|
||||
}
|
||||
|
||||
std::ostream & LambdaFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
|
||||
{
|
||||
if (auto pos = posCache.lookup(callPos); std::holds_alternative<std::monostate>(pos.origin))
|
||||
/* HACK: To avoid dubious «none»:0 in the generated profile if the origin can't be resolved
|
||||
resort to printing the lambda location instead of the callsite position. */
|
||||
os << posCache.lookup(expr->getPos());
|
||||
else
|
||||
os << pos;
|
||||
if (expr->name)
|
||||
os << ":" << state.symbols[expr->name];
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream & GenericFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
|
||||
{
|
||||
os << posCache.lookup(pos);
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream & FunctorFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
|
||||
{
|
||||
os << posCache.lookup(pos) << ":functor";
|
||||
return os;
|
||||
}
|
||||
|
||||
std::ostream & PrimOpFrameInfo::symbolize(const EvalState & state, std::ostream & os, PosCache & posCache) const
|
||||
{
|
||||
/* Sometimes callsite position can have an unresolved origin, which
|
||||
leads to confusing «none»:0 locations in the profile. */
|
||||
auto pos = posCache.lookup(callPos);
|
||||
if (!std::holds_alternative<std::monostate>(pos.origin))
|
||||
os << posCache.lookup(callPos) << ":";
|
||||
os << *expr;
|
||||
return os;
|
||||
}
|
||||
|
||||
void SampleStack::maybeSaveProfile(std::chrono::time_point<std::chrono::high_resolution_clock> now)
|
||||
{
|
||||
if (now - lastDump >= profileDumpInterval)
|
||||
saveProfile();
|
||||
else
|
||||
return;
|
||||
|
||||
/* Save the last dump timepoint. Do this after actually saving data to file
|
||||
to not account for the time doing the flushing to disk. */
|
||||
lastDump = std::chrono::high_resolution_clock::now();
|
||||
|
||||
/* Free up memory used for stack sampling. This might be very significant for
|
||||
long-running evaluations, so we shouldn't hog too much memory. */
|
||||
callCount.clear();
|
||||
}
|
||||
|
||||
void SampleStack::saveProfile()
|
||||
{
|
||||
auto os = std::ostringstream{};
|
||||
for (auto & [stack, count] : callCount) {
|
||||
auto first = true;
|
||||
for (auto & pos : stack) {
|
||||
if (first)
|
||||
first = false;
|
||||
else
|
||||
os << ";";
|
||||
|
||||
std::visit([&](auto && info) { info.symbolize(state, os, posCache); }, pos);
|
||||
}
|
||||
os << " " << count;
|
||||
writeLine(profileFd.get(), std::move(os).str());
|
||||
/* Clear ostringstream. */
|
||||
os.str("");
|
||||
os.clear();
|
||||
}
|
||||
}
|
||||
|
||||
SampleStack::~SampleStack()
|
||||
{
|
||||
/* Guard against cases when we are already unwinding the stack. */
|
||||
try {
|
||||
saveProfile();
|
||||
} catch (...) {
|
||||
ignoreExceptionInDestructor();
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ref<EvalProfiler>
|
||||
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency)
|
||||
{
|
||||
/* 0 is a special value for sampling stack after each call. */
|
||||
std::chrono::nanoseconds period = frequency == 0
|
||||
? std::chrono::nanoseconds{0}
|
||||
: std::chrono::nanoseconds{std::nano::den / frequency / std::nano::num};
|
||||
return make_ref<SampleStack>(state, profileFile, period);
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -376,8 +376,16 @@ EvalState::EvalState(
|
|||
/* Register function call tracer. */
|
||||
if (settings.traceFunctionCalls)
|
||||
profiler.addProfiler(make_ref<FunctionCallTrace>());
|
||||
}
|
||||
|
||||
switch (settings.evalProfilerMode) {
|
||||
case EvalProfilerMode::flamegraph:
|
||||
profiler.addProfiler(makeSampleStackProfiler(
|
||||
*this, settings.evalProfileFile.get(), settings.evalProfilerFrequency));
|
||||
break;
|
||||
case EvalProfilerMode::disabled:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
EvalState::~EvalState()
|
||||
{
|
||||
|
@ -2236,7 +2244,7 @@ bool EvalState::forceBool(Value & v, const PosIdx pos, std::string_view errorCtx
|
|||
}
|
||||
|
||||
|
||||
bool EvalState::isFunctor(Value & fun)
|
||||
bool EvalState::isFunctor(const Value & fun) const
|
||||
{
|
||||
return fun.type() == nAttrs && fun.attrs()->find(sFunctor) != fun.attrs()->end();
|
||||
}
|
||||
|
|
16
src/libexpr/include/nix/expr/eval-profiler-settings.hh
Normal file
16
src/libexpr/include/nix/expr/eval-profiler-settings.hh
Normal file
|
@ -0,0 +1,16 @@
|
|||
#pragma once
|
||||
///@file
|
||||
|
||||
#include "nix/util/configuration.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
enum struct EvalProfilerMode { disabled, flamegraph };
|
||||
|
||||
template<>
|
||||
EvalProfilerMode BaseSetting<EvalProfilerMode>::parse(const std::string & str) const;
|
||||
|
||||
template<>
|
||||
std::string BaseSetting<EvalProfilerMode>::to_string() const;
|
||||
|
||||
}
|
|
@ -11,6 +11,7 @@
|
|||
#include <span>
|
||||
#include <bitset>
|
||||
#include <optional>
|
||||
#include <filesystem>
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
@ -110,4 +111,7 @@ public:
|
|||
postFunctionCallHook(const EvalState & state, const Value & v, std::span<Value *> args, const PosIdx pos) override;
|
||||
};
|
||||
|
||||
ref<EvalProfiler>
|
||||
makeSampleStackProfiler(const EvalState & state, std::filesystem::path profileFile, uint64_t frequency);
|
||||
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
///@file
|
||||
|
||||
#include "nix/expr/eval-profiler-settings.hh"
|
||||
#include "nix/util/configuration.hh"
|
||||
#include "nix/util/source-path.hh"
|
||||
|
||||
|
@ -193,6 +194,27 @@ struct EvalSettings : Config
|
|||
`flamegraph.pl`.
|
||||
)"};
|
||||
|
||||
Setting<EvalProfilerMode> evalProfilerMode{this, EvalProfilerMode::disabled, "eval-profiler",
|
||||
R"(
|
||||
Enables evaluation profiling. The following modes are supported:
|
||||
|
||||
* `flamegraph` stack sampling profiler. Outputs folded format, one line per stack (suitable for `flamegraph.pl` and compatible tools).
|
||||
|
||||
Use [`eval-profile-file`](#conf-eval-profile-file) to specify where the profile is saved.
|
||||
)"};
|
||||
|
||||
Setting<Path> evalProfileFile{this, "nix.profile", "eval-profile-file",
|
||||
R"(
|
||||
Specifies the file where [evaluation profile](#conf-eval-profiler) is saved.
|
||||
)"};
|
||||
|
||||
Setting<uint32_t> evalProfilerFrequency{this, 99, "eval-profiler-frequency",
|
||||
R"(
|
||||
Specifies the sampling rate in hertz for sampling evaluation profilers.
|
||||
Use `0` to sample the stack after each function call.
|
||||
See [`eval-profiler`](#conf-eval-profiler).
|
||||
)"};
|
||||
|
||||
Setting<bool> useEvalCache{this, true, "eval-cache",
|
||||
R"(
|
||||
Whether to use the flake evaluation cache.
|
||||
|
|
|
@ -731,7 +731,7 @@ public:
|
|||
*/
|
||||
void assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::string_view errorCtx);
|
||||
|
||||
bool isFunctor(Value & fun);
|
||||
bool isFunctor(const Value & fun) const;
|
||||
|
||||
void callFunction(Value & fun, std::span<Value *> args, Value & vRes, const PosIdx pos);
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ headers = [config_pub_h] + files(
|
|||
'eval-error.hh',
|
||||
'eval-gc.hh',
|
||||
'eval-inline.hh',
|
||||
'eval-profiler-settings.hh',
|
||||
'eval-profiler.hh',
|
||||
'eval-settings.hh',
|
||||
'eval.hh',
|
||||
|
|
|
@ -140,6 +140,7 @@ sources = files(
|
|||
'eval-cache.cc',
|
||||
'eval-error.cc',
|
||||
'eval-gc.cc',
|
||||
'eval-profiler-settings.cc',
|
||||
'eval-profiler.cc',
|
||||
'eval-settings.cc',
|
||||
'eval.cc',
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue