From ea32580c9b50b9f3dbe47ae9dd2ae28d9bcabca9 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sat, 28 Jun 2025 15:30:38 +0300 Subject: [PATCH 1/6] libexpr: Format value.hh The following commits will touch this file significantly, so it's better to get the formatting out of the way first. --- maintainers/flake-module.nix | 1 - src/libexpr/include/nix/expr/value.hh | 244 +++++++++++++++++--------- 2 files changed, 158 insertions(+), 87 deletions(-) diff --git a/maintainers/flake-module.nix b/maintainers/flake-module.nix index d443d1a74..1058d6334 100644 --- a/maintainers/flake-module.nix +++ b/maintainers/flake-module.nix @@ -256,7 +256,6 @@ ''^src/libexpr/include/nix/expr/value-to-json\.hh$'' ''^src/libexpr/value-to-xml\.cc$'' ''^src/libexpr/include/nix/expr/value-to-xml\.hh$'' - ''^src/libexpr/include/nix/expr/value\.hh$'' ''^src/libexpr/value/context\.cc$'' ''^src/libexpr/include/nix/expr/value/context\.hh$'' ''^src/libfetchers/attrs\.cc$'' diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index fcc118c7e..b67f63808 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -17,7 +17,6 @@ namespace nix { struct Value; class BindingsBuilder; - typedef enum { tUninitialized = 0, tInt = 1, @@ -54,7 +53,7 @@ typedef enum { nAttrs, nList, nFunction, - nExternal + nExternal, } ValueType; class Bindings; @@ -81,15 +80,15 @@ using NixFloat = double; */ class ExternalValueBase { - friend std::ostream & operator << (std::ostream & str, const ExternalValueBase & v); + friend std::ostream & operator<<(std::ostream & str, const ExternalValueBase & v); friend class Printer; - protected: +protected: /** * Print out the value */ virtual std::ostream & print(std::ostream & str) const = 0; - public: +public: /** * Return a simple string describing the type */ @@ -104,41 +103,44 @@ class ExternalValueBase * Coerce the value to a string. Defaults to uncoercable, i.e. throws an * error. */ - virtual std::string coerceToString(EvalState & state, const PosIdx & pos, NixStringContext & context, bool copyMore, bool copyToStore) const; + virtual std::string coerceToString( + EvalState & state, const PosIdx & pos, NixStringContext & context, bool copyMore, bool copyToStore) const; /** * Compare to another value of the same type. Defaults to uncomparable, * i.e. always false. */ - virtual bool operator ==(const ExternalValueBase & b) const noexcept; + virtual bool operator==(const ExternalValueBase & b) const noexcept; /** * Print the value as JSON. Defaults to unconvertable, i.e. throws an error */ - virtual nlohmann::json printValueAsJSON(EvalState & state, bool strict, - NixStringContext & context, bool copyToStore = true) const; + virtual nlohmann::json + printValueAsJSON(EvalState & state, bool strict, NixStringContext & context, bool copyToStore = true) const; /** * Print the value as XML. Defaults to unevaluated */ - virtual void printValueAsXML(EvalState & state, bool strict, bool location, - XMLWriter & doc, NixStringContext & context, PathSet & drvsSeen, + virtual void printValueAsXML( + EvalState & state, + bool strict, + bool location, + XMLWriter & doc, + NixStringContext & context, + PathSet & drvsSeen, const PosIdx pos) const; - virtual ~ExternalValueBase() - { - }; + virtual ~ExternalValueBase() {}; }; -std::ostream & operator << (std::ostream & str, const ExternalValueBase & v); - +std::ostream & operator<<(std::ostream & str, const ExternalValueBase & v); class ListBuilder { const size_t size; Value * inlineElems[2] = {nullptr, nullptr}; public: - Value * * elems; + Value ** elems; ListBuilder(EvalState & state, size_t size); // NOTE: Can be noexcept because we are just copying integral values and @@ -147,22 +149,28 @@ public: : size(x.size) , inlineElems{x.inlineElems[0], x.inlineElems[1]} , elems(size <= 2 ? inlineElems : x.elems) - { } + { + } - Value * & operator [](size_t n) + Value *& operator[](size_t n) { return elems[n]; } - typedef Value * * iterator; + typedef Value ** iterator; - iterator begin() { return &elems[0]; } - iterator end() { return &elems[size]; } + iterator begin() + { + return &elems[0]; + } + iterator end() + { + return &elems[size]; + } friend struct Value; }; - struct Value { private: @@ -177,21 +185,36 @@ public: */ static Value * toPtr(SymbolStr str) noexcept; - void print(EvalState &state, std::ostream &str, PrintOptions options = PrintOptions {}); + void print(EvalState & state, std::ostream & str, PrintOptions options = PrintOptions{}); // Functions needed to distinguish the type // These should be removed eventually, by putting the functionality that's // needed by callers into methods of this type // type() == nThunk - inline bool isThunk() const { return internalType == tThunk; }; - inline bool isApp() const { return internalType == tApp; }; + inline bool isThunk() const + { + return internalType == tThunk; + }; + inline bool isApp() const + { + return internalType == tApp; + }; inline bool isBlackhole() const; // type() == nFunction - inline bool isLambda() const { return internalType == tLambda; }; - inline bool isPrimOp() const { return internalType == tPrimOp; }; - inline bool isPrimOpApp() const { return internalType == tPrimOpApp; }; + inline bool isLambda() const + { + return internalType == tLambda; + }; + inline bool isPrimOp() const + { + return internalType == tPrimOp; + }; + inline bool isPrimOpApp() const + { + return internalType == tPrimOpApp; + }; /** * Strings in the evaluator carry a so-called `context` which @@ -215,26 +238,31 @@ public: * For canonicity, the store paths should be in sorted order. */ - struct StringWithContext { + struct StringWithContext + { const char * c_str; - const char * * context; // must be in sorted order + const char ** context; // must be in sorted order }; - struct Path { + struct Path + { SourceAccessor * accessor; const char * path; }; - struct ClosureThunk { + struct ClosureThunk + { Env * env; Expr * expr; }; - struct FunctionApplicationThunk { - Value * left, * right; + struct FunctionApplicationThunk + { + Value *left, *right; }; - struct Lambda { + struct Lambda + { Env * env; ExprLambda * fun; }; @@ -249,7 +277,8 @@ public: Path path; Bindings * attrs; - struct { + struct + { size_t size; Value * const * elems; } bigList; @@ -275,18 +304,35 @@ public: inline ValueType type(bool invalidIsThunk = false) const { switch (internalType) { - case tUninitialized: break; - case tInt: return nInt; - case tBool: return nBool; - case tString: return nString; - case tPath: return nPath; - case tNull: return nNull; - case tAttrs: return nAttrs; - case tList1: case tList2: case tListN: return nList; - case tLambda: case tPrimOp: case tPrimOpApp: return nFunction; - case tExternal: return nExternal; - case tFloat: return nFloat; - case tThunk: case tApp: return nThunk; + case tUninitialized: + break; + case tInt: + return nInt; + case tBool: + return nBool; + case tString: + return nString; + case tPath: + return nPath; + case tNull: + return nNull; + case tAttrs: + return nAttrs; + case tList1: + case tList2: + case tListN: + return nList; + case tLambda: + case tPrimOp: + case tPrimOpApp: + return nFunction; + case tExternal: + return nExternal; + case tFloat: + return nFloat; + case tThunk: + case tApp: + return nThunk; } if (invalidIsThunk) return nThunk; @@ -317,17 +363,17 @@ public: inline void mkInt(NixInt n) { - finishValue(tInt, { .integer = n }); + finishValue(tInt, {.integer = n}); } inline void mkBool(bool b) { - finishValue(tBool, { .boolean = b }); + finishValue(tBool, {.boolean = b}); } - inline void mkString(const char * s, const char * * context = 0) + inline void mkString(const char * s, const char ** context = 0) { - finishValue(tString, { .string = { .c_str = s, .context = context } }); + finishValue(tString, {.string = {.c_str = s, .context = context}}); } void mkString(std::string_view s); @@ -341,7 +387,7 @@ public: inline void mkPath(SourceAccessor * accessor, const char * path) { - finishValue(tPath, { .path = { .accessor = accessor, .path = path } }); + finishValue(tPath, {.path = {.accessor = accessor, .path = path}}); } inline void mkNull() @@ -351,7 +397,7 @@ public: inline void mkAttrs(Bindings * a) { - finishValue(tAttrs, { .attrs = a }); + finishValue(tAttrs, {.attrs = a}); } Value & mkAttrs(BindingsBuilder & bindings); @@ -359,26 +405,26 @@ public: void mkList(const ListBuilder & builder) { if (builder.size == 1) - finishValue(tList1, { .smallList = { builder.inlineElems[0] } }); + finishValue(tList1, {.smallList = {builder.inlineElems[0]}}); else if (builder.size == 2) - finishValue(tList2, { .smallList = { builder.inlineElems[0], builder.inlineElems[1] } }); + finishValue(tList2, {.smallList = {builder.inlineElems[0], builder.inlineElems[1]}}); else - finishValue(tListN, { .bigList = { .size = builder.size, .elems = builder.elems } }); + finishValue(tListN, {.bigList = {.size = builder.size, .elems = builder.elems}}); } inline void mkThunk(Env * e, Expr * ex) { - finishValue(tThunk, { .thunk = { .env = e, .expr = ex } }); + finishValue(tThunk, {.thunk = {.env = e, .expr = ex}}); } inline void mkApp(Value * l, Value * r) { - finishValue(tApp, { .app = { .left = l, .right = r } }); + finishValue(tApp, {.app = {.left = l, .right = r}}); } inline void mkLambda(Env * e, ExprLambda * f) { - finishValue(tLambda, { .lambda = { .env = e, .fun = f } }); + finishValue(tLambda, {.lambda = {.env = e, .fun = f}}); } inline void mkBlackhole(); @@ -387,7 +433,7 @@ public: inline void mkPrimOpApp(Value * l, Value * r) { - finishValue(tPrimOpApp, { .primOpApp = { .left = l, .right = r } }); + finishValue(tPrimOpApp, {.primOpApp = {.left = l, .right = r}}); } /** @@ -397,12 +443,12 @@ public: inline void mkExternal(ExternalValueBase * e) { - finishValue(tExternal, { .external = e }); + finishValue(tExternal, {.external = e}); } inline void mkFloat(NixFloat n) { - finishValue(tFloat, { .fpoint = n }); + finishValue(tFloat, {.fpoint = n}); } bool isList() const @@ -438,9 +484,7 @@ public: SourcePath path() const { assert(internalType == tPath); - return SourcePath( - ref(pathAccessor()->shared_from_this()), - CanonPath(CanonPath::unchecked_t(), pathStr())); + return SourcePath(ref(pathAccessor()->shared_from_this()), CanonPath(CanonPath::unchecked_t(), pathStr())); } std::string_view string_view() const @@ -455,54 +499,77 @@ public: return payload.string.c_str; } - const char * * context() const + const char ** context() const { return payload.string.context; } ExternalValueBase * external() const - { return payload.external; } + { + return payload.external; + } const Bindings * attrs() const - { return payload.attrs; } + { + return payload.attrs; + } const PrimOp * primOp() const - { return payload.primOp; } + { + return payload.primOp; + } bool boolean() const - { return payload.boolean; } + { + return payload.boolean; + } NixInt integer() const - { return payload.integer; } + { + return payload.integer; + } NixFloat fpoint() const - { return payload.fpoint; } + { + return payload.fpoint; + } Lambda lambda() const - { return payload.lambda; } + { + return payload.lambda; + } ClosureThunk thunk() const - { return payload.thunk; } + { + return payload.thunk; + } FunctionApplicationThunk primOpApp() const - { return payload.primOpApp; } + { + return payload.primOpApp; + } FunctionApplicationThunk app() const - { return payload.app; } + { + return payload.app; + } const char * pathStr() const - { return payload.path.path; } + { + return payload.path.path; + } SourceAccessor * pathAccessor() const - { return payload.path.accessor; } + { + return payload.path.accessor; + } }; - extern ExprBlackHole eBlackHole; bool Value::isBlackhole() const { - return internalType == tThunk && thunk().expr == (Expr*) &eBlackHole; + return internalType == tThunk && thunk().expr == (Expr *) &eBlackHole; } void Value::mkBlackhole() @@ -510,11 +577,16 @@ void Value::mkBlackhole() mkThunk(nullptr, (Expr *) &eBlackHole); } - typedef std::vector> ValueVector; -typedef std::unordered_map, std::equal_to, traceable_allocator>> ValueMap; -typedef std::map, traceable_allocator>> ValueVectorMap; - +typedef std::unordered_map< + Symbol, + Value *, + std::hash, + std::equal_to, + traceable_allocator>> + ValueMap; +typedef std::map, traceable_allocator>> + ValueVectorMap; /** * A value allocated in traceable memory. From 1a033ee4eee9804f5544a92a3fe8d66e77d3257c Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sat, 28 Jun 2025 15:30:40 +0300 Subject: [PATCH 2/6] libexpr: Use single `tSmallList` `Value` discriminator for small lists --- src/libexpr/include/nix/expr/value.hh | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index b67f63808..658cb5a6a 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -25,8 +25,7 @@ typedef enum { tPath, tNull, tAttrs, - tList1, - tList2, + tListSmall, tListN, tThunk, tApp, @@ -318,8 +317,7 @@ public: return nNull; case tAttrs: return nAttrs; - case tList1: - case tList2: + case tListSmall: case tListN: return nList; case tLambda: @@ -405,9 +403,9 @@ public: void mkList(const ListBuilder & builder) { if (builder.size == 1) - finishValue(tList1, {.smallList = {builder.inlineElems[0]}}); + finishValue(tListSmall, {.smallList = {builder.inlineElems[0], nullptr}}); else if (builder.size == 2) - finishValue(tList2, {.smallList = {builder.inlineElems[0], builder.inlineElems[1]}}); + finishValue(tListSmall, {.smallList = {builder.inlineElems[0], builder.inlineElems[1]}}); else finishValue(tListN, {.bigList = {.size = builder.size, .elems = builder.elems}}); } @@ -453,7 +451,7 @@ public: bool isList() const { - return internalType == tList1 || internalType == tList2 || internalType == tListN; + return internalType == tListSmall || internalType == tListN; } std::span listItems() const @@ -464,12 +462,12 @@ public: Value * const * listElems() const { - return internalType == tList1 || internalType == tList2 ? payload.smallList : payload.bigList.elems; + return internalType == tListSmall ? payload.smallList : payload.bigList.elems; } size_t listSize() const { - return internalType == tList1 ? 1 : internalType == tList2 ? 2 : payload.bigList.size; + return internalType == tListSmall ? (payload.smallList[1] == nullptr ? 1 : 2) : payload.bigList.size; } PosIdx determinePos(const PosIdx pos) const; From 810455f1b80959ce096ed8be5c7f3c50b58fc7d9 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sat, 28 Jun 2025 15:30:43 +0300 Subject: [PATCH 3/6] libexpr: Simplify Value::is* methods by introducing isa function template --- src/libexpr/eval.cc | 9 +++--- src/libexpr/include/nix/expr/value.hh | 40 +++++++++++++++++---------- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index ae422a3d4..806b506a5 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -125,7 +125,7 @@ std::string showType(const Value & v) // Allow selecting a subset of enum values #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch-enum" - switch (v.internalType) { + switch (v.getInternalType()) { case tString: return v.context() ? "a string with context" : "a string"; case tPrimOp: return fmt("the built-in function '%s'", std::string(v.primOp()->name)); @@ -145,7 +145,7 @@ PosIdx Value::determinePos(const PosIdx pos) const // Allow selecting a subset of enum values #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wswitch-enum" - switch (internalType) { + switch (getInternalType()) { case tAttrs: return attrs()->pos; case tLambda: return lambda().fun->pos; case tApp: return app().left->determinePos(pos); @@ -157,9 +157,8 @@ PosIdx Value::determinePos(const PosIdx pos) const bool Value::isTrivial() const { return - internalType != tApp - && internalType != tPrimOpApp - && (internalType != tThunk + !isa() + && (!isa() || (dynamic_cast(thunk().expr) && ((ExprAttrs *) thunk().expr)->dynamicAttrs.empty()) || dynamic_cast(thunk().expr) diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 658cb5a6a..0af9e1ea5 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -175,6 +175,18 @@ struct Value private: InternalType internalType = tUninitialized; + InternalType getInternalType() const noexcept + { + return internalType; + } + + /** Check if currently stored value type is either of Vs. */ + template + bool isa() const noexcept + { + return ((getInternalType() == Vs) || ...); + } + friend std::string showType(const Value & v); public: @@ -193,26 +205,26 @@ public: // type() == nThunk inline bool isThunk() const { - return internalType == tThunk; + return isa(); }; inline bool isApp() const { - return internalType == tApp; + return isa(); }; inline bool isBlackhole() const; // type() == nFunction inline bool isLambda() const { - return internalType == tLambda; + return isa(); }; inline bool isPrimOp() const { - return internalType == tPrimOp; + return isa(); }; inline bool isPrimOpApp() const { - return internalType == tPrimOpApp; + return isa(); }; /** @@ -302,7 +314,7 @@ public: */ inline ValueType type(bool invalidIsThunk = false) const { - switch (internalType) { + switch (getInternalType()) { case tUninitialized: break; case tInt: @@ -351,7 +363,7 @@ public: */ inline bool isValid() const { - return internalType != tUninitialized; + return !isa(); } inline void mkInt(NixInt::Inner n) @@ -451,7 +463,7 @@ public: bool isList() const { - return internalType == tListSmall || internalType == tListN; + return isa(); } std::span listItems() const @@ -462,12 +474,12 @@ public: Value * const * listElems() const { - return internalType == tListSmall ? payload.smallList : payload.bigList.elems; + return isa() ? payload.smallList : payload.bigList.elems; } size_t listSize() const { - return internalType == tListSmall ? (payload.smallList[1] == nullptr ? 1 : 2) : payload.bigList.size; + return isa() ? (payload.smallList[1] == nullptr ? 1 : 2) : payload.bigList.size; } PosIdx determinePos(const PosIdx pos) const; @@ -481,19 +493,19 @@ public: SourcePath path() const { - assert(internalType == tPath); + assert(isa()); return SourcePath(ref(pathAccessor()->shared_from_this()), CanonPath(CanonPath::unchecked_t(), pathStr())); } std::string_view string_view() const { - assert(internalType == tString); + assert(isa()); return std::string_view(payload.string.c_str); } const char * c_str() const { - assert(internalType == tString); + assert(isa()); return payload.string.c_str; } @@ -567,7 +579,7 @@ extern ExprBlackHole eBlackHole; bool Value::isBlackhole() const { - return internalType == tThunk && thunk().expr == (Expr *) &eBlackHole; + return isa() && thunk().expr == (Expr *) &eBlackHole; } void Value::mkBlackhole() From c39cc004043b95d55a0c2c2bdba58d6d3e0db846 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sat, 28 Jun 2025 16:18:18 +0300 Subject: [PATCH 4/6] libexpr: Factor out `Payload` union to a default implementation of `ValueStorage` This factors out most of the value representation into a mixin class. `finishValue` is now gone for good and replaced with a simple template function `setStorage` which derives the type information/disriminator from the type of the argument. Likewise, reading of the value goes through function template `getStorage`. An empty type `Null` is introduced to make the bijection InternalType <-> C++ type complete. --- src/libexpr/eval.cc | 6 +- src/libexpr/include/nix/expr/value.hh | 414 ++++++++++++++++---------- src/libexpr/primops.cc | 2 +- 3 files changed, 258 insertions(+), 164 deletions(-) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index 806b506a5..fba1e6665 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -502,7 +502,7 @@ void EvalState::addConstant(const std::string & name, Value * v, Constant info) /* Install value the base environment. */ staticBaseEnv->vars.emplace_back(symbols.create(name), baseEnvDispl); baseEnv.values[baseEnvDispl++] = v; - getBuiltins().payload.attrs->push_back(Attr(symbols.create(name2), v)); + const_cast(getBuiltins().attrs())->push_back(Attr(symbols.create(name2), v)); } } @@ -540,7 +540,7 @@ const PrimOp * Value::primOpAppPrimOp() const void Value::mkPrimOp(PrimOp * p) { p->check(); - finishValue(tPrimOp, { .primOp = p }); + setStorage(p); } @@ -572,7 +572,7 @@ Value * EvalState::addPrimOp(PrimOp && primOp) else { staticBaseEnv->vars.emplace_back(envName, baseEnvDispl); baseEnv.values[baseEnvDispl++] = v; - getBuiltins().payload.attrs->push_back(Attr(symbols.create(primOp.name), v)); + const_cast(getBuiltins().attrs())->push_back(Attr(symbols.create(primOp.name), v)); } return v; diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 0af9e1ea5..642459463 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -3,6 +3,7 @@ #include #include +#include #include "nix/expr/eval-gc.hh" #include "nix/expr/value/context.hh" @@ -170,24 +171,205 @@ public: friend struct Value; }; -struct Value +namespace detail { + +/** + * Implementation mixin class for defining the public types + * In can be inherited from by the actual ValueStorage implementations + * for free due to Empty Base Class Optimization (EBCO). + */ +struct ValueBase { + /** + * Strings in the evaluator carry a so-called `context` which + * is a list of strings representing store paths. This is to + * allow users to write things like + * + * "--with-freetype2-library=" + freetype + "/lib" + * + * where `freetype` is a derivation (or a source to be copied + * to the store). If we just concatenated the strings without + * keeping track of the referenced store paths, then if the + * string is used as a derivation attribute, the derivation + * will not have the correct dependencies in its inputDrvs and + * inputSrcs. + + * The semantics of the context is as follows: when a string + * with context C is used as a derivation attribute, then the + * derivations in C will be added to the inputDrvs of the + * derivation, and the other store paths in C will be added to + * the inputSrcs of the derivations. + + * For canonicity, the store paths should be in sorted order. + */ + struct StringWithContext + { + const char * c_str; + const char ** context; // must be in sorted order + }; + + struct Path + { + SourceAccessor * accessor; + const char * path; + }; + + struct Null + {}; + + struct ClosureThunk + { + Env * env; + Expr * expr; + }; + + struct FunctionApplicationThunk + { + Value *left, *right; + }; + + /** + * Like FunctionApplicationThunk, but must be a distinct type in order to + * resolve overloads to `tPrimOpApp` instead of `tApp`. + * This type helps with the efficient implementation of arity>=2 primop calls. + */ + struct PrimOpApplicationThunk + { + Value *left, *right; + }; + + struct Lambda + { + Env * env; + ExprLambda * fun; + }; + + using SmallList = std::array; + + struct List + { + size_t size; + Value * const * elems; + }; +}; + +template +struct PayloadTypeToInternalType +{}; + +/** + * All stored types must be distinct (not type aliases) for the purposes of + * overload resolution in setStorage. This ensures there's a bijection from + * InternalType <-> C++ type. + */ +#define NIX_VALUE_STORAGE_FOR_EACH_FIELD(MACRO) \ + MACRO(NixInt, integer, tInt) \ + MACRO(bool, boolean, tBool) \ + MACRO(ValueBase::StringWithContext, string, tString) \ + MACRO(ValueBase::Path, path, tPath) \ + MACRO(ValueBase::Null, null_, tNull) \ + MACRO(Bindings *, attrs, tAttrs) \ + MACRO(ValueBase::List, bigList, tListN) \ + MACRO(ValueBase::SmallList, smallList, tListSmall) \ + MACRO(ValueBase::ClosureThunk, thunk, tThunk) \ + MACRO(ValueBase::FunctionApplicationThunk, app, tApp) \ + MACRO(ValueBase::Lambda, lambda, tLambda) \ + MACRO(PrimOp *, primOp, tPrimOp) \ + MACRO(ValueBase::PrimOpApplicationThunk, primOpApp, tPrimOpApp) \ + MACRO(ExternalValueBase *, external, tExternal) \ + MACRO(NixFloat, fpoint, tFloat) + +#define NIX_VALUE_PAYLOAD_TYPE(T, FIELD_NAME, DISCRIMINATOR) \ + template<> \ + struct PayloadTypeToInternalType \ + { \ + static constexpr InternalType value = DISCRIMINATOR; \ + }; + +NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_PAYLOAD_TYPE) + +#undef NIX_VALUE_PAYLOAD_TYPE + +template +inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalType::value; + +} + +/** + * Discriminated union of types stored in the value. + * The union discriminator is @ref InternalType enumeration. + * + * This class can be specialized with a non-type template parameter + * of pointer size for more optimized data layouts on when pointer alignment + * bits can be used for storing the discriminator. + * + * All specializations of this type need to implement getStorage, setStorage and + * getInternalType methods. + */ +template +class ValueStorage : public detail::ValueBase +{ +protected: + using Payload = union + { +#define NIX_VALUE_STORAGE_DEFINE_FIELD(T, FIELD_NAME, DISCRIMINATOR) T FIELD_NAME; + NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_DEFINE_FIELD) +#undef NIX_VALUE_STORAGE_DEFINE_FIELD + }; + + Payload payload; + private: InternalType internalType = tUninitialized; +public: +#define NIX_VALUE_STORAGE_GET_IMPL(K, FIELD_NAME, DISCRIMINATOR) \ + void getStorage(K & val) const noexcept \ + { \ + assert(internalType == DISCRIMINATOR); \ + val = payload.FIELD_NAME; \ + } + +#define NIX_VALUE_STORAGE_SET_IMPL(K, FIELD_NAME, DISCRIMINATOR) \ + void setStorage(K val) noexcept \ + { \ + payload.FIELD_NAME = val; \ + internalType = DISCRIMINATOR; \ + } + + NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_GET_IMPL) + NIX_VALUE_STORAGE_FOR_EACH_FIELD(NIX_VALUE_STORAGE_SET_IMPL) + +#undef NIX_VALUE_STORAGE_SET_IMPL +#undef NIX_VALUE_STORAGE_GET_IMPL +#undef NIX_VALUE_STORAGE_FOR_EACH_FIELD + + /** Get internal type currently occupying the storage. */ InternalType getInternalType() const noexcept { return internalType; } +}; - /** Check if currently stored value type is either of Vs. */ - template +struct Value : public ValueStorage +{ + friend std::string showType(const Value & v); + + template bool isa() const noexcept { - return ((getInternalType() == Vs) || ...); + return ((getInternalType() == discriminator) || ...); } - friend std::string showType(const Value & v); + template + T getStorage() const noexcept + { + if (getInternalType() != detail::payloadTypeToInternalType) [[unlikely]] + unreachable(); + T out; + ValueStorage::getStorage(out); + return out; + } public: @@ -227,84 +409,6 @@ public: return isa(); }; - /** - * Strings in the evaluator carry a so-called `context` which - * is a list of strings representing store paths. This is to - * allow users to write things like - * - * "--with-freetype2-library=" + freetype + "/lib" - * - * where `freetype` is a derivation (or a source to be copied - * to the store). If we just concatenated the strings without - * keeping track of the referenced store paths, then if the - * string is used as a derivation attribute, the derivation - * will not have the correct dependencies in its inputDrvs and - * inputSrcs. - - * The semantics of the context is as follows: when a string - * with context C is used as a derivation attribute, then the - * derivations in C will be added to the inputDrvs of the - * derivation, and the other store paths in C will be added to - * the inputSrcs of the derivations. - - * For canonicity, the store paths should be in sorted order. - */ - struct StringWithContext - { - const char * c_str; - const char ** context; // must be in sorted order - }; - - struct Path - { - SourceAccessor * accessor; - const char * path; - }; - - struct ClosureThunk - { - Env * env; - Expr * expr; - }; - - struct FunctionApplicationThunk - { - Value *left, *right; - }; - - struct Lambda - { - Env * env; - ExprLambda * fun; - }; - - using Payload = union - { - NixInt integer; - bool boolean; - - StringWithContext string; - - Path path; - - Bindings * attrs; - struct - { - size_t size; - Value * const * elems; - } bigList; - Value * smallList[2]; - ClosureThunk thunk; - FunctionApplicationThunk app; - Lambda lambda; - PrimOp * primOp; - FunctionApplicationThunk primOpApp; - ExternalValueBase * external; - NixFloat fpoint; - }; - - Payload payload; - /** * Returns the normal type of a Value. This only returns nThunk if * the Value hasn't been forceValue'd @@ -350,40 +454,34 @@ public: unreachable(); } - inline void finishValue(InternalType newType, Payload newPayload) - { - payload = newPayload; - internalType = newType; - } - /** * A value becomes valid when it is initialized. We don't use this * in the evaluator; only in the bindings, where the slight extra * cost is warranted because of inexperienced callers. */ - inline bool isValid() const + inline bool isValid() const noexcept { return !isa(); } - inline void mkInt(NixInt::Inner n) + inline void mkInt(NixInt::Inner n) noexcept { mkInt(NixInt{n}); } - inline void mkInt(NixInt n) + inline void mkInt(NixInt n) noexcept { - finishValue(tInt, {.integer = n}); + setStorage(NixInt{n}); } - inline void mkBool(bool b) + inline void mkBool(bool b) noexcept { - finishValue(tBool, {.boolean = b}); + setStorage(b); } - inline void mkString(const char * s, const char ** context = 0) + inline void mkString(const char * s, const char ** context = 0) noexcept { - finishValue(tString, {.string = {.c_str = s, .context = context}}); + setStorage(StringWithContext{.c_str = s, .context = context}); } void mkString(std::string_view s); @@ -395,55 +493,55 @@ public: void mkPath(const SourcePath & path); void mkPath(std::string_view path); - inline void mkPath(SourceAccessor * accessor, const char * path) + inline void mkPath(SourceAccessor * accessor, const char * path) noexcept { - finishValue(tPath, {.path = {.accessor = accessor, .path = path}}); + setStorage(Path{.accessor = accessor, .path = path}); } - inline void mkNull() + inline void mkNull() noexcept { - finishValue(tNull, {}); + setStorage(Null{}); } - inline void mkAttrs(Bindings * a) + inline void mkAttrs(Bindings * a) noexcept { - finishValue(tAttrs, {.attrs = a}); + setStorage(a); } Value & mkAttrs(BindingsBuilder & bindings); - void mkList(const ListBuilder & builder) + void mkList(const ListBuilder & builder) noexcept { if (builder.size == 1) - finishValue(tListSmall, {.smallList = {builder.inlineElems[0], nullptr}}); + setStorage(std::array{builder.inlineElems[0], nullptr}); else if (builder.size == 2) - finishValue(tListSmall, {.smallList = {builder.inlineElems[0], builder.inlineElems[1]}}); + setStorage(std::array{builder.inlineElems[0], builder.inlineElems[1]}); else - finishValue(tListN, {.bigList = {.size = builder.size, .elems = builder.elems}}); + setStorage(List{.size = builder.size, .elems = builder.elems}); } - inline void mkThunk(Env * e, Expr * ex) + inline void mkThunk(Env * e, Expr * ex) noexcept { - finishValue(tThunk, {.thunk = {.env = e, .expr = ex}}); + setStorage(ClosureThunk{.env = e, .expr = ex}); } - inline void mkApp(Value * l, Value * r) + inline void mkApp(Value * l, Value * r) noexcept { - finishValue(tApp, {.app = {.left = l, .right = r}}); + setStorage(FunctionApplicationThunk{.left = l, .right = r}); } - inline void mkLambda(Env * e, ExprLambda * f) + inline void mkLambda(Env * e, ExprLambda * f) noexcept { - finishValue(tLambda, {.lambda = {.env = e, .fun = f}}); + setStorage(Lambda{.env = e, .fun = f}); } inline void mkBlackhole(); void mkPrimOp(PrimOp * p); - inline void mkPrimOpApp(Value * l, Value * r) + inline void mkPrimOpApp(Value * l, Value * r) noexcept { - finishValue(tPrimOpApp, {.primOpApp = {.left = l, .right = r}}); + setStorage(PrimOpApplicationThunk{.left = l, .right = r}); } /** @@ -451,35 +549,35 @@ public: */ const PrimOp * primOpAppPrimOp() const; - inline void mkExternal(ExternalValueBase * e) + inline void mkExternal(ExternalValueBase * e) noexcept { - finishValue(tExternal, {.external = e}); + setStorage(e); } - inline void mkFloat(NixFloat n) + inline void mkFloat(NixFloat n) noexcept { - finishValue(tFloat, {.fpoint = n}); + setStorage(n); } - bool isList() const + bool isList() const noexcept { return isa(); } - std::span listItems() const + std::span listItems() const noexcept { assert(isList()); return std::span(listElems(), listSize()); } - Value * const * listElems() const + Value * const * listElems() const noexcept { - return isa() ? payload.smallList : payload.bigList.elems; + return isa() ? payload.smallList.data() : getStorage().elems; } - size_t listSize() const + size_t listSize() const noexcept { - return isa() ? (payload.smallList[1] == nullptr ? 1 : 2) : payload.bigList.size; + return isa() ? (getStorage()[1] == nullptr ? 1 : 2) : getStorage().size; } PosIdx determinePos(const PosIdx pos) const; @@ -493,85 +591,82 @@ public: SourcePath path() const { - assert(isa()); return SourcePath(ref(pathAccessor()->shared_from_this()), CanonPath(CanonPath::unchecked_t(), pathStr())); } - std::string_view string_view() const + std::string_view string_view() const noexcept { - assert(isa()); - return std::string_view(payload.string.c_str); + return std::string_view(getStorage().c_str); } - const char * c_str() const + const char * c_str() const noexcept { - assert(isa()); - return payload.string.c_str; + return getStorage().c_str; } - const char ** context() const + const char ** context() const noexcept { - return payload.string.context; + return getStorage().context; } - ExternalValueBase * external() const + ExternalValueBase * external() const noexcept { - return payload.external; + return getStorage(); } - const Bindings * attrs() const + const Bindings * attrs() const noexcept { - return payload.attrs; + return getStorage(); } - const PrimOp * primOp() const + const PrimOp * primOp() const noexcept { - return payload.primOp; + return getStorage(); } - bool boolean() const + bool boolean() const noexcept { - return payload.boolean; + return getStorage(); } - NixInt integer() const + NixInt integer() const noexcept { - return payload.integer; + return getStorage(); } - NixFloat fpoint() const + NixFloat fpoint() const noexcept { - return payload.fpoint; + return getStorage(); } - Lambda lambda() const + Lambda lambda() const noexcept { - return payload.lambda; + return getStorage(); } - ClosureThunk thunk() const + ClosureThunk thunk() const noexcept { - return payload.thunk; + return getStorage(); } - FunctionApplicationThunk primOpApp() const + PrimOpApplicationThunk primOpApp() const noexcept { - return payload.primOpApp; + return getStorage(); } - FunctionApplicationThunk app() const + FunctionApplicationThunk app() const noexcept { - return payload.app; + return getStorage(); } - const char * pathStr() const + const char * pathStr() const noexcept { - return payload.path.path; + return getStorage().path; } - SourceAccessor * pathAccessor() const + SourceAccessor * pathAccessor() const noexcept { - return payload.path.accessor; + return getStorage().accessor; } }; @@ -579,7 +674,7 @@ extern ExprBlackHole eBlackHole; bool Value::isBlackhole() const { - return isa() && thunk().expr == (Expr *) &eBlackHole; + return isThunk() && thunk().expr == (Expr *) &eBlackHole; } void Value::mkBlackhole() @@ -606,5 +701,4 @@ typedef std::shared_ptr RootValue; RootValue allocRootValue(Value * v); void forceNoNullByte(std::string_view s, std::function = nullptr); - } diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 588b1fb6d..40ff250a9 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -5050,7 +5050,7 @@ void EvalState::createBaseEnv(const EvalSettings & evalSettings) /* Now that we've added all primops, sort the `builtins' set, because attribute lookups expect it to be sorted. */ - getBuiltins().payload.attrs->sort(); + const_cast(getBuiltins().attrs())->sort(); staticBaseEnv->sort(); From e73fcf7b5300c045a60e42c63e6d445b445426c4 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Wed, 2 Jul 2025 21:57:02 +0300 Subject: [PATCH 5/6] libexpr: Use proxy ListView for all Value list accesses This also makes it possible to make `payload` field private in the `ValueStorage` class template. --- src/libexpr-c/nix_api_value.cc | 2 +- src/libexpr-tests/primops.cc | 101 +++++++------ src/libexpr/attr-path.cc | 2 +- src/libexpr/eval-cache.cc | 2 +- src/libexpr/eval.cc | 14 +- src/libexpr/get-drvs.cc | 9 +- src/libexpr/include/nix/expr/value.hh | 199 ++++++++++++++++++++++++-- src/libexpr/primops.cc | 72 +++++----- src/libexpr/primops/context.cc | 2 +- src/libexpr/print-ambiguous.cc | 6 +- src/libexpr/print.cc | 4 +- src/libexpr/value-to-json.cc | 2 +- src/libexpr/value-to-xml.cc | 2 +- src/libflake/flake.cc | 2 +- src/nix-env/nix-env.cc | 2 +- src/nix/prefetch.cc | 4 +- 16 files changed, 309 insertions(+), 116 deletions(-) diff --git a/src/libexpr-c/nix_api_value.cc b/src/libexpr-c/nix_api_value.cc index 8afe35a4b..fb90e2872 100644 --- a/src/libexpr-c/nix_api_value.cc +++ b/src/libexpr-c/nix_api_value.cc @@ -324,7 +324,7 @@ nix_value * nix_get_list_byidx(nix_c_context * context, const nix_value * value, try { auto & v = check_value_in(value); assert(v.type() == nix::nList); - auto * p = v.listElems()[ix]; + auto * p = v.listView()[ix]; nix_gc_incref(nullptr, p); if (p != nullptr) state->state.forceValue(*p, nix::noPos); diff --git a/src/libexpr-tests/primops.cc b/src/libexpr-tests/primops.cc index 6c301f157..9b5590d8d 100644 --- a/src/libexpr-tests/primops.cc +++ b/src/libexpr-tests/primops.cc @@ -150,8 +150,8 @@ namespace nix { TEST_F(PrimOpTest, attrValues) { auto v = eval("builtins.attrValues { x = \"foo\"; a = 1; }"); ASSERT_THAT(v, IsListOfSize(2)); - ASSERT_THAT(*v.listElems()[0], IsIntEq(1)); - ASSERT_THAT(*v.listElems()[1], IsStringEq("foo")); + ASSERT_THAT(*v.listView()[0], IsIntEq(1)); + ASSERT_THAT(*v.listView()[1], IsStringEq("foo")); } TEST_F(PrimOpTest, getAttr) { @@ -250,8 +250,8 @@ namespace nix { TEST_F(PrimOpTest, catAttrs) { auto v = eval("builtins.catAttrs \"a\" [{a = 1;} {b = 0;} {a = 2;}]"); ASSERT_THAT(v, IsListOfSize(2)); - ASSERT_THAT(*v.listElems()[0], IsIntEq(1)); - ASSERT_THAT(*v.listElems()[1], IsIntEq(2)); + ASSERT_THAT(*v.listView()[0], IsIntEq(1)); + ASSERT_THAT(*v.listView()[1], IsIntEq(2)); } TEST_F(PrimOpTest, functionArgs) { @@ -320,7 +320,8 @@ namespace nix { TEST_F(PrimOpTest, tail) { auto v = eval("builtins.tail [ 3 2 1 0 ]"); ASSERT_THAT(v, IsListOfSize(3)); - for (const auto [n, elem] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [n, elem] : enumerate(listView)) ASSERT_THAT(*elem, IsIntEq(2 - static_cast(n))); } @@ -331,17 +332,17 @@ namespace nix { TEST_F(PrimOpTest, map) { auto v = eval("map (x: \"foo\" + x) [ \"bar\" \"bla\" \"abc\" ]"); ASSERT_THAT(v, IsListOfSize(3)); - auto elem = v.listElems()[0]; + auto elem = v.listView()[0]; ASSERT_THAT(*elem, IsThunk()); state.forceValue(*elem, noPos); ASSERT_THAT(*elem, IsStringEq("foobar")); - elem = v.listElems()[1]; + elem = v.listView()[1]; ASSERT_THAT(*elem, IsThunk()); state.forceValue(*elem, noPos); ASSERT_THAT(*elem, IsStringEq("foobla")); - elem = v.listElems()[2]; + elem = v.listView()[2]; ASSERT_THAT(*elem, IsThunk()); state.forceValue(*elem, noPos); ASSERT_THAT(*elem, IsStringEq("fooabc")); @@ -350,7 +351,7 @@ namespace nix { TEST_F(PrimOpTest, filter) { auto v = eval("builtins.filter (x: x == 2) [ 3 2 3 2 3 2 ]"); ASSERT_THAT(v, IsListOfSize(3)); - for (const auto elem : v.listItems()) + for (const auto elem : v.listView()) ASSERT_THAT(*elem, IsIntEq(2)); } @@ -367,7 +368,8 @@ namespace nix { TEST_F(PrimOpTest, concatLists) { auto v = eval("builtins.concatLists [[1 2] [3 4]]"); ASSERT_THAT(v, IsListOfSize(4)); - for (const auto [i, elem] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [i, elem] : enumerate(listView)) ASSERT_THAT(*elem, IsIntEq(static_cast(i)+1)); } @@ -405,7 +407,8 @@ namespace nix { auto v = eval("builtins.genList (x: x + 1) 3"); ASSERT_EQ(v.type(), nList); ASSERT_EQ(v.listSize(), 3u); - for (const auto [i, elem] : enumerate(v.listItems())) { + auto listView = v.listView(); + for (const auto [i, elem] : enumerate(listView)) { ASSERT_THAT(*elem, IsThunk()); state.forceValue(*elem, noPos); ASSERT_THAT(*elem, IsIntEq(static_cast(i)+1)); @@ -418,7 +421,8 @@ namespace nix { ASSERT_EQ(v.listSize(), 6u); const std::vector numbers = { 42, 77, 147, 249, 483, 526 }; - for (const auto [n, elem] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [n, elem] : enumerate(listView)) ASSERT_THAT(*elem, IsIntEq(numbers[n])); } @@ -429,17 +433,17 @@ namespace nix { auto right = v.attrs()->get(createSymbol("right")); ASSERT_NE(right, nullptr); ASSERT_THAT(*right->value, IsListOfSize(2)); - ASSERT_THAT(*right->value->listElems()[0], IsIntEq(23)); - ASSERT_THAT(*right->value->listElems()[1], IsIntEq(42)); + ASSERT_THAT(*right->value->listView()[0], IsIntEq(23)); + ASSERT_THAT(*right->value->listView()[1], IsIntEq(42)); auto wrong = v.attrs()->get(createSymbol("wrong")); ASSERT_NE(wrong, nullptr); ASSERT_EQ(wrong->value->type(), nList); ASSERT_EQ(wrong->value->listSize(), 3u); ASSERT_THAT(*wrong->value, IsListOfSize(3)); - ASSERT_THAT(*wrong->value->listElems()[0], IsIntEq(1)); - ASSERT_THAT(*wrong->value->listElems()[1], IsIntEq(9)); - ASSERT_THAT(*wrong->value->listElems()[2], IsIntEq(3)); + ASSERT_THAT(*wrong->value->listView()[0], IsIntEq(1)); + ASSERT_THAT(*wrong->value->listView()[1], IsIntEq(9)); + ASSERT_THAT(*wrong->value->listView()[2], IsIntEq(3)); } TEST_F(PrimOpTest, concatMap) { @@ -448,7 +452,8 @@ namespace nix { ASSERT_EQ(v.listSize(), 6u); const std::vector numbers = { 1, 2, 0, 3, 4, 0 }; - for (const auto [n, elem] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [n, elem] : enumerate(listView)) ASSERT_THAT(*elem, IsIntEq(numbers[n])); } @@ -682,7 +687,8 @@ namespace nix { ASSERT_THAT(v, IsListOfSize(4)); const std::vector strings = { "1", "2", "3", "git" }; - for (const auto [n, p] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [n, p] : enumerate(listView)) ASSERT_THAT(*p, IsStringEq(strings[n])); } @@ -772,12 +778,12 @@ namespace nix { auto v = eval("builtins.split \"(a)b\" \"abc\""); ASSERT_THAT(v, IsListOfSize(3)); - ASSERT_THAT(*v.listElems()[0], IsStringEq("")); + ASSERT_THAT(*v.listView()[0], IsStringEq("")); - ASSERT_THAT(*v.listElems()[1], IsListOfSize(1)); - ASSERT_THAT(*v.listElems()[1]->listElems()[0], IsStringEq("a")); + ASSERT_THAT(*v.listView()[1], IsListOfSize(1)); + ASSERT_THAT(*v.listView()[1]->listView()[0], IsStringEq("a")); - ASSERT_THAT(*v.listElems()[2], IsStringEq("c")); + ASSERT_THAT(*v.listView()[2], IsStringEq("c")); } TEST_F(PrimOpTest, split2) { @@ -785,17 +791,17 @@ namespace nix { auto v = eval("builtins.split \"([ac])\" \"abc\""); ASSERT_THAT(v, IsListOfSize(5)); - ASSERT_THAT(*v.listElems()[0], IsStringEq("")); + ASSERT_THAT(*v.listView()[0], IsStringEq("")); - ASSERT_THAT(*v.listElems()[1], IsListOfSize(1)); - ASSERT_THAT(*v.listElems()[1]->listElems()[0], IsStringEq("a")); + ASSERT_THAT(*v.listView()[1], IsListOfSize(1)); + ASSERT_THAT(*v.listView()[1]->listView()[0], IsStringEq("a")); - ASSERT_THAT(*v.listElems()[2], IsStringEq("b")); + ASSERT_THAT(*v.listView()[2], IsStringEq("b")); - ASSERT_THAT(*v.listElems()[3], IsListOfSize(1)); - ASSERT_THAT(*v.listElems()[3]->listElems()[0], IsStringEq("c")); + ASSERT_THAT(*v.listView()[3], IsListOfSize(1)); + ASSERT_THAT(*v.listView()[3]->listView()[0], IsStringEq("c")); - ASSERT_THAT(*v.listElems()[4], IsStringEq("")); + ASSERT_THAT(*v.listView()[4], IsStringEq("")); } TEST_F(PrimOpTest, split3) { @@ -803,36 +809,36 @@ namespace nix { ASSERT_THAT(v, IsListOfSize(5)); // First list element - ASSERT_THAT(*v.listElems()[0], IsStringEq("")); + ASSERT_THAT(*v.listView()[0], IsStringEq("")); // 2nd list element is a list [ "" null ] - ASSERT_THAT(*v.listElems()[1], IsListOfSize(2)); - ASSERT_THAT(*v.listElems()[1]->listElems()[0], IsStringEq("a")); - ASSERT_THAT(*v.listElems()[1]->listElems()[1], IsNull()); + ASSERT_THAT(*v.listView()[1], IsListOfSize(2)); + ASSERT_THAT(*v.listView()[1]->listView()[0], IsStringEq("a")); + ASSERT_THAT(*v.listView()[1]->listView()[1], IsNull()); // 3rd element - ASSERT_THAT(*v.listElems()[2], IsStringEq("b")); + ASSERT_THAT(*v.listView()[2], IsStringEq("b")); // 4th element is a list: [ null "c" ] - ASSERT_THAT(*v.listElems()[3], IsListOfSize(2)); - ASSERT_THAT(*v.listElems()[3]->listElems()[0], IsNull()); - ASSERT_THAT(*v.listElems()[3]->listElems()[1], IsStringEq("c")); + ASSERT_THAT(*v.listView()[3], IsListOfSize(2)); + ASSERT_THAT(*v.listView()[3]->listView()[0], IsNull()); + ASSERT_THAT(*v.listView()[3]->listView()[1], IsStringEq("c")); // 5th element is the empty string - ASSERT_THAT(*v.listElems()[4], IsStringEq("")); + ASSERT_THAT(*v.listView()[4], IsStringEq("")); } TEST_F(PrimOpTest, split4) { auto v = eval("builtins.split \"([[:upper:]]+)\" \" FOO \""); ASSERT_THAT(v, IsListOfSize(3)); - auto first = v.listElems()[0]; - auto second = v.listElems()[1]; - auto third = v.listElems()[2]; + auto first = v.listView()[0]; + auto second = v.listView()[1]; + auto third = v.listView()[2]; ASSERT_THAT(*first, IsStringEq(" ")); ASSERT_THAT(*second, IsListOfSize(1)); - ASSERT_THAT(*second->listElems()[0], IsStringEq("FOO")); + ASSERT_THAT(*second->listView()[0], IsStringEq("FOO")); ASSERT_THAT(*third, IsStringEq(" ")); } @@ -850,14 +856,14 @@ namespace nix { TEST_F(PrimOpTest, match3) { auto v = eval("builtins.match \"a(b)(c)\" \"abc\""); ASSERT_THAT(v, IsListOfSize(2)); - ASSERT_THAT(*v.listElems()[0], IsStringEq("b")); - ASSERT_THAT(*v.listElems()[1], IsStringEq("c")); + ASSERT_THAT(*v.listView()[0], IsStringEq("b")); + ASSERT_THAT(*v.listView()[1], IsStringEq("c")); } TEST_F(PrimOpTest, match4) { auto v = eval("builtins.match \"[[:space:]]+([[:upper:]]+)[[:space:]]+\" \" FOO \""); ASSERT_THAT(v, IsListOfSize(1)); - ASSERT_THAT(*v.listElems()[0], IsStringEq("FOO")); + ASSERT_THAT(*v.listView()[0], IsStringEq("FOO")); } TEST_F(PrimOpTest, match5) { @@ -874,7 +880,8 @@ namespace nix { // ensure that the list is sorted const std::vector expected { "a", "x", "y", "z" }; - for (const auto [n, elem] : enumerate(v.listItems())) + auto listView = v.listView(); + for (const auto [n, elem] : enumerate(listView)) ASSERT_THAT(*elem, IsStringEq(expected[n])); } diff --git a/src/libexpr/attr-path.cc b/src/libexpr/attr-path.cc index 722b57bbf..111d04cf2 100644 --- a/src/libexpr/attr-path.cc +++ b/src/libexpr/attr-path.cc @@ -95,7 +95,7 @@ std::pair findAlongAttrPath(EvalState & state, const std::strin if (*attrIndex >= v->listSize()) throw AttrPathNotFound("list index %1% in selection path '%2%' is out of range", *attrIndex, attrPath); - v = v->listElems()[*attrIndex]; + v = v->listView()[*attrIndex]; pos = noPos; } diff --git a/src/libexpr/eval-cache.cc b/src/libexpr/eval-cache.cc index 0d732f59c..27d60d6ef 100644 --- a/src/libexpr/eval-cache.cc +++ b/src/libexpr/eval-cache.cc @@ -721,7 +721,7 @@ std::vector AttrCursor::getListOfStrings() std::vector res; - for (auto & elem : v.listItems()) + for (auto elem : v.listView()) res.push_back(std::string(root->state.forceStringNoCtx(*elem, noPos, "while evaluating an attribute for caching"))); if (root->db) diff --git a/src/libexpr/eval.cc b/src/libexpr/eval.cc index fba1e6665..1321e00a5 100644 --- a/src/libexpr/eval.cc +++ b/src/libexpr/eval.cc @@ -2007,9 +2007,10 @@ void EvalState::concatLists(Value & v, size_t nrLists, Value * const * lists, co auto list = buildList(len); auto out = list.elems; for (size_t n = 0, pos = 0; n < nrLists; ++n) { - auto l = lists[n]->listSize(); + auto listView = lists[n]->listView(); + auto l = listView.size(); if (l) - memcpy(out + pos, lists[n]->listElems(), l * sizeof(Value *)); + memcpy(out + pos, listView.data(), l * sizeof(Value *)); pos += l; } v.mkList(list); @@ -2174,7 +2175,7 @@ void EvalState::forceValueDeep(Value & v) } else if (v.isList()) { - for (auto v2 : v.listItems()) + for (auto v2 : v.listView()) recurse(*v2); } }; @@ -2411,7 +2412,8 @@ BackedStringView EvalState::coerceToString( if (v.isList()) { std::string result; - for (auto [n, v2] : enumerate(v.listItems())) { + auto listView = v.listView(); + for (auto [n, v2] : enumerate(listView)) { try { result += *coerceToString(pos, *v2, context, "while evaluating one element of the list", @@ -2666,7 +2668,7 @@ void EvalState::assertEqValues(Value & v1, Value & v2, const PosIdx pos, std::st } for (size_t n = 0; n < v1.listSize(); ++n) { try { - assertEqValues(*v1.listElems()[n], *v2.listElems()[n], pos, errorCtx); + assertEqValues(*v1.listView()[n], *v2.listView()[n], pos, errorCtx); } catch (Error & e) { e.addTrace(positions[pos], "while comparing list element %d", n); throw; @@ -2818,7 +2820,7 @@ bool EvalState::eqValues(Value & v1, Value & v2, const PosIdx pos, std::string_v case nList: if (v1.listSize() != v2.listSize()) return false; for (size_t n = 0; n < v1.listSize(); ++n) - if (!eqValues(*v1.listElems()[n], *v2.listElems()[n], pos, errorCtx)) return false; + if (!eqValues(*v1.listView()[n], *v2.listView()[n], pos, errorCtx)) return false; return true; case nAttrs: { diff --git a/src/libexpr/get-drvs.cc b/src/libexpr/get-drvs.cc index f15ad4d73..3c9ff9ff3 100644 --- a/src/libexpr/get-drvs.cc +++ b/src/libexpr/get-drvs.cc @@ -117,7 +117,7 @@ PackageInfo::Outputs PackageInfo::queryOutputs(bool withPaths, bool onlyOutputsT state->forceList(*i->value, i->pos, "while evaluating the 'outputs' attribute of a derivation"); /* For each output... */ - for (auto elem : i->value->listItems()) { + for (auto elem : i->value->listView()) { std::string output(state->forceStringNoCtx(*elem, i->pos, "while evaluating the name of an output of a derivation")); if (withPaths) { @@ -159,7 +159,7 @@ PackageInfo::Outputs PackageInfo::queryOutputs(bool withPaths, bool onlyOutputsT /* ^ this shows during `nix-env -i` right under the bad derivation */ if (!outTI->isList()) throw errMsg; Outputs result; - for (auto elem : outTI->listItems()) { + for (auto elem : outTI->listView()) { if (elem->type() != nString) throw errMsg; auto out = outputs.find(elem->c_str()); if (out == outputs.end()) throw errMsg; @@ -206,7 +206,7 @@ bool PackageInfo::checkMeta(Value & v) { state->forceValue(v, v.determinePos(noPos)); if (v.type() == nList) { - for (auto elem : v.listItems()) + for (auto elem : v.listView()) if (!checkMeta(*elem)) return false; return true; } @@ -400,7 +400,8 @@ static void getDerivations(EvalState & state, Value & vIn, } else if (v.type() == nList) { - for (auto [n, elem] : enumerate(v.listItems())) { + auto listView = v.listView(); + for (auto [n, elem] : enumerate(listView)) { std::string pathPrefix2 = addToPath(pathPrefix, fmt("%d", n)); if (getDerivation(state, *elem, pathPrefix2, drvs, done, ignoreAssertionFailures)) getDerivations(state, *elem, pathPrefix2, autoArgs, drvs, done, ignoreAssertionFailures); diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 642459463..3f28aca01 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -4,6 +4,7 @@ #include #include #include +#include #include "nix/expr/eval-gc.hh" #include "nix/expr/value/context.hh" @@ -317,12 +318,11 @@ protected: #undef NIX_VALUE_STORAGE_DEFINE_FIELD }; - Payload payload; - private: InternalType internalType = tUninitialized; + Payload payload; -public: +protected: #define NIX_VALUE_STORAGE_GET_IMPL(K, FIELD_NAME, DISCRIMINATOR) \ void getStorage(K & val) const noexcept \ { \ @@ -351,6 +351,189 @@ public: } }; +/** + * View into a list of Value * that is itself immutable. + * + * Since not all representations of ValueStorage can provide + * a pointer to a const array of Value * this proxy class either + * stores the small list inline or points to the big list. + */ +class ListView +{ + using SpanType = std::span; + using SmallList = detail::ValueBase::SmallList; + using List = detail::ValueBase::List; + + std::variant raw; + +public: + ListView(SmallList list) + : raw(list) + { + } + + ListView(List list) + : raw(list) + { + } + + Value * const * data() const & noexcept + { + return std::visit( + overloaded{ + [](const SmallList & list) { return list.data(); }, [](const List & list) { return list.elems; }}, + raw); + } + + std::size_t size() const noexcept + { + return std::visit( + overloaded{ + [](const SmallList & list) -> std::size_t { return list.back() == nullptr ? 1 : 2; }, + [](const List & list) -> std::size_t { return list.size; }}, + raw); + } + + Value * operator[](std::size_t i) const noexcept + { + return data()[i]; + } + + SpanType span() const & + { + return SpanType(data(), size()); + } + + /* Ensure that no dangling views can be created accidentally, as that + would lead to hard to diagnose bugs that only affect small lists. */ + SpanType span() && = delete; + Value * const * data() && noexcept = delete; + + /** + * Random-access iterator that only allows iterating over a constant range + * of mutable Value pointers. + * + * @note Not a pointer to minimize potential misuses and implicitly relying + * on the iterator being a pointer. + **/ + class iterator + { + public: + using value_type = Value *; + using pointer = const value_type *; + using reference = const value_type &; + using difference_type = std::ptrdiff_t; + using iterator_category = std::random_access_iterator_tag; + + private: + pointer ptr = nullptr; + + friend class ListView; + + iterator(pointer ptr) + : ptr(ptr) + { + } + + public: + iterator() = default; + + reference operator*() const + { + return *ptr; + } + + const value_type * operator->() const + { + return ptr; + } + + reference operator[](difference_type diff) const + { + return ptr[diff]; + } + + iterator & operator++() + { + ++ptr; + return *this; + } + + iterator operator++(int) + { + pointer tmp = ptr; + ++*this; + return iterator(tmp); + } + + iterator & operator--() + { + --ptr; + return *this; + } + + iterator operator--(int) + { + pointer tmp = ptr; + --*this; + return iterator(tmp); + } + + iterator & operator+=(difference_type diff) + { + ptr += diff; + return *this; + } + + iterator operator+(difference_type diff) const + { + return iterator(ptr + diff); + } + + friend iterator operator+(difference_type diff, const iterator & rhs) + { + return iterator(diff + rhs.ptr); + } + + iterator & operator-=(difference_type diff) + { + ptr -= diff; + return *this; + } + + iterator operator-(difference_type diff) const + { + return iterator(ptr - diff); + } + + difference_type operator-(const iterator & rhs) const + { + return ptr - rhs.ptr; + } + + std::strong_ordering operator<=>(const iterator & rhs) const = default; + }; + + using const_iterator = iterator; + + iterator begin() const & + { + return data(); + } + + iterator end() const & + { + return data() + size(); + } + + /* Ensure that no dangling iterators can be created accidentally, as that + would lead to hard to diagnose bugs that only affect small lists. */ + iterator begin() && = delete; + iterator end() && = delete; +}; + +static_assert(std::random_access_iterator); + struct Value : public ValueStorage { friend std::string showType(const Value & v); @@ -564,15 +747,9 @@ public: return isa(); } - std::span listItems() const noexcept + ListView listView() const noexcept { - assert(isList()); - return std::span(listElems(), listSize()); - } - - Value * const * listElems() const noexcept - { - return isa() ? payload.smallList.data() : getStorage().elems; + return isa() ? ListView(getStorage()) : ListView(getStorage()); } size_t listSize() const noexcept diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 40ff250a9..f9f834a62 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -415,7 +415,7 @@ void prim_importNative(EvalState & state, const PosIdx pos, Value * * args, Valu void prim_exec(EvalState & state, const PosIdx pos, Value * * args, Value & v) { state.forceList(*args[0], pos, "while evaluating the first argument passed to builtins.exec"); - auto elems = args[0]->listElems(); + auto elems = args[0]->listView(); auto count = args[0]->listSize(); if (count == 0) state.error("at least one argument to 'exec' required").atPos(pos).debugThrow(); @@ -660,8 +660,8 @@ struct CompareValues return false; } else if (i == v1->listSize()) { return true; - } else if (!state.eqValues(*v1->listElems()[i], *v2->listElems()[i], pos, errorCtx)) { - return (*this)(v1->listElems()[i], v2->listElems()[i], "while comparing two list elements"); + } else if (!state.eqValues(*v1->listView()[i], *v2->listView()[i], pos, errorCtx)) { + return (*this)(v1->listView()[i], v2->listView()[i], "while comparing two list elements"); } } default: @@ -689,7 +689,7 @@ static void prim_genericClosure(EvalState & state, const PosIdx pos, Value * * a state.forceList(*startSet->value, noPos, "while evaluating the 'startSet' attribute passed as argument to builtins.genericClosure"); ValueList workSet; - for (auto elem : startSet->value->listItems()) + for (auto elem : startSet->value->listView()) workSet.push_back(elem); if (startSet->value->listSize() == 0) { @@ -727,7 +727,7 @@ static void prim_genericClosure(EvalState & state, const PosIdx pos, Value * * a state.forceList(newElements, noPos, "while evaluating the return value of the `operator` passed to builtins.genericClosure"); /* Add the values returned by the operator to the work set. */ - for (auto elem : newElements.listItems()) { + for (auto elem : newElements.listView()) { state.forceValue(*elem, noPos); // "while evaluating one one of the elements returned by the `operator` passed to builtins.genericClosure"); workSet.push_back(elem); } @@ -1367,7 +1367,7 @@ static void derivationStrictInternal( command-line arguments to the builder. */ else if (i->name == state.sArgs) { state.forceList(*i->value, pos, context_below); - for (auto elem : i->value->listItems()) { + for (auto elem : i->value->listView()) { auto s = state.coerceToString(pos, *elem, context, "while evaluating an element of the argument list", true).toOwned(); @@ -1399,7 +1399,7 @@ static void derivationStrictInternal( /* Require ‘outputs’ to be a list of strings. */ state.forceList(*i->value, pos, context_below); Strings ss; - for (auto elem : i->value->listItems()) + for (auto elem : i->value->listView()) ss.emplace_back(state.forceStringNoCtx(*elem, pos, context_below)); handleOutputs(ss); } @@ -1882,7 +1882,7 @@ static void prim_findFile(EvalState & state, const PosIdx pos, Value * * args, V LookupPath lookupPath; - for (auto v2 : args[0]->listItems()) { + for (auto v2 : args[0]->listView()) { state.forceAttrs(*v2, pos, "while evaluating an element of the list passed to builtins.findFile"); std::string prefix; @@ -2920,7 +2920,7 @@ static void prim_removeAttrs(EvalState & state, const PosIdx pos, Value * * args // 64: large enough to fit the attributes of a derivation boost::container::small_vector names; names.reserve(args[1]->listSize()); - for (auto elem : args[1]->listItems()) { + for (auto elem : args[1]->listView()) { state.forceStringNoCtx(*elem, pos, "while evaluating the values of the second argument passed to builtins.removeAttrs"); names.emplace_back(state.symbols.create(elem->string_view()), nullptr); } @@ -2963,11 +2963,12 @@ static void prim_listToAttrs(EvalState & state, const PosIdx pos, Value * * args state.forceList(*args[0], pos, "while evaluating the argument passed to builtins.listToAttrs"); // Step 1. Sort the name-value attrsets in place using the memory we allocate for the result - size_t listSize = args[0]->listSize(); + auto listView = args[0]->listView(); + size_t listSize = listView.size(); auto & bindings = *state.allocBindings(listSize); using ElemPtr = decltype(&bindings[0].value); - for (const auto & [n, v2] : enumerate(args[0]->listItems())) { + for (const auto & [n, v2] : enumerate(listView)) { state.forceAttrs(*v2, pos, "while evaluating an element of the list passed to builtins.listToAttrs"); auto j = state.getAttr(state.sName, v2->attrs(), "in a {name=...; value=...;} pair"); @@ -3122,7 +3123,7 @@ static void prim_catAttrs(EvalState & state, const PosIdx pos, Value * * args, V SmallValueVector res(args[1]->listSize()); size_t found = 0; - for (auto v2 : args[1]->listItems()) { + for (auto v2 : args[1]->listView()) { state.forceAttrs(*v2, pos, "while evaluating an element in the list passed as second argument to builtins.catAttrs"); if (auto i = v2->attrs()->get(attrName)) res[found++] = i->value; @@ -3247,7 +3248,7 @@ static void prim_zipAttrsWith(EvalState & state, const PosIdx pos, Value * * arg state.forceFunction(*args[0], pos, "while evaluating the first argument passed to builtins.zipAttrsWith"); state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.zipAttrsWith"); - const auto listItems = args[1]->listItems(); + const auto listItems = args[1]->listView(); for (auto & vElem : listItems) { state.forceAttrs(*vElem, noPos, "while evaluating a value of the list passed as second argument to builtins.zipAttrsWith"); @@ -3346,8 +3347,8 @@ static void prim_elemAt(EvalState & state, const PosIdx pos, Value * * args, Val n, args[0]->listSize() ).atPos(pos).debugThrow(); - state.forceValue(*args[0]->listElems()[n], pos); - v = *args[0]->listElems()[n]; + state.forceValue(*args[0]->listView()[n], pos); + v = *args[0]->listView()[n]; } static RegisterPrimOp primop_elemAt({ @@ -3368,8 +3369,8 @@ static void prim_head(EvalState & state, const PosIdx pos, Value * * args, Value state.error( "'builtins.head' called on an empty list" ).atPos(pos).debugThrow(); - state.forceValue(*args[0]->listElems()[0], pos); - v = *args[0]->listElems()[0]; + state.forceValue(*args[0]->listView()[0], pos); + v = *args[0]->listView()[0]; } static RegisterPrimOp primop_head({ @@ -3394,7 +3395,7 @@ static void prim_tail(EvalState & state, const PosIdx pos, Value * * args, Value auto list = state.buildList(args[0]->listSize() - 1); for (const auto & [n, v] : enumerate(list)) - v = args[0]->listElems()[n + 1]; + v = args[0]->listView()[n + 1]; v.mkList(list); } @@ -3429,7 +3430,7 @@ static void prim_map(EvalState & state, const PosIdx pos, Value * * args, Value auto list = state.buildList(args[1]->listSize()); for (const auto & [n, v] : enumerate(list)) (v = state.allocValue())->mkApp( - args[0], args[1]->listElems()[n]); + args[0], args[1]->listView()[n]); v.mkList(list); } @@ -3470,9 +3471,9 @@ static void prim_filter(EvalState & state, const PosIdx pos, Value * * args, Val bool same = true; for (size_t n = 0; n < len; ++n) { Value res; - state.callFunction(*args[0], *args[1]->listElems()[n], res, noPos); + state.callFunction(*args[0], *args[1]->listView()[n], res, noPos); if (state.forceBool(res, pos, "while evaluating the return value of the filtering function passed to builtins.filter")) - vs[k++] = args[1]->listElems()[n]; + vs[k++] = args[1]->listView()[n]; else same = false; } @@ -3501,7 +3502,7 @@ static void prim_elem(EvalState & state, const PosIdx pos, Value * * args, Value { bool res = false; state.forceList(*args[1], pos, "while evaluating the second argument passed to builtins.elem"); - for (auto elem : args[1]->listItems()) + for (auto elem : args[1]->listView()) if (state.eqValues(*args[0], *elem, pos, "while searching for the presence of the given element in the list")) { res = true; break; @@ -3523,7 +3524,8 @@ static RegisterPrimOp primop_elem({ static void prim_concatLists(EvalState & state, const PosIdx pos, Value * * args, Value & v) { state.forceList(*args[0], pos, "while evaluating the first argument passed to builtins.concatLists"); - state.concatLists(v, args[0]->listSize(), args[0]->listElems(), pos, "while evaluating a value of the list passed to builtins.concatLists"); + auto listView = args[0]->listView(); + state.concatLists(v, args[0]->listSize(), listView.data(), pos, "while evaluating a value of the list passed to builtins.concatLists"); } static RegisterPrimOp primop_concatLists({ @@ -3561,7 +3563,8 @@ static void prim_foldlStrict(EvalState & state, const PosIdx pos, Value * * args if (args[2]->listSize()) { Value * vCur = args[1]; - for (auto [n, elem] : enumerate(args[2]->listItems())) { + auto listView = args[2]->listView(); + for (auto [n, elem] : enumerate(listView)) { Value * vs []{vCur, elem}; vCur = n == args[2]->listSize() - 1 ? &v : state.allocValue(); state.callFunction(*args[0], vs, *vCur, pos); @@ -3603,7 +3606,7 @@ static void anyOrAll(bool any, EvalState & state, const PosIdx pos, Value * * ar : "while evaluating the return value of the function passed to builtins.all"; Value vTmp; - for (auto elem : args[1]->listItems()) { + for (auto elem : args[1]->listView()) { state.callFunction(*args[0], *elem, vTmp, pos); bool res = state.forceBool(vTmp, pos, errorCtx); if (res == any) { @@ -3701,7 +3704,7 @@ static void prim_sort(EvalState & state, const PosIdx pos, Value * * args, Value auto list = state.buildList(len); for (const auto & [n, v] : enumerate(list)) - state.forceValue(*(v = args[1]->listElems()[n]), pos); + state.forceValue(*(v = args[1]->listView()[n]), pos); auto comparator = [&](Value * a, Value * b) { /* Optimization: if the comparator is lessThan, bypass @@ -3787,7 +3790,7 @@ static void prim_partition(EvalState & state, const PosIdx pos, Value * * args, ValueVector right, wrong; for (size_t n = 0; n < len; ++n) { - auto vElem = args[1]->listElems()[n]; + auto vElem = args[1]->listView()[n]; state.forceValue(*vElem, pos); Value res; state.callFunction(*args[0], *vElem, res, pos); @@ -3844,7 +3847,7 @@ static void prim_groupBy(EvalState & state, const PosIdx pos, Value * * args, Va ValueVectorMap attrs; - for (auto vElem : args[1]->listItems()) { + for (auto vElem : args[1]->listView()) { Value res; state.callFunction(*args[0], *vElem, res, pos); auto name = state.forceStringNoCtx(res, pos, "while evaluating the return value of the grouping function passed to builtins.groupBy"); @@ -3900,7 +3903,7 @@ static void prim_concatMap(EvalState & state, const PosIdx pos, Value * * args, size_t len = 0; for (size_t n = 0; n < nrLists; ++n) { - Value * vElem = args[1]->listElems()[n]; + Value * vElem = args[1]->listView()[n]; state.callFunction(*args[0], *vElem, lists[n], pos); state.forceList(lists[n], lists[n].determinePos(args[0]->determinePos(pos)), "while evaluating the return value of the function passed to builtins.concatMap"); len += lists[n].listSize(); @@ -3909,9 +3912,10 @@ static void prim_concatMap(EvalState & state, const PosIdx pos, Value * * args, auto list = state.buildList(len); auto out = list.elems; for (size_t n = 0, pos = 0; n < nrLists; ++n) { - auto l = lists[n].listSize(); + auto listView = lists[n].listView(); + auto l = listView.size(); if (l) - memcpy(out + pos, lists[n].listElems(), l * sizeof(Value *)); + memcpy(out + pos, listView.data(), l * sizeof(Value *)); pos += l; } v.mkList(list); @@ -4587,7 +4591,7 @@ static void prim_concatStringsSep(EvalState & state, const PosIdx pos, Value * * res.reserve((args[1]->listSize() + 32) * sep.size()); bool first = true; - for (auto elem : args[1]->listItems()) { + for (auto elem : args[1]->listView()) { if (first) first = false; else res += sep; res += *state.coerceToString(pos, *elem, context, "while evaluating one element of the list of strings to concat passed to builtins.concatStringsSep"); } @@ -4617,11 +4621,11 @@ static void prim_replaceStrings(EvalState & state, const PosIdx pos, Value * * a std::vector from; from.reserve(args[0]->listSize()); - for (auto elem : args[0]->listItems()) + for (auto elem : args[0]->listView()) from.emplace_back(state.forceString(*elem, pos, "while evaluating one of the strings to replace passed to builtins.replaceStrings")); std::unordered_map cache; - auto to = args[1]->listItems(); + auto to = args[1]->listView(); NixStringContext context; auto s = state.forceString(*args[2], context, pos, "while evaluating the third argument passed to builtins.replaceStrings"); diff --git a/src/libexpr/primops/context.cc b/src/libexpr/primops/context.cc index 496678884..56962d6a8 100644 --- a/src/libexpr/primops/context.cc +++ b/src/libexpr/primops/context.cc @@ -312,7 +312,7 @@ static void prim_appendContext(EvalState & state, const PosIdx pos, Value * * ar name ).atPos(i.pos).debugThrow(); } - for (auto elem : attr->value->listItems()) { + for (auto elem : attr->value->listView()) { auto outputName = state.forceStringNoCtx(*elem, attr->pos, "while evaluating an output name within a string context"); context.emplace(NixStringContextElem::Built { .drvPath = makeConstantStorePathRef(namePath), diff --git a/src/libexpr/print-ambiguous.cc b/src/libexpr/print-ambiguous.cc index 0646783c2..2a0b009eb 100644 --- a/src/libexpr/print-ambiguous.cc +++ b/src/libexpr/print-ambiguous.cc @@ -50,11 +50,13 @@ void printAmbiguous( break; } case nList: - if (seen && v.listSize() && !seen->insert(v.listElems()).second) + /* Use pointer to the Value instead of pointer to the elements, because + that would need to explicitly handle the case of SmallList. */ + if (seen && v.listSize() && !seen->insert(&v).second) str << "«repeated»"; else { str << "[ "; - for (auto v2 : v.listItems()) { + for (auto v2 : v.listView()) { if (v2) printAmbiguous(*v2, symbols, str, seen, depth - 1); else diff --git a/src/libexpr/print.cc b/src/libexpr/print.cc index f0e0eed2d..1f0c592c1 100644 --- a/src/libexpr/print.cc +++ b/src/libexpr/print.cc @@ -415,8 +415,8 @@ private: if (depth < options.maxDepth) { increaseIndent(); output << "["; - auto listItems = v.listItems(); - auto prettyPrint = shouldPrettyPrintList(listItems); + auto listItems = v.listView(); + auto prettyPrint = shouldPrettyPrintList(listItems.span()); size_t currentListItemsPrinted = 0; diff --git a/src/libexpr/value-to-json.cc b/src/libexpr/value-to-json.cc index 4c0667d9e..a9b51afa0 100644 --- a/src/libexpr/value-to-json.cc +++ b/src/libexpr/value-to-json.cc @@ -73,7 +73,7 @@ json printValueAsJSON(EvalState & state, bool strict, case nList: { out = json::array(); int i = 0; - for (auto elem : v.listItems()) { + for (auto elem : v.listView()) { try { out.push_back(printValueAsJSON(state, strict, *elem, pos, context, copyToStore)); } catch (Error & e) { diff --git a/src/libexpr/value-to-xml.cc b/src/libexpr/value-to-xml.cc index 54ff06f9e..235ef2627 100644 --- a/src/libexpr/value-to-xml.cc +++ b/src/libexpr/value-to-xml.cc @@ -114,7 +114,7 @@ static void printValueAsXML(EvalState & state, bool strict, bool location, case nList: { XMLOpenElement _(doc, "list"); - for (auto v2 : v.listItems()) + for (auto v2 : v.listView()) printValueAsXML(state, strict, location, *v2, doc, context, drvsSeen, pos); break; } diff --git a/src/libflake/flake.cc b/src/libflake/flake.cc index e59649fec..322abaa4a 100644 --- a/src/libflake/flake.cc +++ b/src/libflake/flake.cc @@ -289,7 +289,7 @@ static Flake readFlake( Explicit { state.forceBool(*setting.value, setting.pos, "") }); else if (setting.value->type() == nList) { std::vector ss; - for (auto elem : setting.value->listItems()) { + for (auto elem : setting.value->listView()) { if (elem->type() != nString) state.error("list element in flake configuration setting '%s' is %s while a string is expected", state.symbols[setting.name], showType(*setting.value)).debugThrow(); diff --git a/src/nix-env/nix-env.cc b/src/nix-env/nix-env.cc index 25ff39e38..fd48e67dc 100644 --- a/src/nix-env/nix-env.cc +++ b/src/nix-env/nix-env.cc @@ -1265,7 +1265,7 @@ static void opQuery(Globals & globals, Strings opFlags, Strings opArgs) } else if (v->type() == nList) { attrs2["type"] = "strings"; XMLOpenElement m(xml, "meta", attrs2); - for (auto elem : v->listItems()) { + for (auto elem : v->listView()) { if (elem->type() != nString) continue; XMLAttrs attrs3; attrs3["value"] = elem->c_str(); diff --git a/src/nix/prefetch.cc b/src/nix/prefetch.cc index 9e5e3c09a..96dcdb4e8 100644 --- a/src/nix/prefetch.cc +++ b/src/nix/prefetch.cc @@ -46,7 +46,7 @@ std::string resolveMirrorUrl(EvalState & state, const std::string & url) if (mirrorList->value->listSize() < 1) throw Error("mirror URL '%s' did not expand to anything", url); - std::string mirror(state.forceString(*mirrorList->value->listElems()[0], noPos, "while evaluating the first available mirror")); + std::string mirror(state.forceString(*mirrorList->value->listView()[0], noPos, "while evaluating the first available mirror")); return mirror + (hasSuffix(mirror, "/") ? "" : "/") + s.substr(p + 1); } @@ -221,7 +221,7 @@ static int main_nix_prefetch_url(int argc, char * * argv) state->forceList(*attr->value, noPos, "while evaluating the urls to prefetch"); if (attr->value->listSize() < 1) throw Error("'urls' list is empty"); - url = state->forceString(*attr->value->listElems()[0], noPos, "while evaluating the first url from the urls list"); + url = state->forceString(*attr->value->listView()[0], noPos, "while evaluating the first url from the urls list"); /* Extract the hash mode. */ auto attr2 = v.attrs()->get(state->symbols.create("outputHashMode")); From 5a20a48f1367234b52dca9569955f825a0d626b0 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Mon, 30 Jun 2025 22:03:07 +0300 Subject: [PATCH 6/6] libexpr: Reduce the size of Value down to 16 bytes This shaves off a very significand amount of memory used for evaluation as well as reduces the GC-managed heap. Previously the union discriminator (InternalType) was stored as a separate field in the Value, which takes up whole 8 bytes due to padding needed for member alignment. This effectively wasted 7 whole bytes of memory. Instead of doing that InternalType is instead packed into pointer alignment niches. As it turns out, there's more than enough unused bits there for the bit packing to be effective. See the doxygen comment in the ValueStorage specialization for more details. This does not add any performance overhead, even though we now consistently assert the InternalType in all getters. This can also be made atomic with a double width compare-and-swap instruction on x86_64 (CMPXCHG16B instruction) for parallel evaluation. --- src/libexpr/eval-gc.cc | 8 + src/libexpr/include/nix/expr/value.hh | 317 +++++++++++++++++++++++++- 2 files changed, 313 insertions(+), 12 deletions(-) diff --git a/src/libexpr/eval-gc.cc b/src/libexpr/eval-gc.cc index bec668001..5a4ecf035 100644 --- a/src/libexpr/eval-gc.cc +++ b/src/libexpr/eval-gc.cc @@ -4,6 +4,7 @@ #include "nix/util/config-global.hh" #include "nix/util/serialise.hh" #include "nix/expr/eval-gc.hh" +#include "nix/expr/value.hh" #include "expr-config-private.hh" @@ -52,6 +53,13 @@ static inline void initGCReal() GC_INIT(); + /* Register valid displacements in case we are using alignment niches + for storing the type information. This way tagged pointers are considered + to be valid, even when they are not aligned. */ + if constexpr (detail::useBitPackedValueStorage) + for (std::size_t i = 1; i < sizeof(std::uintptr_t); ++i) + GC_register_displacement(i); + GC_set_oom_fn(oomHandler); /* Set the initial heap size to something fairly big (25% of diff --git a/src/libexpr/include/nix/expr/value.hh b/src/libexpr/include/nix/expr/value.hh index 3f28aca01..098effa29 100644 --- a/src/libexpr/include/nix/expr/value.hh +++ b/src/libexpr/include/nix/expr/value.hh @@ -19,23 +19,35 @@ namespace nix { struct Value; class BindingsBuilder; +/** + * Internal type discriminator, which is more detailed than `ValueType`, as + * it specifies the exact representation used (for types that have multiple + * possible representations). + * + * @warning The ordering is very significant. See ValueStorage::getInternalType() for details + * about how this is mapped into the alignment bits to save significant memory. + * This also restricts the number of internal types represented with distinct memory layouts. + */ typedef enum { tUninitialized = 0, + /* layout: Single/zero field payload */ tInt = 1, tBool, + tNull, + tFloat, + tExternal, + tPrimOp, + tAttrs, + /* layout: Pair of pointers payload */ + tListSmall, + tPrimOpApp, + tApp, + tThunk, + tLambda, + /* layout: Single untaggable field */ + tListN, tString, tPath, - tNull, - tAttrs, - tListSmall, - tListN, - tThunk, - tApp, - tLambda, - tPrimOp, - tPrimOpApp, - tExternal, - tFloat } InternalType; /** @@ -307,7 +319,7 @@ inline constexpr InternalType payloadTypeToInternalType = PayloadTypeToInternalT * All specializations of this type need to implement getStorage, setStorage and * getInternalType methods. */ -template +template class ValueStorage : public detail::ValueBase { protected: @@ -351,6 +363,287 @@ protected: } }; +namespace detail { + +/* Whether to use a specialization of ValueStorage that does bitpacking into + alignment niches. */ +template +inline constexpr bool useBitPackedValueStorage = (ptrSize == 8) && (__STDCPP_DEFAULT_NEW_ALIGNMENT__ >= 8); + +} // namespace detail + +/** + * Value storage that is optimized for 64 bit systems. + * Packs discriminator bits into the pointer alignment niches. + */ +template +class ValueStorage>> : public detail::ValueBase +{ + /* Needs a dependent type name in order for member functions (and + * potentially ill-formed bit casts) to be SFINAE'd out. + * + * Otherwise some member functions could possibly be instantiated for 32 bit + * systems and fail due to an unsatisfied constraint. + */ + template + struct PackedPointerTypeStruct + { + using type = std::uint64_t; + }; + + using PackedPointer = typename PackedPointerTypeStruct::type; + using Payload = std::array; + Payload payload = {}; + + static constexpr int discriminatorBits = 3; + static constexpr PackedPointer discriminatorMask = (PackedPointer(1) << discriminatorBits) - 1; + + /** + * The value is stored as a pair of 8-byte double words. All pointers are assumed + * to be 8-byte aligned. This gives us at most 6 bits of discriminator bits + * of free storage. In some cases when one double word can't be tagged the whole + * discriminator is stored in the first double word. + * + * The layout of discriminator bits is determined by the 3 bits of PrimaryDiscriminator, + * which are always stored in the lower 3 bits of the first dword of the payload. + * The memory layout has 3 types depending on the PrimaryDiscriminator value. + * + * PrimaryDiscriminator::pdSingleDWord - Only the second dword carries the data. + * That leaves the first 8 bytes free for storing the InternalType in the upper + * bits. + * + * PrimaryDiscriminator::pdListN - pdPath - Only has 3 available padding bits + * because: + * - tListN needs a size, whose lower bits we can't borrow. + * - tString and tPath have C-string fields, which don't necessarily need to + * be aligned. + * + * In this case we reserve their discriminators directly in the PrimaryDiscriminator + * bits stored in payload[0]. + * + * PrimaryDiscriminator::pdPairOfPointers - Payloads that consist of a pair of pointers. + * In this case the 3 lower bits of payload[1] can be tagged. + * + * The primary discriminator with value 0 is reserved for uninitialized Values, + * which are useful for diagnostics in C bindings. + */ + enum PrimaryDiscriminator : int { + pdUninitialized = 0, + pdSingleDWord, //< layout: Single/zero field payload + /* The order of these enumations must be the same as in InternalType. */ + pdListN, //< layout: Single untaggable field. + pdString, + pdPath, + pdPairOfPointers, //< layout: Pair of pointers payload + }; + + template + requires std::is_pointer_v + static T untagPointer(PackedPointer val) noexcept + { + return std::bit_cast(val & ~discriminatorMask); + } + + PrimaryDiscriminator getPrimaryDiscriminator() const noexcept + { + return static_cast(payload[0] & discriminatorMask); + } + + static void assertAligned(PackedPointer val) noexcept + { + assert((val & discriminatorMask) == 0 && "Pointer is not 8 bytes aligned"); + } + + template + void setSingleDWordPayload(PackedPointer untaggedVal) noexcept + { + /* There's plenty of free upper bits in the first dword, which is + used only for the discriminator. */ + payload[0] = static_cast(pdSingleDWord) | (static_cast(type) << discriminatorBits); + payload[1] = untaggedVal; + } + + template + void setUntaggablePayload(T * firstPtrField, U untaggableField) noexcept + { + static_assert(discriminator >= pdListN && discriminator <= pdPath); + auto firstFieldPayload = std::bit_cast(firstPtrField); + assertAligned(firstFieldPayload); + payload[0] = static_cast(discriminator) | firstFieldPayload; + payload[1] = std::bit_cast(untaggableField); + } + + template + void setPairOfPointersPayload(T * firstPtrField, U * secondPtrField) noexcept + { + static_assert(type >= tListSmall && type <= tLambda); + { + auto firstFieldPayload = std::bit_cast(firstPtrField); + assertAligned(firstFieldPayload); + payload[0] = static_cast(pdPairOfPointers) | firstFieldPayload; + } + { + auto secondFieldPayload = std::bit_cast(secondPtrField); + assertAligned(secondFieldPayload); + payload[1] = (type - tListSmall) | secondFieldPayload; + } + } + + template + requires std::is_pointer_v && std::is_pointer_v + void getPairOfPointersPayload(T & firstPtrField, U & secondPtrField) const noexcept + { + firstPtrField = untagPointer(payload[0]); + secondPtrField = untagPointer(payload[1]); + } + +protected: + /** Get internal type currently occupying the storage. */ + InternalType getInternalType() const noexcept + { + switch (auto pd = getPrimaryDiscriminator()) { + case pdUninitialized: + /* Discriminator value of zero is used to distinguish uninitialized values. */ + return tUninitialized; + case pdSingleDWord: + /* Payloads that only use up a single double word store the InternalType + in the upper bits of the first double word. */ + return InternalType(payload[0] >> discriminatorBits); + /* The order must match that of the enumerations defined in InternalType. */ + case pdListN: + case pdString: + case pdPath: + return static_cast(tListN + (pd - pdListN)); + case pdPairOfPointers: + return static_cast(tListSmall + (payload[1] & discriminatorMask)); + [[unlikely]] default: + unreachable(); + } + } + +#define NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(TYPE, MEMBER_A, MEMBER_B) \ + \ + void getStorage(TYPE & val) const noexcept \ + { \ + getPairOfPointersPayload(val MEMBER_A, val MEMBER_B); \ + } \ + \ + void setStorage(TYPE val) noexcept \ + { \ + setPairOfPointersPayload>(val MEMBER_A, val MEMBER_B); \ + } + + NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(SmallList, [0], [1]) + NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(PrimOpApplicationThunk, .left, .right) + NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(FunctionApplicationThunk, .left, .right) + NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(ClosureThunk, .env, .expr) + NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS(Lambda, .env, .fun) + +#undef NIX_VALUE_STORAGE_DEF_PAIR_OF_PTRS + + void getStorage(NixInt & integer) const noexcept + { + /* PackedPointerType -> int64_t here is well-formed, since the standard requires + this conversion to follow 2's complement rules. This is just a no-op. */ + integer = NixInt(payload[1]); + } + + void getStorage(bool & boolean) const noexcept + { + boolean = payload[1]; + } + + void getStorage(Null & null) const noexcept {} + + void getStorage(NixFloat & fpoint) const noexcept + { + fpoint = std::bit_cast(payload[1]); + } + + void getStorage(ExternalValueBase *& external) const noexcept + { + external = std::bit_cast(payload[1]); + } + + void getStorage(PrimOp *& primOp) const noexcept + { + primOp = std::bit_cast(payload[1]); + } + + void getStorage(Bindings *& attrs) const noexcept + { + attrs = std::bit_cast(payload[1]); + } + + void getStorage(List & list) const noexcept + { + list.elems = untagPointer(payload[0]); + list.size = payload[1]; + } + + void getStorage(StringWithContext & string) const noexcept + { + string.context = untagPointer(payload[0]); + string.c_str = std::bit_cast(payload[1]); + } + + void getStorage(Path & path) const noexcept + { + path.accessor = untagPointer(payload[0]); + path.path = std::bit_cast(payload[1]); + } + + void setStorage(NixInt integer) noexcept + { + setSingleDWordPayload(integer.value); + } + + void setStorage(bool boolean) noexcept + { + setSingleDWordPayload(boolean); + } + + void setStorage(Null path) noexcept + { + setSingleDWordPayload(0); + } + + void setStorage(NixFloat fpoint) noexcept + { + setSingleDWordPayload(std::bit_cast(fpoint)); + } + + void setStorage(ExternalValueBase * external) noexcept + { + setSingleDWordPayload(std::bit_cast(external)); + } + + void setStorage(PrimOp * primOp) noexcept + { + setSingleDWordPayload(std::bit_cast(primOp)); + } + + void setStorage(Bindings * bindings) noexcept + { + setSingleDWordPayload(std::bit_cast(bindings)); + } + + void setStorage(List list) noexcept + { + setUntaggablePayload(list.elems, list.size); + } + + void setStorage(StringWithContext string) noexcept + { + setUntaggablePayload(string.context, string.c_str); + } + + void setStorage(Path path) noexcept + { + setUntaggablePayload(path.accessor, path.path); + } +}; + /** * View into a list of Value * that is itself immutable. *