mirror of
https://github.com/NixOS/nix
synced 2025-06-24 22:11:15 +02:00
Merge pull request #13258 from NaN-git/opt-symbol-table
Optimize symbol table
This commit is contained in:
commit
102259898c
6 changed files with 212 additions and 65 deletions
|
@ -2,6 +2,7 @@
|
|||
#include "nix/expr/eval-settings.hh"
|
||||
#include "nix/expr/primops.hh"
|
||||
#include "nix/expr/print-options.hh"
|
||||
#include "nix/expr/symbol-table.hh"
|
||||
#include "nix/util/exit.hh"
|
||||
#include "nix/util/types.hh"
|
||||
#include "nix/util/util.hh"
|
||||
|
@ -89,6 +90,11 @@ std::string printValue(EvalState & state, Value & v)
|
|||
return out.str();
|
||||
}
|
||||
|
||||
Value * Value::toPtr(SymbolStr str) noexcept
|
||||
{
|
||||
return const_cast<Value *>(str.valuePtr());
|
||||
}
|
||||
|
||||
void Value::print(EvalState & state, std::ostream & str, PrintOptions options)
|
||||
{
|
||||
printValue(state, str, *this, options);
|
||||
|
@ -3013,7 +3019,7 @@ void EvalState::printStatistics()
|
|||
// XXX: overrides earlier assignment
|
||||
topObj["symbols"] = json::array();
|
||||
auto &list = topObj["symbols"];
|
||||
symbols.dump([&](const std::string & s) { list.emplace_back(s); });
|
||||
symbols.dump([&](std::string_view s) { list.emplace_back(s); });
|
||||
}
|
||||
if (outPath == "-") {
|
||||
std::cerr << topObj.dump(2) << std::endl;
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#pragma once
|
||||
|
||||
#include "nix/expr/value.hh"
|
||||
#include "nix/expr/symbol-table.hh"
|
||||
|
||||
namespace nix {
|
||||
|
||||
|
|
|
@ -1,51 +1,35 @@
|
|||
#pragma once
|
||||
///@file
|
||||
|
||||
#include <list>
|
||||
#include <map>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "nix/util/types.hh"
|
||||
#include <memory_resource>
|
||||
#include "nix/expr/value.hh"
|
||||
#include "nix/util/chunked-vector.hh"
|
||||
#include "nix/util/error.hh"
|
||||
|
||||
#include <boost/version.hpp>
|
||||
#define USE_FLAT_SYMBOL_SET (BOOST_VERSION >= 108100)
|
||||
#if USE_FLAT_SYMBOL_SET
|
||||
# include <boost/unordered/unordered_flat_set.hpp>
|
||||
#else
|
||||
# include <boost/unordered/unordered_set.hpp>
|
||||
#endif
|
||||
|
||||
namespace nix {
|
||||
|
||||
/**
|
||||
* This class mainly exists to give us an operator<< for ostreams. We could also
|
||||
* return plain strings from SymbolTable, but then we'd have to wrap every
|
||||
* instance of a symbol that is fmt()ed, which is inconvenient and error-prone.
|
||||
*/
|
||||
class SymbolStr
|
||||
class SymbolValue : protected Value
|
||||
{
|
||||
friend class SymbolStr;
|
||||
friend class SymbolTable;
|
||||
|
||||
private:
|
||||
const std::string * s;
|
||||
uint32_t size_;
|
||||
uint32_t idx;
|
||||
|
||||
explicit SymbolStr(const std::string & symbol): s(&symbol) {}
|
||||
SymbolValue() = default;
|
||||
|
||||
public:
|
||||
bool operator == (std::string_view s2) const
|
||||
operator std::string_view() const noexcept
|
||||
{
|
||||
return *s == s2;
|
||||
}
|
||||
|
||||
const char * c_str() const
|
||||
{
|
||||
return s->c_str();
|
||||
}
|
||||
|
||||
operator const std::string_view () const
|
||||
{
|
||||
return *s;
|
||||
}
|
||||
|
||||
friend std::ostream & operator <<(std::ostream & os, const SymbolStr & symbol);
|
||||
|
||||
bool empty() const
|
||||
{
|
||||
return s->empty();
|
||||
return {payload.string.c_str, size_};
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -56,24 +40,161 @@ public:
|
|||
*/
|
||||
class Symbol
|
||||
{
|
||||
friend class SymbolStr;
|
||||
friend class SymbolTable;
|
||||
|
||||
private:
|
||||
uint32_t id;
|
||||
|
||||
explicit Symbol(uint32_t id): id(id) {}
|
||||
explicit Symbol(uint32_t id) noexcept : id(id) {}
|
||||
|
||||
public:
|
||||
Symbol() : id(0) {}
|
||||
Symbol() noexcept : id(0) {}
|
||||
|
||||
explicit operator bool() const { return id > 0; }
|
||||
[[gnu::always_inline]]
|
||||
explicit operator bool() const noexcept { return id > 0; }
|
||||
|
||||
auto operator<=>(const Symbol other) const { return id <=> other.id; }
|
||||
bool operator==(const Symbol other) const { return id == other.id; }
|
||||
auto operator<=>(const Symbol other) const noexcept { return id <=> other.id; }
|
||||
bool operator==(const Symbol other) const noexcept { return id == other.id; }
|
||||
|
||||
friend class std::hash<Symbol>;
|
||||
};
|
||||
|
||||
/**
|
||||
* This class mainly exists to give us an operator<< for ostreams. We could also
|
||||
* return plain strings from SymbolTable, but then we'd have to wrap every
|
||||
* instance of a symbol that is fmt()ed, which is inconvenient and error-prone.
|
||||
*/
|
||||
class SymbolStr
|
||||
{
|
||||
friend class SymbolTable;
|
||||
|
||||
constexpr static size_t chunkSize{8192};
|
||||
using SymbolValueStore = ChunkedVector<SymbolValue, chunkSize>;
|
||||
|
||||
const SymbolValue * s;
|
||||
|
||||
struct Key
|
||||
{
|
||||
using HashType = boost::hash<std::string_view>;
|
||||
|
||||
SymbolValueStore & store;
|
||||
std::string_view s;
|
||||
std::size_t hash;
|
||||
std::pmr::polymorphic_allocator<char> & alloc;
|
||||
|
||||
Key(SymbolValueStore & store, std::string_view s, std::pmr::polymorphic_allocator<char> & stringAlloc)
|
||||
: store(store)
|
||||
, s(s)
|
||||
, hash(HashType{}(s))
|
||||
, alloc(stringAlloc) {}
|
||||
};
|
||||
|
||||
public:
|
||||
SymbolStr(const SymbolValue & s) noexcept : s(&s) {}
|
||||
|
||||
SymbolStr(const Key & key)
|
||||
{
|
||||
auto size = key.s.size();
|
||||
if (size >= std::numeric_limits<uint32_t>::max()) {
|
||||
throw Error("Size of symbol exceeds 4GiB and cannot be stored");
|
||||
}
|
||||
// for multi-threaded implementations: lock store and allocator here
|
||||
const auto & [v, idx] = key.store.add(SymbolValue{});
|
||||
if (size == 0) {
|
||||
v.mkString("", nullptr);
|
||||
} else {
|
||||
auto s = key.alloc.allocate(size + 1);
|
||||
memcpy(s, key.s.data(), size);
|
||||
s[size] = '\0';
|
||||
v.mkString(s, nullptr);
|
||||
}
|
||||
v.size_ = size;
|
||||
v.idx = idx;
|
||||
this->s = &v;
|
||||
}
|
||||
|
||||
bool operator == (std::string_view s2) const noexcept
|
||||
{
|
||||
return *s == s2;
|
||||
}
|
||||
|
||||
[[gnu::always_inline]]
|
||||
const char * c_str() const noexcept
|
||||
{
|
||||
return s->payload.string.c_str;
|
||||
}
|
||||
|
||||
[[gnu::always_inline]]
|
||||
operator std::string_view () const noexcept
|
||||
{
|
||||
return *s;
|
||||
}
|
||||
|
||||
friend std::ostream & operator <<(std::ostream & os, const SymbolStr & symbol);
|
||||
|
||||
[[gnu::always_inline]]
|
||||
bool empty() const noexcept
|
||||
{
|
||||
return s->size_ == 0;
|
||||
}
|
||||
|
||||
[[gnu::always_inline]]
|
||||
size_t size() const noexcept
|
||||
{
|
||||
return s->size_;
|
||||
}
|
||||
|
||||
[[gnu::always_inline]]
|
||||
const Value * valuePtr() const noexcept
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
explicit operator Symbol() const noexcept
|
||||
{
|
||||
return Symbol{s->idx + 1};
|
||||
}
|
||||
|
||||
struct Hash
|
||||
{
|
||||
using is_transparent = void;
|
||||
using is_avalanching = std::true_type;
|
||||
|
||||
std::size_t operator()(SymbolStr str) const
|
||||
{
|
||||
return Key::HashType{}(*str.s);
|
||||
}
|
||||
|
||||
std::size_t operator()(const Key & key) const noexcept
|
||||
{
|
||||
return key.hash;
|
||||
}
|
||||
};
|
||||
|
||||
struct Equal
|
||||
{
|
||||
using is_transparent = void;
|
||||
|
||||
bool operator()(SymbolStr a, SymbolStr b) const noexcept
|
||||
{
|
||||
// strings are unique, so that a pointer comparison is OK
|
||||
return a.s == b.s;
|
||||
}
|
||||
|
||||
bool operator()(SymbolStr a, const Key & b) const noexcept
|
||||
{
|
||||
return a == b.s;
|
||||
}
|
||||
|
||||
[[gnu::always_inline]]
|
||||
bool operator()(const Key & a, SymbolStr b) const noexcept
|
||||
{
|
||||
return operator()(b, a);
|
||||
}
|
||||
};
|
||||
};
|
||||
|
||||
/**
|
||||
* Symbol table used by the parser and evaluator to represent and look
|
||||
* up identifiers and attributes efficiently.
|
||||
|
@ -82,29 +203,46 @@ class SymbolTable
|
|||
{
|
||||
private:
|
||||
/**
|
||||
* Map from string view (backed by ChunkedVector) -> offset into the store.
|
||||
* SymbolTable is an append only data structure.
|
||||
* During its lifetime the monotonic buffer holds all strings and nodes, if the symbol set is node based.
|
||||
*/
|
||||
std::pmr::monotonic_buffer_resource buffer;
|
||||
std::pmr::polymorphic_allocator<char> stringAlloc{&buffer};
|
||||
SymbolStr::SymbolValueStore store{16};
|
||||
|
||||
/**
|
||||
* Transparent lookup of string view for a pointer to a ChunkedVector entry -> return offset into the store.
|
||||
* ChunkedVector references are never invalidated.
|
||||
*/
|
||||
std::unordered_map<std::string_view, uint32_t> symbols;
|
||||
ChunkedVector<std::string, 8192> store{16};
|
||||
#if USE_FLAT_SYMBOL_SET
|
||||
boost::unordered_flat_set<SymbolStr, SymbolStr::Hash, SymbolStr::Equal> symbols{SymbolStr::chunkSize};
|
||||
#else
|
||||
using SymbolValueAlloc = std::pmr::polymorphic_allocator<SymbolStr>;
|
||||
boost::unordered_set<SymbolStr, SymbolStr::Hash, SymbolStr::Equal, SymbolValueAlloc> symbols{SymbolStr::chunkSize, {&buffer}};
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Converts a string into a symbol.
|
||||
*/
|
||||
Symbol create(std::string_view s)
|
||||
{
|
||||
Symbol create(std::string_view s) {
|
||||
// Most symbols are looked up more than once, so we trade off insertion performance
|
||||
// for lookup performance.
|
||||
// FIXME: make this thread-safe.
|
||||
auto it = symbols.find(s);
|
||||
if (it != symbols.end())
|
||||
return Symbol(it->second + 1);
|
||||
return [&]<typename T>(T && key) -> Symbol {
|
||||
if constexpr (requires { symbols.insert<T>(key); }) {
|
||||
auto [it, _] = symbols.insert<T>(key);
|
||||
return Symbol(*it);
|
||||
} else {
|
||||
auto it = symbols.find<T>(key);
|
||||
if (it != symbols.end())
|
||||
return Symbol(*it);
|
||||
|
||||
const auto & [rawSym, idx] = store.add(s);
|
||||
symbols.emplace(rawSym, idx);
|
||||
return Symbol(idx + 1);
|
||||
it = symbols.emplace(key).first;
|
||||
return Symbol(*it);
|
||||
}
|
||||
}(SymbolStr::Key{store, s, stringAlloc});
|
||||
}
|
||||
|
||||
std::vector<SymbolStr> resolve(const std::vector<Symbol> & symbols) const
|
||||
|
@ -118,12 +256,14 @@ public:
|
|||
|
||||
SymbolStr operator[](Symbol s) const
|
||||
{
|
||||
if (s.id == 0 || s.id > store.size())
|
||||
uint32_t idx = s.id - uint32_t(1);
|
||||
if (idx >= store.size())
|
||||
unreachable();
|
||||
return SymbolStr(store[s.id - 1]);
|
||||
return store[idx];
|
||||
}
|
||||
|
||||
size_t size() const
|
||||
[[gnu::always_inline]]
|
||||
size_t size() const noexcept
|
||||
{
|
||||
return store.size();
|
||||
}
|
||||
|
@ -147,3 +287,5 @@ struct std::hash<nix::Symbol>
|
|||
return std::hash<decltype(s.id)>{}(s.id);
|
||||
}
|
||||
};
|
||||
|
||||
#undef USE_FLAT_SYMBOL_SET
|
||||
|
|
|
@ -5,7 +5,6 @@
|
|||
#include <span>
|
||||
|
||||
#include "nix/expr/eval-gc.hh"
|
||||
#include "nix/expr/symbol-table.hh"
|
||||
#include "nix/expr/value/context.hh"
|
||||
#include "nix/util/source-path.hh"
|
||||
#include "nix/expr/print-options.hh"
|
||||
|
@ -65,6 +64,7 @@ struct ExprLambda;
|
|||
struct ExprBlackHole;
|
||||
struct PrimOp;
|
||||
class Symbol;
|
||||
class SymbolStr;
|
||||
class PosIdx;
|
||||
struct Pos;
|
||||
class StorePath;
|
||||
|
@ -172,6 +172,11 @@ private:
|
|||
|
||||
public:
|
||||
|
||||
/**
|
||||
* Never modify the backing `Value` object!
|
||||
*/
|
||||
static Value * toPtr(SymbolStr str) noexcept;
|
||||
|
||||
void print(EvalState &state, std::ostream &str, PrintOptions options = PrintOptions {});
|
||||
|
||||
// Functions needed to distinguish the type
|
||||
|
@ -331,11 +336,6 @@ public:
|
|||
|
||||
void mkStringMove(const char * s, const NixStringContext & context);
|
||||
|
||||
inline void mkString(const SymbolStr & s)
|
||||
{
|
||||
mkString(s.c_str());
|
||||
}
|
||||
|
||||
void mkPath(const SourcePath & path);
|
||||
void mkPath(std::string_view path);
|
||||
|
||||
|
|
|
@ -606,7 +606,7 @@ void ExprLambda::setDocComment(DocComment docComment) {
|
|||
size_t SymbolTable::totalSize() const
|
||||
{
|
||||
size_t n = 0;
|
||||
dump([&] (const std::string & s) { n += s.size(); });
|
||||
dump([&] (SymbolStr s) { n += s.size(); });
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
|
@ -2720,7 +2720,7 @@ static void prim_attrNames(EvalState & state, const PosIdx pos, Value * * args,
|
|||
auto list = state.buildList(args[0]->attrs()->size());
|
||||
|
||||
for (const auto & [n, i] : enumerate(*args[0]->attrs()))
|
||||
(list[n] = state.allocValue())->mkString(state.symbols[i.name]);
|
||||
list[n] = Value::toPtr(state.symbols[i.name]);
|
||||
|
||||
std::sort(list.begin(), list.end(),
|
||||
[](Value * v1, Value * v2) { return strcmp(v1->c_str(), v2->c_str()) < 0; });
|
||||
|
@ -3180,9 +3180,8 @@ static void prim_mapAttrs(EvalState & state, const PosIdx pos, Value * * args, V
|
|||
auto attrs = state.buildBindings(args[1]->attrs()->size());
|
||||
|
||||
for (auto & i : *args[1]->attrs()) {
|
||||
Value * vName = state.allocValue();
|
||||
Value * vName = Value::toPtr(state.symbols[i.name]);
|
||||
Value * vFun2 = state.allocValue();
|
||||
vName->mkString(state.symbols[i.name]);
|
||||
vFun2->mkApp(args[0], vName);
|
||||
attrs.alloc(i.name).mkApp(vFun2, i.value);
|
||||
}
|
||||
|
@ -3246,8 +3245,7 @@ static void prim_zipAttrsWith(EvalState & state, const PosIdx pos, Value * * arg
|
|||
auto attrs = state.buildBindings(attrsSeen.size());
|
||||
|
||||
for (auto & [sym, elem] : attrsSeen) {
|
||||
auto name = state.allocValue();
|
||||
name->mkString(state.symbols[sym]);
|
||||
auto name = Value::toPtr(state.symbols[sym]);
|
||||
auto call1 = state.allocValue();
|
||||
call1->mkApp(args[0], name);
|
||||
auto call2 = state.allocValue();
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue