From 4711720efe3e1cf27d0e80f818b5d02ca1075457 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 15 May 2025 22:28:41 +0000 Subject: [PATCH 1/2] libutil: Add `LRUCache::getOrNullptr` For heavier objects it doesn't make sense to return a std::optional with the copy of the data, when it can be used by const reference. --- src/libutil/include/nix/util/lru-cache.hh | 46 +++++++++++++++++------ 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/libutil/include/nix/util/lru-cache.hh b/src/libutil/include/nix/util/lru-cache.hh index c9bcd7ee0..0834a8e74 100644 --- a/src/libutil/include/nix/util/lru-cache.hh +++ b/src/libutil/include/nix/util/lru-cache.hh @@ -33,6 +33,18 @@ private: Data data; LRU lru; + /** + * Move this item to the back of the LRU list. + */ + void promote(LRU::iterator it) + { + /* Think of std::list iterators as stable pointers to the list node, + * which never get invalidated. Thus, we can reuse the same lru list + * element and just splice it to the back of the list without the need + * to update its value in the key -> list iterator map. */ + lru.splice(/*pos=*/lru.end(), /*other=*/lru, it); + } + public: LRUCache(size_t capacity) @@ -83,7 +95,9 @@ public: /** * Look up an item in the cache. If it exists, it becomes the most * recently used item. - * */ + * + * @returns corresponding cache entry, std::nullopt if it's not in the cache + */ template std::optional get(const K & key) { @@ -91,20 +105,30 @@ public: if (i == data.end()) return {}; - /** - * Move this item to the back of the LRU list. - * - * Think of std::list iterators as stable pointers to the list node, - * which never get invalidated. Thus, we can reuse the same lru list - * element and just splice it to the back of the list without the need - * to update its value in the key -> list iterator map. - */ auto & [it, value] = i->second; - lru.splice(/*pos=*/lru.end(), /*other=*/lru, it.it); - + promote(it.it); return value; } + /** + * Look up an item in the cache. If it exists, it becomes the most + * recently used item. + * + * @returns mutable pointer to the corresponding cache entry, nullptr if + * it's not in the cache + */ + template + Value * getOrNullptr(const K & key) + { + auto i = data.find(key); + if (i == data.end()) + return nullptr; + + auto & [it, value] = i->second; + promote(it.it); + return &value; + } + size_t size() const noexcept { return data.size(); From 5ea81f5b8f36f351981f53d27e85c6e4733c2b3e Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Thu, 15 May 2025 23:07:25 +0000 Subject: [PATCH 2/2] libexpr: Actually cache line information in PosTable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previous code had a sneaky bug due to which no caching actually happened: ```cpp auto linesForInput = (*lines)[origin->offset]; ``` That should have been: ```cpp auto & linesForInput = (*lines)[origin->offset]; ``` See [1]. Now that it also makes sense to make the cache bound in side in order not to memoize all the sources without freeing any memory. The default cache size has been chosen somewhat arbitrarily to be ~64k origins. For reference, 25.05 nixpkgs has ~50k .nix files. Simple benchmark: ```nix let pkgs = import { }; in builtins.foldl' (acc: el: acc + el.line) 0 ( builtins.genList (x: builtins.unsafeGetAttrPos "gcc" pkgs) 10000 ) ``` (After) ``` $ hyperfine "result/bin/nix eval -f ./test.nix" Benchmark 1: result/bin/nix eval -f ./test.nix Time (mean ± σ): 292.7 ms ± 3.9 ms [User: 131.0 ms, System: 120.5 ms] Range (min … max): 288.1 ms … 300.5 ms 10 runs ``` (Before) ``` hyperfine "nix eval -f ./test.nix" Benchmark 1: nix eval -f ./test.nix Time (mean ± σ): 666.7 ms ± 6.4 ms [User: 428.3 ms, System: 191.2 ms] Range (min … max): 659.7 ms … 681.3 ms 10 runs ``` If the origin happens to be a `all-packages.nix` or similar in size then the difference is much more dramatic. [1]: https://www.github.com/lix-project/lix/commit/22e3f0e9875082be7f4eec8e3caeb134a7f1c05f --- src/libutil/include/nix/util/pos-table.hh | 18 ++++++++++- src/libutil/pos-table.cc | 38 ++++++++++++++++------- 2 files changed, 43 insertions(+), 13 deletions(-) diff --git a/src/libutil/include/nix/util/pos-table.hh b/src/libutil/include/nix/util/pos-table.hh index ef170e0f1..f64466c21 100644 --- a/src/libutil/include/nix/util/pos-table.hh +++ b/src/libutil/include/nix/util/pos-table.hh @@ -4,6 +4,7 @@ #include #include +#include "nix/util/lru-cache.hh" #include "nix/util/pos-idx.hh" #include "nix/util/position.hh" #include "nix/util/sync.hh" @@ -37,10 +38,20 @@ public: }; private: + /** + * Vector of byte offsets (in the virtual input buffer) of initial line character's position. + * Sorted by construction. Binary search over it allows for efficient translation of arbitrary + * byte offsets in the virtual input buffer to its line + column position. + */ using Lines = std::vector; + /** + * Cache from byte offset in the virtual buffer of Origins -> @ref Lines in that origin. + */ + using LinesCache = LRUCache; std::map origins; - mutable Sync> lines; + + mutable Sync linesCache; const Origin * resolve(PosIdx p) const { @@ -56,6 +67,11 @@ private: } public: + PosTable(std::size_t linesCacheCapacity = 65536) + : linesCache(linesCacheCapacity) + { + } + Origin addOrigin(Pos::Origin origin, size_t size) { uint32_t offset = 0; diff --git a/src/libutil/pos-table.cc b/src/libutil/pos-table.cc index 5a61ffbc5..e50b12873 100644 --- a/src/libutil/pos-table.cc +++ b/src/libutil/pos-table.cc @@ -15,21 +15,35 @@ Pos PosTable::operator[](PosIdx p) const const auto offset = origin->offsetOf(p); Pos result{0, 0, origin->origin}; - auto lines = this->lines.lock(); - auto linesForInput = (*lines)[origin->offset]; + auto linesCache = this->linesCache.lock(); - if (linesForInput.empty()) { - auto source = result.getSource().value_or(""); - const char * begin = source.data(); - for (Pos::LinesIterator it(source), end; it != end; it++) - linesForInput.push_back(it->data() - begin); - if (linesForInput.empty()) - linesForInput.push_back(0); + /* Try the origin's line cache */ + const auto * linesForInput = linesCache->getOrNullptr(origin->offset); + + auto fillCacheForOrigin = [](std::string_view content) { + auto contentLines = Lines(); + + const char * begin = content.data(); + for (Pos::LinesIterator it(content), end; it != end; it++) + contentLines.push_back(it->data() - begin); + if (contentLines.empty()) + contentLines.push_back(0); + + return contentLines; + }; + + /* Calculate line offsets and fill the cache */ + if (!linesForInput) { + auto originContent = result.getSource().value_or(""); + linesCache->upsert(origin->offset, fillCacheForOrigin(originContent)); + linesForInput = linesCache->getOrNullptr(origin->offset); } - // as above: the first line starts at byte 0 and is always present - auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset)); - result.line = 1 + (lineStartOffset - linesForInput.begin()); + assert(linesForInput); + + // as above: the first line starts at byte 0 and is always present + auto lineStartOffset = std::prev(std::upper_bound(linesForInput->begin(), linesForInput->end(), offset)); + result.line = 1 + (lineStartOffset - linesForInput->begin()); result.column = 1 + (offset - *lineStartOffset); return result; }