1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-24 22:11:15 +02:00

libexpr: Actually cache line information in PosTable

Previous code had a sneaky bug due to which no caching
actually happened:

```cpp
auto linesForInput = (*lines)[origin->offset];
```

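Because `auto` deduces a value type, `linesForInput` was a copy of the map entry, so the
line offsets computed afterwards were stored in that temporary and then discarded.
That should have been: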
```cpp
auto & linesForInput = (*lines)[origin->offset];
```

See [1].

Now that caching actually works, it also makes sense to bound the cache in size,
so that we do not memoize all the sources without ever freeing any memory.
The default cache size has been chosen somewhat arbitrarily to be ~64k
origins. For reference, 25.05 nixpkgs has ~50k .nix files.

Simple benchmark:

```nix
let
  pkgs = import <nixpkgs> { };
in
builtins.foldl' (acc: el: acc + el.line) 0 (
  builtins.genList (x: builtins.unsafeGetAttrPos "gcc" pkgs) 10000
)
```

(After)

```
$ hyperfine "result/bin/nix eval -f ./test.nix"
Benchmark 1: result/bin/nix eval -f ./test.nix
  Time (mean ± σ):     292.7 ms ±   3.9 ms    [User: 131.0 ms, System: 120.5 ms]
  Range (min … max):   288.1 ms … 300.5 ms    10 runs
```

(Before)

```
hyperfine "nix eval -f ./test.nix"
Benchmark 1: nix eval -f ./test.nix
  Time (mean ± σ):     666.7 ms ±   6.4 ms    [User: 428.3 ms, System: 191.2 ms]
  Range (min … max):   659.7 ms … 681.3 ms    10 runs
```

If the origin happens to be `all-packages.nix` or a file of similar size, then the
difference is much more dramatic.

[1]: 22e3f0e987
This commit is contained in:
Sergei Zimmerman 2025-05-15 23:07:25 +00:00
parent 4711720efe
commit 5ea81f5b8f
No known key found for this signature in database
GPG key ID: A9B0B557CA632325
2 changed files with 43 additions and 13 deletions

View file

@ -4,6 +4,7 @@
#include <cstdint>
#include <vector>
#include "nix/util/lru-cache.hh"
#include "nix/util/pos-idx.hh"
#include "nix/util/position.hh"
#include "nix/util/sync.hh"
@ -37,10 +38,20 @@ public:
};
private:
/**
* Vector of byte offsets (in the virtual input buffer) of initial line character's position.
* Sorted by construction. Binary search over it allows for efficient translation of arbitrary
* byte offsets in the virtual input buffer to its line + column position.
*/
using Lines = std::vector<uint32_t>;
/**
* Cache from byte offset in the virtual buffer of Origins -> @ref Lines in that origin.
*/
using LinesCache = LRUCache<uint32_t, Lines>;
std::map<uint32_t, Origin> origins;
mutable Sync<std::map<uint32_t, Lines>> lines;
mutable Sync<LinesCache> linesCache;
const Origin * resolve(PosIdx p) const
{
@ -56,6 +67,11 @@ private:
}
public:
/**
* Construct a position table.
*
* @param linesCacheCapacity Value forwarded to the constructor of `linesCache`, the
* LRU cache keyed by an origin's offset that holds the line-start offsets of that
* origin. Defaults to 65536.
*/
PosTable(std::size_t linesCacheCapacity = 65536)
: linesCache(linesCacheCapacity)
{
}
Origin addOrigin(Pos::Origin origin, size_t size)
{
uint32_t offset = 0;

View file

@ -15,21 +15,35 @@ Pos PosTable::operator[](PosIdx p) const
const auto offset = origin->offsetOf(p);
Pos result{0, 0, origin->origin};
auto lines = this->lines.lock();
auto linesForInput = (*lines)[origin->offset];
auto linesCache = this->linesCache.lock();
if (linesForInput.empty()) {
auto source = result.getSource().value_or("");
const char * begin = source.data();
for (Pos::LinesIterator it(source), end; it != end; it++)
linesForInput.push_back(it->data() - begin);
if (linesForInput.empty())
linesForInput.push_back(0);
/* Try the origin's line cache */
const auto * linesForInput = linesCache->getOrNullptr(origin->offset);
auto fillCacheForOrigin = [](std::string_view content) {
auto contentLines = Lines();
const char * begin = content.data();
for (Pos::LinesIterator it(content), end; it != end; it++)
contentLines.push_back(it->data() - begin);
if (contentLines.empty())
contentLines.push_back(0);
return contentLines;
};
/* Calculate line offsets and fill the cache */
if (!linesForInput) {
auto originContent = result.getSource().value_or("");
linesCache->upsert(origin->offset, fillCacheForOrigin(originContent));
linesForInput = linesCache->getOrNullptr(origin->offset);
}
// as above: the first line starts at byte 0 and is always present
auto lineStartOffset = std::prev(std::upper_bound(linesForInput.begin(), linesForInput.end(), offset));
result.line = 1 + (lineStartOffset - linesForInput.begin());
assert(linesForInput);
// as above: the first line starts at byte 0 and is always present
auto lineStartOffset = std::prev(std::upper_bound(linesForInput->begin(), linesForInput->end(), offset));
result.line = 1 + (lineStartOffset - linesForInput->begin());
result.column = 1 + (offset - *lineStartOffset);
return result;
}