1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-25 14:51:16 +02:00

libutil/tarfile: Create the scratch std::vector only once

I can't find a good way to benchmark this in isolation from the
git cache, but common sense dictates that creating (and destroying)
a 128 KiB std::vector for each regular file from the archive imposes
quite a significant overhead, regardless of the IO-bound git cache.

AFAICT there is no reason to keep a copy of the data since
it always gets fed into the sink and there are no coroutines/threads
in sight.
This commit is contained in:
Sergei Zimmerman 2025-05-13 10:50:46 +00:00
parent ed521760bc
commit 7628155d2b
No known key found for this signature in database
GPG key ID: A9B0B557CA632325

View file

@ -178,6 +178,10 @@ time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink &
{ {
time_t lastModified = 0; time_t lastModified = 0;
/* Only allocate the buffer once. Use the heap because 128 KiB is a bit too
much for the stack. */
std::vector<unsigned char> buf(128 * 1024);
for (;;) { for (;;) {
// FIXME: merge with extract_archive // FIXME: merge with extract_archive
struct archive_entry * entry; struct archive_entry * entry;
@ -212,7 +216,6 @@ time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink &
crf.isExecutable(); crf.isExecutable();
while (true) { while (true) {
std::vector<unsigned char> buf(128 * 1024);
auto n = archive_read_data(archive.archive, buf.data(), buf.size()); auto n = archive_read_data(archive.archive, buf.data(), buf.size());
if (n < 0) if (n < 0)
throw Error("cannot read file '%s' from tarball", path); throw Error("cannot read file '%s' from tarball", path);