From 7628155d2be3a7b95c79c989abdb031b4741f063 Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Tue, 13 May 2025 10:50:46 +0000 Subject: [PATCH] libutil/tarfile: Create the scratch `std::vector` only once I can't find a good way to benchmark in isolation from the git cache, but common sense dictates that creating (and destroying) a 128 KiB std::vector for each regular file from the archive imposes quite a significant overhead regardless of the IO bound git cache. AFAICT there is no reason to keep a copy of the data since it always gets fed into the sink and there are no coroutines/threads in sight. --- src/libutil/tarfile.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/libutil/tarfile.cc b/src/libutil/tarfile.cc index 75373e3eb..66a7fef02 100644 --- a/src/libutil/tarfile.cc +++ b/src/libutil/tarfile.cc @@ -178,6 +178,10 @@ time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & { time_t lastModified = 0; + /* Only allocate the buffer once. Use the heap because 128 KiB is a bit too + much for the stack. */ + std::vector<unsigned char> buf(128 * 1024); + for (;;) { // FIXME: merge with extract_archive struct archive_entry * entry; @@ -212,7 +216,6 @@ time_t unpackTarfileToSink(TarArchive & archive, ExtendedFileSystemObjectSink & crf.isExecutable(); while (true) { - std::vector<unsigned char> buf(128 * 1024); auto n = archive_read_data(archive.archive, buf.data(), buf.size()); if (n < 0) throw Error("cannot read file '%s' from tarball", path);