From a87c3711b6eec35b498f8c71ebd7d4038e5b4545 Mon Sep 17 00:00:00 2001 From: silvanshade Date: Wed, 30 Apr 2025 08:25:02 -0600 Subject: [PATCH 1/3] Update flake nixpkgs --- flake.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flake.lock b/flake.lock index 7abe8bd62..b8ff29a0c 100644 --- a/flake.lock +++ b/flake.lock @@ -63,11 +63,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1745391562, - "narHash": "sha256-sPwcCYuiEopaafePqlG826tBhctuJsLx/mhKKM5Fmjo=", + "lastModified": 1746141548, + "narHash": "sha256-IgBWhX7A2oJmZFIrpRuMnw5RAufVnfvOgHWgIdds+hc=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "8a2f738d9d1f1d986b5a4cd2fd2061a7127237d7", + "rev": "f02fddb8acef29a8b32f10a335d44828d7825b78", "type": "github" }, "original": { From b1783ff6151d24344cb22cb39e5f2975424fb22f Mon Sep 17 00:00:00 2001 From: silvanshade Date: Tue, 18 Mar 2025 14:22:15 -0600 Subject: [PATCH 2/3] Implement memory-mapped IO for Sinks --- packaging/dependencies.nix | 1 + src/libutil/file-system.cc | 19 +++++++++++++++++-- src/libutil/include/nix/util/file-system.hh | 2 +- src/libutil/meson.build | 2 +- 4 files changed, 20 insertions(+), 4 deletions(-) diff --git a/packaging/dependencies.nix b/packaging/dependencies.nix index 16d1f1376..a90ef1b4a 100644 --- a/packaging/dependencies.nix +++ b/packaging/dependencies.nix @@ -61,6 +61,7 @@ scope: { "--with-container" "--with-context" "--with-coroutine" + "--with-iostreams" ]; }).overrideAttrs (old: { diff --git a/src/libutil/file-system.cc b/src/libutil/file-system.cc index ad17f837f..90ec5eda5 100644 --- a/src/libutil/file-system.cc +++ b/src/libutil/file-system.cc @@ -21,6 +21,8 @@ #include #include +#include + #ifdef _WIN32 # include #endif @@ -273,9 +275,22 @@ std::string readFile(const std::filesystem::path & path) return readFile(os_string_to_string(PathViewNG { path })); } - -void readFile(const Path & path, Sink & sink) +void readFile(const Path & path, Sink & sink, bool memory_map) { + // Memory-map the 
file for faster processing where possible. + if (memory_map) { + try { + boost::iostreams::mapped_file_source mmap(path); + if (mmap.is_open()) { + sink({mmap.data(), mmap.size()}); + return; + } + } catch (const boost::exception & e) { + } + debug("memory-mapping failed for path: %s", path); + } + + // Stream the file instead if memory-mapping fails or is disabled. AutoCloseFD fd = toDescriptor(open(path.c_str(), O_RDONLY // TODO #ifndef _WIN32 diff --git a/src/libutil/include/nix/util/file-system.hh b/src/libutil/include/nix/util/file-system.hh index 1d7b5e3aa..b8fa4cfa0 100644 --- a/src/libutil/include/nix/util/file-system.hh +++ b/src/libutil/include/nix/util/file-system.hh @@ -173,7 +173,7 @@ Descriptor openDirectory(const std::filesystem::path & path); */ std::string readFile(const Path & path); std::string readFile(const std::filesystem::path & path); -void readFile(const Path & path, Sink & sink); +void readFile(const Path & path, Sink & sink, bool memory_map = true); /** * Write a string to a file. 
diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 782c361e0..944198970 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -56,7 +56,7 @@ deps_private += blake3 boost = dependency( 'boost', - modules : ['context', 'coroutine'], + modules : ['context', 'coroutine', 'iostreams'], include_type: 'system', ) # boost is a public dependency, but not a pkg-config dependency unfortunately, so we From 7db388f597f0db5b2b7e6217a931908d31df1299 Mon Sep 17 00:00:00 2001 From: silvanshade Date: Tue, 18 Mar 2025 15:16:56 -0600 Subject: [PATCH 3/3] Implement multi-threaded BLAKE3 hashing --- src/libutil/hash.cc | 22 +++++++++++++++++++++- src/libutil/meson.build | 3 ++- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index 0a654b914..50b050e3b 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -307,11 +307,31 @@ static void start(HashAlgorithm ha, Ctx & ctx) else if (ha == HashAlgorithm::SHA512) SHA512_Init(&ctx.sha512); } +// BLAKE3 data size threshold beyond which parallel hashing with TBB is likely faster. +// +// NOTE: This threshold is based on the recommended rule of thumb from the official BLAKE3 documentation for typical +// x86_64 hardware as of 2025. In the future it may make sense to allow the user to tune this through nix.conf. +const size_t blake3TbbThreshold = 128000; + +// Decide which BLAKE3 update strategy to use based on some heuristics. Currently this just checks the data size but in +// the future it might also take into consideration available system resources or the presence of a shared-memory +// capable GPU for a heterogeneous compute implementation. 
+static void blake3_hasher_update_with_heuristics(blake3_hasher * blake3, std::string_view data) +{ +#ifdef BLAKE3_USE_TBB + if (data.size() >= blake3TbbThreshold) { + blake3_hasher_update_tbb(blake3, data.data(), data.size()); + } else +#endif + { + blake3_hasher_update(blake3, data.data(), data.size()); + } +} static void update(HashAlgorithm ha, Ctx & ctx, std::string_view data) { - if (ha == HashAlgorithm::BLAKE3) blake3_hasher_update(&ctx.blake3, data.data(), data.size()); + if (ha == HashAlgorithm::BLAKE3) blake3_hasher_update_with_heuristics(&ctx.blake3, data); else if (ha == HashAlgorithm::MD5) MD5_Update(&ctx.md5, data.data(), data.size()); else if (ha == HashAlgorithm::SHA1) SHA1_Update(&ctx.sha1, data.data(), data.size()); else if (ha == HashAlgorithm::SHA256) SHA256_Update(&ctx.sha256, data.data(), data.size()); diff --git a/src/libutil/meson.build b/src/libutil/meson.build index 944198970..b0e82e46a 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -50,7 +50,8 @@ endif blake3 = dependency( 'libblake3', - version: '>= 1.8.2', + method : 'pkg-config', ) deps_private += blake3