1
0
Fork 0
mirror of https://github.com/NixOS/nix synced 2025-06-25 14:51:16 +02:00

Merge pull request #12676 from silvanshade/blake3-tbb

Implement memory-mapped IO and multi-threading for BLAKE3 hashing
This commit is contained in:
John Ericson 2025-05-04 10:58:53 -04:00 committed by GitHub
commit 2676ae7ca6
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 46 additions and 9 deletions

6
flake.lock generated
View file

@ -63,11 +63,11 @@
}, },
"nixpkgs": { "nixpkgs": {
"locked": { "locked": {
"lastModified": 1745391562, "lastModified": 1746141548,
"narHash": "sha256-sPwcCYuiEopaafePqlG826tBhctuJsLx/mhKKM5Fmjo=", "narHash": "sha256-IgBWhX7A2oJmZFIrpRuMnw5RAufVnfvOgHWgIdds+hc=",
"owner": "NixOS", "owner": "NixOS",
"repo": "nixpkgs", "repo": "nixpkgs",
"rev": "8a2f738d9d1f1d986b5a4cd2fd2061a7127237d7", "rev": "f02fddb8acef29a8b32f10a335d44828d7825b78",
"type": "github" "type": "github"
}, },
"original": { "original": {

View file

@ -61,6 +61,7 @@ scope: {
"--with-container" "--with-container"
"--with-context" "--with-context"
"--with-coroutine" "--with-coroutine"
"--with-iostreams"
]; ];
}).overrideAttrs }).overrideAttrs
(old: { (old: {

View file

@ -21,6 +21,8 @@
#include <sys/time.h> #include <sys/time.h>
#include <unistd.h> #include <unistd.h>
#include <boost/iostreams/device/mapped_file.hpp>
#ifdef _WIN32 #ifdef _WIN32
# include <io.h> # include <io.h>
#endif #endif
@ -273,9 +275,22 @@ std::string readFile(const std::filesystem::path & path)
return readFile(os_string_to_string(PathViewNG { path })); return readFile(os_string_to_string(PathViewNG { path }));
} }
void readFile(const Path & path, Sink & sink, bool memory_map)
void readFile(const Path & path, Sink & sink)
{ {
// Memory-map the file for faster processing where possible.
if (memory_map) {
try {
boost::iostreams::mapped_file_source mmap(path);
if (mmap.is_open()) {
sink({mmap.data(), mmap.size()});
return;
}
} catch (const boost::exception & e) {
}
debug("memory-mapping failed for path: %s", path);
}
// Stream the file instead if memory-mapping fails or is disabled.
AutoCloseFD fd = toDescriptor(open(path.c_str(), O_RDONLY AutoCloseFD fd = toDescriptor(open(path.c_str(), O_RDONLY
// TODO // TODO
#ifndef _WIN32 #ifndef _WIN32

View file

@ -307,11 +307,31 @@ static void start(HashAlgorithm ha, Ctx & ctx)
else if (ha == HashAlgorithm::SHA512) SHA512_Init(&ctx.sha512); else if (ha == HashAlgorithm::SHA512) SHA512_Init(&ctx.sha512);
} }
// BLAKE3 data size threshold, in bytes, at or above which parallel hashing with TBB is likely faster.
//
// The value is compared against the size of each chunk of data passed to a single hasher update, not against the
// total amount of data hashed so far.
//
// NOTE: This threshold is based on the recommended rule-of-thumb from the official BLAKE3 documentation for typical
// x86_64 hardware as of 2025. In the future it may make sense to allow the user to tune this through nix.conf.
const size_t blake3TbbThreshold = 128000;
// Feed `data` into the given BLAKE3 hasher, choosing the update strategy heuristically.
//
// At present the only heuristic is the input size: when built with BLAKE3_USE_TBB and `data` is at least
// `blake3TbbThreshold` bytes long, the TBB-based update is used; in every other case the plain update is used.
// Later this might also take into account available system resources or the presence of a shared-memory capable
// GPU for a heterogeneous compute implementation.
void blake3_hasher_update_with_heuristics(blake3_hasher * blake3, std::string_view data)
{
    const char * const bytes = data.data();
    const auto length = data.size();

#ifdef BLAKE3_USE_TBB
    // Inputs at or above the threshold take the TBB path and are done.
    if (length >= blake3TbbThreshold) {
        blake3_hasher_update_tbb(blake3, bytes, length);
        return;
    }
#endif

    // Small inputs, or builds without TBB support.
    blake3_hasher_update(blake3, bytes, length);
}
static void update(HashAlgorithm ha, Ctx & ctx, static void update(HashAlgorithm ha, Ctx & ctx,
std::string_view data) std::string_view data)
{ {
if (ha == HashAlgorithm::BLAKE3) blake3_hasher_update(&ctx.blake3, data.data(), data.size()); if (ha == HashAlgorithm::BLAKE3) blake3_hasher_update_with_heuristics(&ctx.blake3, data);
else if (ha == HashAlgorithm::MD5) MD5_Update(&ctx.md5, data.data(), data.size()); else if (ha == HashAlgorithm::MD5) MD5_Update(&ctx.md5, data.data(), data.size());
else if (ha == HashAlgorithm::SHA1) SHA1_Update(&ctx.sha1, data.data(), data.size()); else if (ha == HashAlgorithm::SHA1) SHA1_Update(&ctx.sha1, data.data(), data.size());
else if (ha == HashAlgorithm::SHA256) SHA256_Update(&ctx.sha256, data.data(), data.size()); else if (ha == HashAlgorithm::SHA256) SHA256_Update(&ctx.sha256, data.data(), data.size());

View file

@ -173,7 +173,7 @@ Descriptor openDirectory(const std::filesystem::path & path);
*/ */
std::string readFile(const Path & path); std::string readFile(const Path & path);
std::string readFile(const std::filesystem::path & path); std::string readFile(const std::filesystem::path & path);
void readFile(const Path & path, Sink & sink); void readFile(const Path & path, Sink & sink, bool memory_map = true);
/** /**
* Write a string to a file. * Write a string to a file.

View file

@ -50,13 +50,14 @@ endif
blake3 = dependency( blake3 = dependency(
'libblake3', 'libblake3',
version: '>= 1.5.5', version: '>= 1.8.2',
method : 'pkg-config',
) )
deps_private += blake3 deps_private += blake3
boost = dependency( boost = dependency(
'boost', 'boost',
modules : ['context', 'coroutine'], modules : ['context', 'coroutine', 'iostreams'],
include_type: 'system', include_type: 'system',
) )
# boost is a public dependency, but not a pkg-config dependency unfortunately, so we # boost is a public dependency, but not a pkg-config dependency unfortunately, so we