From 1f56ea4c7204a5b014bf96953a7f7c1e44cc66f3 Mon Sep 17 00:00:00 2001 From: silvanshade Date: Wed, 29 Jan 2025 12:24:37 -0700 Subject: [PATCH] Add BLAKE3 hashing algorithm This uses the single-threaded C-based routines from libblake3. This is not optimal performance-wise but should be a good starting point for nix compatibility with BLAKE3 hashing until a more performant implementation based on the multi-threaded BLAKE3 routines (written in Rust) can be developed. --- src/libcmd/misc-store-flags.cc | 4 +-- src/libexpr-tests/error_traces.cc | 2 +- src/libutil-tests/hash.cc | 42 ++++++++++++++++++++++++++++ src/libutil/experimental-features.cc | 10 ++++++- src/libutil/experimental-features.hh | 1 + src/libutil/hash.cc | 31 +++++++++++++------- src/libutil/hash.hh | 9 +++--- src/libutil/meson.build | 6 ++++ src/libutil/package.nix | 2 ++ 9 files changed, 89 insertions(+), 18 deletions(-) diff --git a/src/libcmd/misc-store-flags.cc b/src/libcmd/misc-store-flags.cc index 06552c032..242bd4483 100644 --- a/src/libcmd/misc-store-flags.cc +++ b/src/libcmd/misc-store-flags.cc @@ -50,7 +50,7 @@ Args::Flag hashAlgo(std::string && longName, HashAlgorithm * ha) { return Args::Flag { .longName = std::move(longName), - .description = "Hash algorithm (`md5`, `sha1`, `sha256`, or `sha512`).", + .description = "Hash algorithm (`blake3`, `md5`, `sha1`, `sha256`, or `sha512`).", .labels = {"hash-algo"}, .handler = {[ha](std::string s) { *ha = parseHashAlgo(s); @@ -63,7 +63,7 @@ Args::Flag hashAlgoOpt(std::string && longName, std::optional * o { return Args::Flag { .longName = std::move(longName), - .description = "Hash algorithm (`md5`, `sha1`, `sha256`, or `sha512`). Can be omitted for SRI hashes.", + .description = "Hash algorithm (`blake3`, `md5`, `sha1`, `sha256`, or `sha512`). 
Can be omitted for SRI hashes.", .labels = {"hash-algo"}, .handler = {[oha](std::string s) { *oha = std::optional{parseHashAlgo(s)}; diff --git a/src/libexpr-tests/error_traces.cc b/src/libexpr-tests/error_traces.cc index 2aa13cf62..53013a34a 100644 --- a/src/libexpr-tests/error_traces.cc +++ b/src/libexpr-tests/error_traces.cc @@ -1152,7 +1152,7 @@ namespace nix { ASSERT_TRACE1("hashString \"foo\" \"content\"", UsageError, - HintFmt("unknown hash algorithm '%s', expect 'md5', 'sha1', 'sha256', or 'sha512'", "foo")); + HintFmt("unknown hash algorithm '%s', expect 'blake3', 'md5', 'sha1', 'sha256', or 'sha512'", "foo")); ASSERT_TRACE2("hashString \"sha256\" {}", TypeError, diff --git a/src/libutil-tests/hash.cc b/src/libutil-tests/hash.cc index a88994d0b..3a639aef9 100644 --- a/src/libutil-tests/hash.cc +++ b/src/libutil-tests/hash.cc @@ -6,10 +6,52 @@ namespace nix { +class BLAKE3HashTest : public virtual ::testing::Test +{ +public: + + /** + * We set these in tests rather than the regular globals so we don't have + * to worry about race conditions if the tests run concurrently. 
+ */ + ExperimentalFeatureSettings mockXpSettings; + +private: + + void SetUp() override + { + mockXpSettings.set("experimental-features", "blake3-hashes"); + } +}; + /* ---------------------------------------------------------------------------- * hashString * --------------------------------------------------------------------------*/ + TEST_F(BLAKE3HashTest, testKnownBLAKE3Hashes1) { + // input string taken from: https://tools.ietf.org/html/rfc4634 (a SHA test input; the expected BLAKE3 digest is not from that RFC) + auto s = "abc"; + auto hash = hashString(HashAlgorithm::BLAKE3, s, mockXpSettings); + ASSERT_EQ(hash.to_string(HashFormat::Base16, true), + "blake3:6437b3ac38465133ffb63b75273a8db548c558465d79db03fd359c6cd5bd9d85"); + } + + TEST_F(BLAKE3HashTest, testKnownBLAKE3Hashes2) { + // input string taken from: https://tools.ietf.org/html/rfc4634 (a SHA test input; the expected BLAKE3 digest is not from that RFC) + auto s = "abcdbcdecdefdefgefghfghighijhijkijkljklmklmnlmnomnopnopq"; + auto hash = hashString(HashAlgorithm::BLAKE3, s, mockXpSettings); + ASSERT_EQ(hash.to_string(HashFormat::Base16, true), + "blake3:c19012cc2aaf0dc3d8e5c45a1b79114d2df42abb2a410bf54be09e891af06ff8"); + } + + TEST_F(BLAKE3HashTest, testKnownBLAKE3Hashes3) { + // values taken from: https://www.ietf.org/archive/id/draft-aumasson-blake3-00.txt + auto s = "IETF"; + auto hash = hashString(HashAlgorithm::BLAKE3, s, mockXpSettings); + ASSERT_EQ(hash.to_string(HashFormat::Base16, true), + "blake3:83a2de1ee6f4e6ab686889248f4ec0cf4cc5709446a682ffd1cbb4d6165181e2"); + } + TEST(hashString, testKnownMD5Hashes1) { // values taken from: https://tools.ietf.org/html/rfc1321 auto s1 = ""; diff --git a/src/libutil/experimental-features.cc b/src/libutil/experimental-features.cc index a0c955816..dba5893a8 100644 --- a/src/libutil/experimental-features.cc +++ b/src/libutil/experimental-features.cc @@ -24,7 +24,7 @@ struct ExperimentalFeatureDetails * feature, we either have no issue at all if few features are not added * at the end of the list, or a proper merge conflict if they are. 
*/ -constexpr size_t numXpFeatures = 1 + static_cast(Xp::PipeOperators); +constexpr size_t numXpFeatures = 1 + static_cast(Xp::BLAKE3Hashes); constexpr std::array xpFeatureDetails = {{ { @@ -302,6 +302,14 @@ constexpr std::array xpFeatureDetails )", .trackingUrl = "https://github.com/NixOS/nix/milestone/55", }, + { + .tag = Xp::BLAKE3Hashes, + .name = "blake3-hashes", + .description = R"( + Enables support for BLAKE3 hashes. + )", + .trackingUrl = "", + }, }}; static_assert( diff --git a/src/libutil/experimental-features.hh b/src/libutil/experimental-features.hh index 412bf0886..1d02ba94d 100644 --- a/src/libutil/experimental-features.hh +++ b/src/libutil/experimental-features.hh @@ -37,6 +37,7 @@ enum struct ExperimentalFeature MountedSSHStore, VerifiedFetches, PipeOperators, + BLAKE3Hashes, }; /** diff --git a/src/libutil/hash.cc b/src/libutil/hash.cc index b69dec685..6a7a8b092 100644 --- a/src/libutil/hash.cc +++ b/src/libutil/hash.cc @@ -1,6 +1,7 @@ #include #include +#include #include #include #include @@ -8,6 +9,7 @@ #include "args.hh" #include "hash.hh" #include "archive.hh" +#include "config.hh" #include "split.hh" #include @@ -20,6 +22,7 @@ namespace nix { static size_t regularHashSize(HashAlgorithm type) { switch (type) { + case HashAlgorithm::BLAKE3: return blake3HashSize; case HashAlgorithm::MD5: return md5HashSize; case HashAlgorithm::SHA1: return sha1HashSize; case HashAlgorithm::SHA256: return sha256HashSize; @@ -29,12 +32,15 @@ static size_t regularHashSize(HashAlgorithm type) { } -const std::set hashAlgorithms = {"md5", "sha1", "sha256", "sha512" }; +const std::set hashAlgorithms = {"blake3", "md5", "sha1", "sha256", "sha512" }; const std::set hashFormats = {"base64", "nix32", "base16", "sri" }; -Hash::Hash(HashAlgorithm algo) : algo(algo) +Hash::Hash(HashAlgorithm algo, const ExperimentalFeatureSettings & xpSettings) : algo(algo) { + if (algo == HashAlgorithm::BLAKE3) { + xpSettings.require(Xp::BLAKE3Hashes); + } hashSize = regularHashSize(algo); 
assert(hashSize <= maxHashSize); memset(hash, 0, maxHashSize); @@ -284,6 +290,7 @@ Hash newHashAllowEmpty(std::string_view hashStr, std::optional ha union Ctx { + blake3_hasher blake3; MD5_CTX md5; SHA_CTX sha1; SHA256_CTX sha256; @@ -293,7 +300,8 @@ union Ctx static void start(HashAlgorithm ha, Ctx & ctx) { - if (ha == HashAlgorithm::MD5) MD5_Init(&ctx.md5); + if (ha == HashAlgorithm::BLAKE3) blake3_hasher_init(&ctx.blake3); + else if (ha == HashAlgorithm::MD5) MD5_Init(&ctx.md5); else if (ha == HashAlgorithm::SHA1) SHA1_Init(&ctx.sha1); else if (ha == HashAlgorithm::SHA256) SHA256_Init(&ctx.sha256); else if (ha == HashAlgorithm::SHA512) SHA512_Init(&ctx.sha512); @@ -303,7 +311,8 @@ static void start(HashAlgorithm ha, Ctx & ctx) static void update(HashAlgorithm ha, Ctx & ctx, std::string_view data) { - if (ha == HashAlgorithm::MD5) MD5_Update(&ctx.md5, data.data(), data.size()); + if (ha == HashAlgorithm::BLAKE3) blake3_hasher_update(&ctx.blake3, data.data(), data.size()); + else if (ha == HashAlgorithm::MD5) MD5_Update(&ctx.md5, data.data(), data.size()); else if (ha == HashAlgorithm::SHA1) SHA1_Update(&ctx.sha1, data.data(), data.size()); else if (ha == HashAlgorithm::SHA256) SHA256_Update(&ctx.sha256, data.data(), data.size()); else if (ha == HashAlgorithm::SHA512) SHA512_Update(&ctx.sha512, data.data(), data.size()); @@ -312,24 +321,24 @@ static void update(HashAlgorithm ha, Ctx & ctx, static void finish(HashAlgorithm ha, Ctx & ctx, unsigned char * hash) { - if (ha == HashAlgorithm::MD5) MD5_Final(hash, &ctx.md5); + if (ha == HashAlgorithm::BLAKE3) blake3_hasher_finalize(&ctx.blake3, hash, BLAKE3_OUT_LEN); + else if (ha == HashAlgorithm::MD5) MD5_Final(hash, &ctx.md5); else if (ha == HashAlgorithm::SHA1) SHA1_Final(hash, &ctx.sha1); else if (ha == HashAlgorithm::SHA256) SHA256_Final(hash, &ctx.sha256); else if (ha == HashAlgorithm::SHA512) SHA512_Final(hash, &ctx.sha512); } - -Hash hashString(HashAlgorithm ha, std::string_view s) +Hash hashString( + 
HashAlgorithm ha, std::string_view s, const ExperimentalFeatureSettings & xpSettings) { Ctx ctx; - Hash hash(ha); + Hash hash(ha, xpSettings); start(ha, ctx); update(ha, ctx, s); finish(ha, ctx, hash.hash); return hash; } - Hash hashFile(HashAlgorithm ha, const Path & path) { HashSink sink(ha); @@ -426,6 +435,7 @@ std::string_view printHashFormat(HashFormat HashFormat) std::optional parseHashAlgoOpt(std::string_view s) { + if (s == "blake3") return HashAlgorithm::BLAKE3; if (s == "md5") return HashAlgorithm::MD5; if (s == "sha1") return HashAlgorithm::SHA1; if (s == "sha256") return HashAlgorithm::SHA256; @@ -439,12 +449,13 @@ HashAlgorithm parseHashAlgo(std::string_view s) if (opt_h) return *opt_h; else - throw UsageError("unknown hash algorithm '%1%', expect 'md5', 'sha1', 'sha256', or 'sha512'", s); + throw UsageError("unknown hash algorithm '%1%', expect 'blake3', 'md5', 'sha1', 'sha256', or 'sha512'", s); } std::string_view printHashAlgo(HashAlgorithm ha) { switch (ha) { + case HashAlgorithm::BLAKE3: return "blake3"; case HashAlgorithm::MD5: return "md5"; case HashAlgorithm::SHA1: return "sha1"; case HashAlgorithm::SHA256: return "sha256"; diff --git a/src/libutil/hash.hh b/src/libutil/hash.hh index dc95b9f2f..13d526f42 100644 --- a/src/libutil/hash.hh +++ b/src/libutil/hash.hh @@ -1,6 +1,7 @@ #pragma once ///@file +#include "config.hh" #include "types.hh" #include "serialise.hh" #include "file-system.hh" @@ -11,9 +12,9 @@ namespace nix { MakeError(BadHash, Error); -enum struct HashAlgorithm : char { MD5 = 42, SHA1, SHA256, SHA512 }; - +enum struct HashAlgorithm : char { MD5 = 42, SHA1, SHA256, SHA512, BLAKE3 }; +const int blake3HashSize = 32; const int md5HashSize = 16; const int sha1HashSize = 20; const int sha256HashSize = 32; @@ -52,7 +53,7 @@ struct Hash /** * Create a zero-filled hash object. 
*/ - explicit Hash(HashAlgorithm algo); + explicit Hash(HashAlgorithm algo, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); /** * Parse the hash from a string representation in the format @@ -157,7 +158,7 @@ std::string printHash16or32(const Hash & hash); /** * Compute the hash of the given string. */ -Hash hashString(HashAlgorithm ha, std::string_view s); +Hash hashString(HashAlgorithm ha, std::string_view s, const ExperimentalFeatureSettings & xpSettings = experimentalFeatureSettings); /** * Compute the hash of the given file, hashing its contents directly. diff --git a/src/libutil/meson.build b/src/libutil/meson.build index ac701d8fd..9ee3770de 100644 --- a/src/libutil/meson.build +++ b/src/libutil/meson.build @@ -62,6 +62,12 @@ elif host_machine.system() == 'sunos' deps_other += [socket, network_service_library] endif +blake3 = dependency( + 'libblake3', + version: '>= 1.5.5', +) +deps_private += blake3 + boost = dependency( 'boost', modules : ['context', 'coroutine'], diff --git a/src/libutil/package.nix b/src/libutil/package.nix index 586119a6e..2f19b5822 100644 --- a/src/libutil/package.nix +++ b/src/libutil/package.nix @@ -6,6 +6,7 @@ boost, brotli, libarchive, + libblake3, libcpuid, libsodium, nlohmann_json, @@ -42,6 +43,7 @@ mkMesonLibrary (finalAttrs: { buildInputs = [ brotli + libblake3 libsodium openssl ] ++ lib.optional stdenv.hostPlatform.isx86_64 libcpuid;