From 36c583dae08907f38ea3ab6216047ab91682b1cc Mon Sep 17 00:00:00 2001 From: Sergei Zimmerman Date: Sun, 4 May 2025 16:03:57 +0000 Subject: [PATCH] libexpr: Use C++20 heterogeneous lookup for RegexCache --- src/libexpr/primops.cc | 14 ++++++---- src/libutil/include/nix/util/strings.hh | 35 +++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/src/libexpr/primops.cc b/src/libexpr/primops.cc index 5b24849d2..b7b027fba 100644 --- a/src/libexpr/primops.cc +++ b/src/libexpr/primops.cc @@ -4311,9 +4311,7 @@ struct RegexCache { struct State { - // TODO use C++20 transparent comparison when available - std::unordered_map cache; - std::list keys; + std::unordered_map> cache; }; Sync state_; @@ -4324,8 +4322,14 @@ struct RegexCache auto it = state->cache.find(re); if (it != state->cache.end()) return it->second; - state->keys.emplace_back(re); - return state->cache.emplace(state->keys.back(), std::regex(state->keys.back(), std::regex::extended)).first->second; + /* No std::regex constructor overload from std::string_view, but can be constructed + from a pointer + size or an iterator range. */ + return state->cache + .emplace( + std::piecewise_construct, + std::forward_as_tuple(re), + std::forward_as_tuple(/*s=*/re.data(), /*count=*/re.size(), std::regex::extended)) + .first->second; } }; diff --git a/src/libutil/include/nix/util/strings.hh b/src/libutil/include/nix/util/strings.hh index 4c213de87..4c77516a3 100644 --- a/src/libutil/include/nix/util/strings.hh +++ b/src/libutil/include/nix/util/strings.hh @@ -97,4 +97,39 @@ extern template std::string dropEmptyInitThenConcatStringsSep(std::string_view, * Arguments that need to be passed to ssh with spaces in them. */ std::list shellSplitString(std::string_view s); + +/** + * Hash implementation that can be used for zero-copy heterogeneous lookup from + * P1690R1[1] in unordered containers.
+ *
+ * [1]: https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2019/p1690r1.html
+ */
+struct StringViewHash
+{
+private:
+    using HashType = std::hash<std::string_view>;
+
+public:
+    using is_transparent = void;
+
+    auto operator()(const char * str) const
+    {
+        /* This has a slight overhead due to an implicit strlen, but there isn't
+           a good way around it because the hash value of all overloads must be
+           consistent. Delegating to string_view is the solution initially proposed
+           in P0919R3. */
+        return HashType{}(std::string_view{str});
+    }
+
+    auto operator()(std::string_view str) const
+    {
+        return HashType{}(str);
+    }
+
+    auto operator()(const std::string & str) const
+    {
+        return HashType{}(std::string_view{str});
+    }
+};
+
 }