mirror of
https://github.com/NixOS/nix
synced 2025-06-24 18:01:16 +02:00
Merge pull request #12623 from xokdvium/ord-safe-sort
Switch builtins.sort to a custom stable PeekSort
This commit is contained in:
commit
af01fea5d6
7 changed files with 685 additions and 5 deletions
|
@ -14,6 +14,7 @@
|
||||||
#include "nix/expr/value-to-xml.hh"
|
#include "nix/expr/value-to-xml.hh"
|
||||||
#include "nix/expr/primops.hh"
|
#include "nix/expr/primops.hh"
|
||||||
#include "nix/fetchers/fetch-to-store.hh"
|
#include "nix/fetchers/fetch-to-store.hh"
|
||||||
|
#include "nix/util/sort.hh"
|
||||||
|
|
||||||
#include <boost/container/small_vector.hpp>
|
#include <boost/container/small_vector.hpp>
|
||||||
#include <nlohmann/json.hpp>
|
#include <nlohmann/json.hpp>
|
||||||
|
@ -3695,10 +3696,14 @@ static void prim_sort(EvalState & state, const PosIdx pos, Value * * args, Value
|
||||||
return state.forceBool(vBool, pos, "while evaluating the return value of the sorting function passed to builtins.sort");
|
return state.forceBool(vBool, pos, "while evaluating the return value of the sorting function passed to builtins.sort");
|
||||||
};
|
};
|
||||||
|
|
||||||
/* FIXME: std::sort can segfault if the comparator is not a strict
|
/* NOTE: Using custom implementation because std::sort and std::stable_sort
|
||||||
weak ordering. What to do? std::stable_sort() seems more
|
are not resilient to comparators that violate strict weak ordering. Diagnosing
|
||||||
resilient, but no guarantees... */
|
incorrect implementations is an O(n^3) problem, so doing the checks is much more
|
||||||
std::stable_sort(list.begin(), list.end(), comparator);
|
expensive than doing the sorting. For this reason we choose to use sorting algorithms
|
||||||
|
that can't be broken by invalid comparators. peeksort (mergesort)
|
||||||
|
doesn't misbehave when any of the strict weak order properties is
|
||||||
|
violated - output is always a reordering of the input. */
|
||||||
|
peeksort(list.begin(), list.end(), comparator);
|
||||||
|
|
||||||
v.mkList(list);
|
v.mkList(list);
|
||||||
}
|
}
|
||||||
|
@ -3720,6 +3725,32 @@ static RegisterPrimOp primop_sort({
|
||||||
|
|
||||||
This is a stable sort: it preserves the relative order of elements
|
This is a stable sort: it preserves the relative order of elements
|
||||||
deemed equal by the comparator.
|
deemed equal by the comparator.
|
||||||
|
|
||||||
|
*comparator* must impose a strict weak ordering on the set of values
|
||||||
|
in the *list*. This means that for any elements *a*, *b* and *c* from the
|
||||||
|
*list*, *comparator* must satisfy the following relations:
|
||||||
|
|
||||||
|
1. Transitivity
|
||||||
|
|
||||||
|
```nix
|
||||||
|
comparator a b && comparator b c -> comparator a c
|
||||||
|
```
|
||||||
|
|
||||||
|
1. Irreflexivity
|
||||||
|
|
||||||
|
```nix
|
||||||
|
comparator a a == false
|
||||||
|
```
|
||||||
|
|
||||||
|
1. Transitivity of equivalence
|
||||||
|
|
||||||
|
```nix
|
||||||
|
let equiv = a: b: (!comparator a b && !comparator b a); in
|
||||||
|
equiv a b && equiv b c -> equiv a c
|
||||||
|
```
|
||||||
|
|
||||||
|
If the *comparator* violates any of these properties, then `builtins.sort`
|
||||||
|
reorders elements in an unspecified manner.
|
||||||
)",
|
)",
|
||||||
.fun = prim_sort,
|
.fun = prim_sort,
|
||||||
});
|
});
|
||||||
|
|
|
@ -65,6 +65,7 @@ sources = files(
|
||||||
'position.cc',
|
'position.cc',
|
||||||
'processes.cc',
|
'processes.cc',
|
||||||
'references.cc',
|
'references.cc',
|
||||||
|
'sort.cc',
|
||||||
'spawn.cc',
|
'spawn.cc',
|
||||||
'strings.cc',
|
'strings.cc',
|
||||||
'suggestions.cc',
|
'suggestions.cc',
|
||||||
|
|
274
src/libutil-tests/sort.cc
Normal file
274
src/libutil-tests/sort.cc
Normal file
|
@ -0,0 +1,274 @@
|
||||||
|
#include <gtest/gtest.h>
|
||||||
|
#include <rapidcheck/gtest.h>
|
||||||
|
#include "nix/util/sort.hh"
|
||||||
|
|
||||||
|
#include <vector>
|
||||||
|
#include <list>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <random>
|
||||||
|
|
||||||
|
namespace nix {
|
||||||
|
|
||||||
|
struct MonotonicSubranges : public ::testing::Test
|
||||||
|
{
|
||||||
|
std::vector<int> empty_;
|
||||||
|
std::vector<int> basic_ = {1, 0, -1, -100, 10, 10, 20, 40, 5, 5, 20, 10, 10, 1, -5};
|
||||||
|
};
|
||||||
|
|
||||||
|
/* On an empty range every run-detection helper must hand back the (empty)
   range's begin iterator. */
TEST_F(MonotonicSubranges, empty)
{
    const auto first = empty_.begin();
    const auto last = empty_.end();

    ASSERT_EQ(weaklyIncreasingPrefix(first, last), first);
    ASSERT_EQ(weaklyIncreasingSuffix(first, last), first);
    ASSERT_EQ(strictlyDecreasingPrefix(first, last), first);
    ASSERT_EQ(strictlyDecreasingSuffix(first, last), first);
}
|
||||||
|
|
||||||
|
/* Checks run boundaries on the fixed sample, first as-is (decreasing runs)
   and then reversed (increasing runs). */
TEST_F(MonotonicSubranges, basic)
{
    const auto first = basic_.begin();
    const auto last = basic_.end();

    /* Leading run is 1, 0, -1, -100; trailing run is 10, 1, -5. */
    ASSERT_EQ(strictlyDecreasingPrefix(first, last), first + 4);
    ASSERT_EQ(strictlyDecreasingSuffix(first, last), first + 12);

    /* Reversing in place keeps the iterators valid. */
    std::reverse(first, last);

    /* Leading run is -5, 1, 10, 10, 20; trailing run is -100, -1, 0, 1. */
    ASSERT_EQ(weaklyIncreasingPrefix(first, last), first + 5);
    ASSERT_EQ(weaklyIncreasingSuffix(first, last), first + 11);
}
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
class SortTestPermutations : public ::testing::Test
|
||||||
|
{
|
||||||
|
std::vector<T> initialData = {std::numeric_limits<T>::max(), std::numeric_limits<T>::min(), 0, 0, 42, 126, 36};
|
||||||
|
std::vector<T> vectorData;
|
||||||
|
std::list<T> listData;
|
||||||
|
|
||||||
|
public:
|
||||||
|
std::vector<T> scratchVector;
|
||||||
|
std::list<T> scratchList;
|
||||||
|
std::vector<T> empty;
|
||||||
|
|
||||||
|
void SetUp() override
|
||||||
|
{
|
||||||
|
vectorData = initialData;
|
||||||
|
std::sort(vectorData.begin(), vectorData.end());
|
||||||
|
listData = std::list(vectorData.begin(), vectorData.end());
|
||||||
|
}
|
||||||
|
|
||||||
|
bool nextPermutation()
|
||||||
|
{
|
||||||
|
std::next_permutation(vectorData.begin(), vectorData.end());
|
||||||
|
std::next_permutation(listData.begin(), listData.end());
|
||||||
|
scratchList = listData;
|
||||||
|
scratchVector = vectorData;
|
||||||
|
return vectorData == initialData;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
using SortPermutationsTypes = ::testing::Types<int, long long, short, unsigned, unsigned long>;
|
||||||
|
|
||||||
|
TYPED_TEST_SUITE(SortTestPermutations, SortPermutationsTypes);
|
||||||
|
|
||||||
|
/* Runs insertionsort over successive permutations of the fixture data, once
   through std::list iterators and once through std::vector iterators. */
TYPED_TEST(SortTestPermutations, insertionsort)
{
    for (bool done = this->nextPermutation(); !done; done = this->nextPermutation()) {
        auto & asList = this->scratchList;
        insertionsort(asList.begin(), asList.end());
        ASSERT_TRUE(std::is_sorted(asList.begin(), asList.end()));

        auto & asVector = this->scratchVector;
        insertionsort(asVector.begin(), asVector.end());
        ASSERT_TRUE(std::is_sorted(asVector.begin(), asVector.end()));
    }
}
|
||||||
|
|
||||||
|
/* Runs peeksort over successive permutations of the fixture data. */
TYPED_TEST(SortTestPermutations, peeksort)
{
    for (bool done = this->nextPermutation(); !done; done = this->nextPermutation()) {
        auto & values = this->scratchVector;
        peeksort(values.begin(), values.end());
        ASSERT_TRUE(std::is_sorted(values.begin(), values.end()));
    }
}
|
||||||
|
|
||||||
|
/* Smoke test: sorting an empty range must simply return. */
TEST(InsertionSort, empty)
{
    std::vector<int> nothing{};
    insertionsort(nothing.begin(), nothing.end());
}
|
||||||
|
|
||||||
|
/**
 * Parameterised fixture producing random integer vectors.
 *
 * The parameter tuple is (maxSize, min, max, iterations): vectors have a
 * random size in [0, maxSize] with values drawn uniformly from [min, max].
 * The generator is seeded from gtest's random_seed flag.
 */
struct RandomPeekSort : public ::testing::TestWithParam<
                            std::tuple</*maxSize*/ std::size_t, /*min*/ int, /*max*/ int, /*iterations*/ std::size_t>>
{
    using ValueType = int;
    std::vector<ValueType> data_;
    std::mt19937 urng_;
    std::uniform_int_distribution<int> distribution_;

    void SetUp() override
    {
        const auto & params = GetParam();
        urng_ = std::mt19937(GTEST_FLAG_GET(random_seed));
        distribution_ = std::uniform_int_distribution<int>(std::get<1>(params), std::get<2>(params));
    }

    /* Replace data_ with a fresh random vector; the size is drawn first, then the values. */
    auto regenerate()
    {
        const std::size_t maxSize = std::get<0>(GetParam());
        std::uniform_int_distribution<std::size_t> sizeDistribution(0, maxSize);
        data_.resize(sizeDistribution(urng_));
        for (auto & value : data_)
            value = distribution_(urng_);
    }
};
|
||||||
|
|
||||||
|
/* Random inputs sorted with the default (operator<) comparator. */
TEST_P(RandomPeekSort, defaultComparator)
{
    const std::size_t iterations = std::get<3>(GetParam());

    for (std::size_t remaining = iterations; remaining > 0; --remaining) {
        regenerate();
        const auto first = data_.begin();
        const auto last = data_.end();

        peeksort(first, last);
        ASSERT_TRUE(std::is_sorted(first, last));

        /* Sorting is idempotent */
        peeksort(first, last);
        ASSERT_TRUE(std::is_sorted(first, last));
    }
}
|
||||||
|
|
||||||
|
/* Random inputs sorted in descending order via std::greater. */
TEST_P(RandomPeekSort, greater)
{
    const std::size_t iterations = std::get<3>(GetParam());
    const std::greater<int> descending{};

    for (std::size_t remaining = iterations; remaining > 0; --remaining) {
        regenerate();
        const auto first = data_.begin();
        const auto last = data_.end();

        peeksort(first, last, descending);
        ASSERT_TRUE(std::is_sorted(first, last, descending));

        /* Sorting is idempotent */
        peeksort(first, last, descending);
        ASSERT_TRUE(std::is_sorted(first, last, descending));
    }
}
|
||||||
|
|
||||||
|
/* Feeds peeksort a comparator that answers at random and checks that the
   result is still a reordering of the input. */
TEST_P(RandomPeekSort, brokenComparator)
{
    const std::size_t iterations = std::get<3>(GetParam());

    /* This is a pretty nice way of modeling a worst-case scenario for a broken comparator.
       If the sorting algorithm doesn't break in such case, then surely all deterministic
       predicates won't break it. */
    auto coinFlip = [&](const auto &, const auto &) -> bool {
        return std::uniform_int_distribution<unsigned>(0, 1)(urng_) != 0u;
    };

    for (std::size_t remaining = iterations; remaining > 0; --remaining) {
        regenerate();
        auto expected = data_;

        peeksort(data_.begin(), data_.end(), coinFlip);

        /* Check that the output is just a reordering of the input. This is the
           contract of the implementation in regard to comparators that don't
           define a strict weak order. Both sides are normalised with std::sort
           before comparing. */
        std::sort(data_.begin(), data_.end());
        std::sort(expected.begin(), expected.end());
        ASSERT_EQ(expected, data_);
    }
}
|
||||||
|
|
||||||
|
/* Verifies that peeksort is stable: elements that compare equivalent keep
   their original relative order. */
TEST_P(RandomPeekSort, stability)
{
    const std::size_t iterations = std::get<3>(GetParam());

    for (std::size_t i = 0; i < iterations; ++i) {
        regenerate();

        /* Assign sequential ids to objects. After the sort ids for equivalent
           elements should be in ascending order. */
        std::vector<std::pair<int, int>> pairs;
        pairs.reserve(data_.size());
        int id = 0;
        for (const int val : data_)
            pairs.emplace_back(val, ++id);

        /* Order by key only (descending); the id must not take part in the comparison. */
        auto comp = [](const auto & lhs, const auto & rhs) -> bool { return lhs.first > rhs.first; };

        peeksort(pairs.begin(), pairs.end(), comp);
        ASSERT_TRUE(std::is_sorted(pairs.begin(), pairs.end(), comp));

        /* Walk the groups of equal keys. The iterator is advanced only by
           jumping to the end of the current group: an additional increment
           would skip the first element of the next group and step past
           end() after the last one. */
        for (auto groupBegin = pairs.begin(), end = pairs.end(); groupBegin != end;) {
            const auto key = groupBegin->first;
            const auto groupEnd =
                std::find_if_not(groupBegin, end, [key](const auto & entry) { return entry.first == key; });
            ASSERT_TRUE(std::is_sorted(groupBegin, groupEnd, [](const auto & lhs, const auto & rhs) {
                return lhs.second < rhs.second;
            }));
            groupBegin = groupEnd;
        }
    }
}
|
||||||
|
|
||||||
|
using RandomPeekSortParamType = RandomPeekSort::ParamType;
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_SUITE_P(
|
||||||
|
PeekSort,
|
||||||
|
RandomPeekSort,
|
||||||
|
::testing::Values(
|
||||||
|
RandomPeekSortParamType{128, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 1024},
|
||||||
|
RandomPeekSortParamType{7753, -32, 32, 128},
|
||||||
|
RandomPeekSortParamType{11719, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 64},
|
||||||
|
RandomPeekSortParamType{4063, 0, 32, 256},
|
||||||
|
RandomPeekSortParamType{771, -8, 8, 2048},
|
||||||
|
RandomPeekSortParamType{433, 0, 1, 2048},
|
||||||
|
RandomPeekSortParamType{0, 0, 0, 1}, /* empty case */
|
||||||
|
RandomPeekSortParamType{
|
||||||
|
1, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 1}, /* single element */
|
||||||
|
RandomPeekSortParamType{
|
||||||
|
2, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 2}, /* two elements */
|
||||||
|
RandomPeekSortParamType{55425, std::numeric_limits<int>::min(), std::numeric_limits<int>::max(), 128}));
|
||||||
|
|
||||||
|
template<typename T>
|
||||||
|
struct SortProperty : public ::testing::Test
|
||||||
|
{};
|
||||||
|
|
||||||
|
using SortPropertyTypes = ::testing::Types<int, unsigned, long long, short, std::string>;
|
||||||
|
TYPED_TEST_SUITE(SortProperty, SortPropertyTypes);
|
||||||
|
|
||||||
|
/* Property: peeksort with the default comparator yields a sorted range. */
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSorted, (std::vector<TypeParam> vec))
{
    const auto first = vec.begin();
    const auto last = vec.end();
    peeksort(first, last);
    RC_ASSERT(std::is_sorted(first, last));
}
|
||||||
|
|
||||||
|
/* Property: peeksort with std::greater yields a range sorted under std::greater. */
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSortedGreater, (std::vector<TypeParam> vec))
{
    const std::greater<TypeParam> descending{};
    peeksort(vec.begin(), vec.end(), descending);
    RC_ASSERT(std::is_sorted(vec.begin(), vec.end(), descending));
}
|
||||||
|
|
||||||
|
/* Property: insertionsort with the default comparator yields a sorted range. */
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, insertionsortSorted, (std::vector<TypeParam> vec))
{
    const auto first = vec.begin();
    const auto last = vec.end();
    insertionsort(first, last);
    RC_ASSERT(std::is_sorted(first, last));
}
|
||||||
|
|
||||||
|
/* Property: when only the first member of each pair is compared, peeksort
   produces exactly the same sequence as std::stable_sort. */
RC_GTEST_PROP(SortProperty, peeksortStability, (std::vector<std::pair<char, char>> vec))
{
    auto byFirst = [](auto lhs, auto rhs) { return lhs.first < rhs.first; };

    auto reference = vec;
    std::stable_sort(reference.begin(), reference.end(), byFirst);

    peeksort(vec.begin(), vec.end(), byFirst);

    RC_ASSERT(reference == vec);
}
|
||||||
|
|
||||||
|
/* Property: re-sorting an already sorted range needs at most vec.size()
   comparator invocations. */
RC_GTEST_TYPED_FIXTURE_PROP(SortProperty, peeksortSortedLinearComparisonComplexity, (std::vector<TypeParam> vec))
{
    /* The first pass only establishes a sorted input. */
    peeksort(vec.begin(), vec.end());
    RC_ASSERT(std::is_sorted(vec.begin(), vec.end()));

    std::size_t comparisons = 0;
    auto countingLess = [&comparisons](auto lhs, auto rhs) {
        ++comparisons;
        return lhs < rhs;
    };

    peeksort(vec.begin(), vec.end(), countingLess);

    /* In the sorted case comparison complexity should be linear. */
    RC_ASSERT(comparisons <= vec.size());
}
|
||||||
|
|
||||||
|
} // namespace nix
|
|
@ -59,6 +59,7 @@ headers = files(
|
||||||
'signals.hh',
|
'signals.hh',
|
||||||
'signature/local-keys.hh',
|
'signature/local-keys.hh',
|
||||||
'signature/signer.hh',
|
'signature/signer.hh',
|
||||||
|
'sort.hh',
|
||||||
'source-accessor.hh',
|
'source-accessor.hh',
|
||||||
'source-path.hh',
|
'source-path.hh',
|
||||||
'split.hh',
|
'split.hh',
|
||||||
|
|
299
src/libutil/include/nix/util/sort.hh
Normal file
299
src/libutil/include/nix/util/sort.hh
Normal file
|
@ -0,0 +1,299 @@
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <iterator>
|
||||||
|
#include <concepts>
|
||||||
|
#include <vector>
|
||||||
|
#include <type_traits>
|
||||||
|
#include <functional>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @file
|
||||||
|
*
|
||||||
|
* In-house implementation of sorting algorithms. Used for cases when several properties
|
||||||
|
* need to be upheld regardless of the stdlib implementation of std::sort or
|
||||||
|
* std::stable_sort.
|
||||||
|
*
|
||||||
|
* PeekSort implementation is adapted from reference implementation
|
||||||
|
* https://github.com/sebawild/powersort licensed under the MIT License.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* PeekSort attribution:
|
||||||
|
*
|
||||||
|
* MIT License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2022 Sebastian Wild
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
* of this software and associated documentation files (the "Software"), to deal
|
||||||
|
* in the Software without restriction, including without limitation the rights
|
||||||
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the Software is
|
||||||
|
* furnished to do so, subject to the following conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be included in all
|
||||||
|
* copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
* SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
namespace nix {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Merge sorted runs [begin, middle) with [middle, end) in-place [begin, end).
|
||||||
|
* Uses a temporary working buffer by first copying [begin, end) to it.
|
||||||
|
*
|
||||||
|
* @param begin Start of the first subrange to be sorted.
|
||||||
|
* @param middle End of the first sorted subrange and the start of the second.
|
||||||
|
* @param end End of the second sorted subrange.
|
||||||
|
* @param workingBegin Start of the working buffer.
|
||||||
|
* @param comp Comparator implementing an operator()(const ValueType& lhs, const ValueType& rhs).
|
||||||
|
*
|
||||||
|
* @pre workingBegin buffer must have at least std::distance(begin, end) elements.
|
||||||
|
*
|
||||||
|
* @note We can't use std::inplace_merge or std::merge, because their behavior
|
||||||
|
* is undefined if the comparator is not strict weak ordering.
|
||||||
|
*/
|
||||||
|
template<
|
||||||
|
std::forward_iterator Iter,
|
||||||
|
std::random_access_iterator BufIter,
|
||||||
|
typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
void mergeSortedRunsInPlace(Iter begin, Iter middle, Iter end, BufIter workingBegin, Comparator comp = {})
|
||||||
|
{
|
||||||
|
const BufIter workingMiddle = std::move(begin, middle, workingBegin);
|
||||||
|
const BufIter workingEnd = std::move(middle, end, workingMiddle);
|
||||||
|
|
||||||
|
Iter output = begin;
|
||||||
|
BufIter workingLeft = workingBegin;
|
||||||
|
BufIter workingRight = workingMiddle;
|
||||||
|
|
||||||
|
while (workingLeft != workingMiddle && workingRight != workingEnd) {
|
||||||
|
/* Note the inversion here !comp(...., ....). This is required for the merge to be stable.
|
||||||
|
If a == b where a if from the left part and b is the the right, then we have to pick
|
||||||
|
a. */
|
||||||
|
*output++ = !comp(*workingRight, *workingLeft) ? std::move(*workingLeft++) : std::move(*workingRight++);
|
||||||
|
}
|
||||||
|
|
||||||
|
std::move(workingLeft, workingMiddle, output);
|
||||||
|
std::move(workingRight, workingEnd, output);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Simple insertion sort.
|
||||||
|
*
|
||||||
|
* Does not require that the std::iter_value_t<Iter> is copyable.
|
||||||
|
*
|
||||||
|
* @param begin Start of the range to sort.
|
||||||
|
* @param end End of the range to sort.
|
||||||
|
* @comp Comparator the defines the ordering. Order of elements if the comp is not strict weak ordering
|
||||||
|
* is not specified.
|
||||||
|
* @throws Nothing.
|
||||||
|
*
|
||||||
|
* Note on exception safety: this function provides weak exception safety
|
||||||
|
* guarantees. To elaborate: if the comparator throws or move assignment
|
||||||
|
* throws (value type is not nothrow_move_assignable) then the range is left in
|
||||||
|
* a consistent, but unspecified state.
|
||||||
|
*
|
||||||
|
* @note This can't be implemented in terms of binary search if the strict weak ordering
|
||||||
|
* needs to be handled in a well-defined but unspecified manner.
|
||||||
|
*/
|
||||||
|
template<std::bidirectional_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
void insertionsort(Iter begin, Iter end, Comparator comp = {})
|
||||||
|
{
|
||||||
|
if (begin == end)
|
||||||
|
return;
|
||||||
|
for (Iter current = std::next(begin); current != end; ++current) {
|
||||||
|
for (Iter insertionPoint = current;
|
||||||
|
insertionPoint != begin && comp(*insertionPoint, *std::prev(insertionPoint));
|
||||||
|
--insertionPoint) {
|
||||||
|
std::swap(*insertionPoint, *std::prev(insertionPoint));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find maximal i <= end such that [begin, i) is strictly decreasing according
|
||||||
|
* to the specified comparator.
|
||||||
|
*/
|
||||||
|
template<std::forward_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
Iter strictlyDecreasingPrefix(Iter begin, Iter end, Comparator && comp = {})
|
||||||
|
{
|
||||||
|
if (begin == end)
|
||||||
|
return begin;
|
||||||
|
while (std::next(begin) != end && /* *std::next(begin) < begin */
|
||||||
|
comp(*std::next(begin), *begin))
|
||||||
|
++begin;
|
||||||
|
return std::next(begin);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find minimal i >= start such that [i, end) is strictly decreasing according
|
||||||
|
* to the specified comparator.
|
||||||
|
*/
|
||||||
|
template<std::bidirectional_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
Iter strictlyDecreasingSuffix(Iter begin, Iter end, Comparator && comp = {})
|
||||||
|
{
|
||||||
|
if (begin == end)
|
||||||
|
return end;
|
||||||
|
while (std::prev(end) > begin && /* *std::prev(end) < *std::prev(end, 2) */
|
||||||
|
comp(*std::prev(end), *std::prev(end, 2)))
|
||||||
|
--end;
|
||||||
|
return std::prev(end);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find maximal i <= end such that [begin, i) is weakly increasing according
|
||||||
|
* to the specified comparator.
|
||||||
|
*/
|
||||||
|
template<std::bidirectional_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
Iter weaklyIncreasingPrefix(Iter begin, Iter end, Comparator && comp = {})
|
||||||
|
{
|
||||||
|
return strictlyDecreasingPrefix(begin, end, std::not_fn(std::forward<Comparator>(comp)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Find minimal i >= start such that [i, end) is weakly increasing according
|
||||||
|
* to the specified comparator.
|
||||||
|
*/
|
||||||
|
template<std::bidirectional_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
Iter weaklyIncreasingSuffix(Iter begin, Iter end, Comparator && comp = {})
|
||||||
|
{
|
||||||
|
return strictlyDecreasingSuffix(begin, end, std::not_fn(std::forward<Comparator>(comp)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Peeksort stable sorting algorithm. Sorts elements in-place.
|
||||||
|
* Allocates additional memory as needed.
|
||||||
|
*
|
||||||
|
* @details
|
||||||
|
* PeekSort is a stable, near-optimal natural mergesort. Most importantly, like any
|
||||||
|
* other mergesort it upholds the "Ord safety" property. Meaning that even for
|
||||||
|
* comparator predicates that don't satisfy strict weak ordering it can't result
|
||||||
|
* in infinite loops/out of bounds memory accesses or other undefined behavior.
|
||||||
|
*
|
||||||
|
* As a quick reminder, strict weak ordering relation operator< must satisfy
|
||||||
|
* the following properties. Keep in mind that in C++ an equvalence relation
|
||||||
|
* is specified in terms of operator< like so: a ~ b iff !(a < b) && !(b < a).
|
||||||
|
*
|
||||||
|
* 1. a < a === false - relation is irreflexive
|
||||||
|
* 2. a < b, b < c => a < c - transitivity
|
||||||
|
* 3. a ~ b, a ~ b, b ~ c => a ~ c, transitivity of equivalence
|
||||||
|
*
|
||||||
|
* @see https://www.wild-inter.net/publications/munro-wild-2018
|
||||||
|
* @see https://github.com/Voultapher/sort-research-rs/blob/main/writeup/sort_safety/text.md#property-analysis
|
||||||
|
*
|
||||||
|
* The order of elements when comp is not strict weak ordering is not specified, but
|
||||||
|
* is not undefined. The output is always some permutation of the input, regardless
|
||||||
|
* of the comparator provided.
|
||||||
|
* Relying on ordering in such cases is erroneous, but this implementation
|
||||||
|
* will happily accept broken comparators and will not crash.
|
||||||
|
*
|
||||||
|
* @param begin Start of the range to be sorted.
|
||||||
|
* @param end End of the range to be sorted.
|
||||||
|
* @comp comp Comparator implementing an operator()(const ValueType& lhs, const ValueType& rhs).
|
||||||
|
*
|
||||||
|
* @throws std::bad_alloc if the temporary buffer can't be allocated.
|
||||||
|
*
|
||||||
|
* @return Nothing.
|
||||||
|
*
|
||||||
|
* Note on exception safety: this function provides weak exception safety
|
||||||
|
* guarantees. To elaborate: if the comparator throws or move assignment
|
||||||
|
* throws (value type is not nothrow_move_assignable) then the range is left in
|
||||||
|
* a consistent, but unspecified state.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
template<std::random_access_iterator Iter, typename Comparator = std::less<std::iter_value_t<Iter>>>
|
||||||
|
/* ValueType must be default constructible to create the temporary buffer */
|
||||||
|
requires std::is_default_constructible_v<std::iter_value_t<Iter>>
|
||||||
|
void peeksort(Iter begin, Iter end, Comparator comp = {})
|
||||||
|
{
|
||||||
|
auto length = std::distance(begin, end);
|
||||||
|
|
||||||
|
/* Special-case very simple inputs. This is identical to how libc++ does it. */
|
||||||
|
switch (length) {
|
||||||
|
case 0:
|
||||||
|
[[fallthrough]];
|
||||||
|
case 1:
|
||||||
|
return;
|
||||||
|
case 2:
|
||||||
|
if (comp(*--end, *begin)) /* [a, b], b < a */
|
||||||
|
std::swap(*begin, *end);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
using ValueType = std::iter_value_t<Iter>;
|
||||||
|
auto workingBuffer = std::vector<ValueType>(length);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* sorts [begin, end), assuming that [begin, leftRunEnd) and
|
||||||
|
* [rightRunBegin, end) are sorted.
|
||||||
|
* Modified implementation from:
|
||||||
|
* https://github.com/sebawild/powersort/blob/1d078b6be9023e134c4f8f6de88e2406dc681e89/src/sorts/peeksort.h
|
||||||
|
*/
|
||||||
|
auto peeksortImpl = [&workingBuffer,
|
||||||
|
&comp](auto & peeksortImpl, Iter begin, Iter end, Iter leftRunEnd, Iter rightRunBegin) {
|
||||||
|
if (leftRunEnd == end || rightRunBegin == begin)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* Dispatch to simpler insertion sort implementation for smaller cases
|
||||||
|
Cut-off limit is the same as in libstdc++
|
||||||
|
https://github.com/gcc-mirror/gcc/blob/d9375e490072d1aae73a93949aa158fcd2a27018/libstdc%2B%2B-v3/include/bits/stl_algo.h#L4977
|
||||||
|
*/
|
||||||
|
static constexpr std::size_t insertionsortThreshold = 16;
|
||||||
|
size_t length = std::distance(begin, end);
|
||||||
|
if (length <= insertionsortThreshold)
|
||||||
|
return insertionsort(begin, end, comp);
|
||||||
|
|
||||||
|
Iter middle = std::next(begin, (length / 2)); /* Middle split between m and m - 1 */
|
||||||
|
|
||||||
|
if (middle <= leftRunEnd) {
|
||||||
|
/* |XXXXXXXX|XX X| */
|
||||||
|
peeksortImpl(peeksortImpl, leftRunEnd, end, std::next(leftRunEnd), rightRunBegin);
|
||||||
|
mergeSortedRunsInPlace(begin, leftRunEnd, end, workingBuffer.begin(), comp);
|
||||||
|
return;
|
||||||
|
} else if (middle >= rightRunBegin) {
|
||||||
|
/* |XX X|XXXXXXXX| */
|
||||||
|
peeksortImpl(peeksortImpl, begin, rightRunBegin, leftRunEnd, std::prev(rightRunBegin));
|
||||||
|
mergeSortedRunsInPlace(begin, rightRunBegin, end, workingBuffer.begin(), comp);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Find middle run, i.e., run containing m - 1 */
|
||||||
|
Iter i, j;
|
||||||
|
|
||||||
|
if (!comp(*middle, *std::prev(middle)) /* *std::prev(middle) <= *middle */) {
|
||||||
|
i = weaklyIncreasingSuffix(leftRunEnd, middle, comp);
|
||||||
|
j = weaklyIncreasingPrefix(std::prev(middle), rightRunBegin, comp);
|
||||||
|
} else {
|
||||||
|
i = strictlyDecreasingSuffix(leftRunEnd, middle, comp);
|
||||||
|
j = strictlyDecreasingPrefix(std::prev(middle), rightRunBegin, comp);
|
||||||
|
std::reverse(i, j);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (i == begin && j == end)
|
||||||
|
return; /* single run */
|
||||||
|
|
||||||
|
if (middle - i < j - middle) {
|
||||||
|
/* |XX x|xxxx X| */
|
||||||
|
peeksortImpl(peeksortImpl, begin, i, leftRunEnd, std::prev(i));
|
||||||
|
peeksortImpl(peeksortImpl, i, end, j, rightRunBegin);
|
||||||
|
mergeSortedRunsInPlace(begin, i, end, workingBuffer.begin(), comp);
|
||||||
|
} else {
|
||||||
|
/* |XX xxx|x X| */
|
||||||
|
peeksortImpl(peeksortImpl, begin, j, leftRunEnd, i);
|
||||||
|
peeksortImpl(peeksortImpl, j, end, std::next(j), rightRunBegin);
|
||||||
|
mergeSortedRunsInPlace(begin, j, end, workingBuffer.begin(), comp);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
peeksortImpl(peeksortImpl, begin, end, /*leftRunEnd=*/begin, /*rightRunBegin=*/end);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -1 +1 @@
|
||||||
[ [ 42 77 147 249 483 526 ] [ 526 483 249 147 77 42 ] [ "bar" "fnord" "foo" "xyzzy" ] [ { key = 1; value = "foo"; } { key = 1; value = "fnord"; } { key = 2; value = "bar"; } ] [ [ ] [ ] [ 1 ] [ 1 4 ] [ 1 5 ] [ 1 6 ] [ 2 ] [ 2 3 ] [ 3 ] [ 3 ] ] ]
|
[ [ 42 77 147 249 483 526 ] [ 526 483 249 147 77 42 ] [ "bar" "fnord" "foo" "xyzzy" ] [ { key = 1; value = "foo"; } { key = 1; value = "fnord"; } { key = 2; value = "bar"; } ] [ { key = 1; value = "foo"; } { key = 1; value = "foo2"; } { key = 1; value = "foo3"; } { key = 1; value = "foo4"; } { key = 1; value = "foo5"; } { key = 1; value = "foo6"; } { key = 1; value = "foo7"; } { key = 1; value = "foo8"; } { key = 2; value = "bar"; } { key = 2; value = "bar2"; } { key = 2; value = "bar3"; } { key = 2; value = "bar4"; } { key = 2; value = "bar5"; } { key = 3; value = "baz"; } { key = 3; value = "baz2"; } { key = 3; value = "baz3"; } { key = 3; value = "baz4"; } { key = 4; value = "biz1"; } ] [ [ ] [ ] [ 1 ] [ 1 4 ] [ 1 5 ] [ 1 6 ] [ 2 ] [ 2 3 ] [ 3 ] [ 3 ] ] ]
|
||||||
|
|
|
@ -37,6 +37,80 @@ with builtins;
|
||||||
value = "fnord";
|
value = "fnord";
|
||||||
}
|
}
|
||||||
])
|
])
|
||||||
|
(sort (x: y: x.key < y.key) [
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 2;
|
||||||
|
value = "bar";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 2;
|
||||||
|
value = "bar2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 2;
|
||||||
|
value = "bar3";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 2;
|
||||||
|
value = "bar4";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo3";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 3;
|
||||||
|
value = "baz";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 3;
|
||||||
|
value = "baz2";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo4";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 3;
|
||||||
|
value = "baz3";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo5";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo6";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 2;
|
||||||
|
value = "bar5";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 3;
|
||||||
|
value = "baz4";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo7";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 4;
|
||||||
|
value = "biz1";
|
||||||
|
}
|
||||||
|
{
|
||||||
|
key = 1;
|
||||||
|
value = "foo8";
|
||||||
|
}
|
||||||
|
])
|
||||||
(sort lessThan [
|
(sort lessThan [
|
||||||
[
|
[
|
||||||
1
|
1
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue