* [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
@ 2021-05-05 1:42 Patrick Palka
2021-05-05 9:39 ` Jonathan Wakely
0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05 1:42 UTC (permalink / raw)
To: gcc-patches; +Cc: libstdc++, Patrick Palka
This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?
libstdc++-v3/ChangeLog:
PR libstdc++/100387
* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
to limit comparison complexity to 3*N/2. Avoid premature std::move.
(__minmax_element_fn::operator()): Likewise.
(shift_right): Avoid premature std::move of __result.
* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
New tests.
* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
Likewise.
---
libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
.../25_algorithms/minmax/constrained.cc | 31 +++++++
.../minmax_element/constrained.cc | 19 ++++
3 files changed, 113 insertions(+), 24 deletions(-)
diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index cda3042c11f..bbd29127e89 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3291,18 +3291,39 @@ namespace ranges
auto __first = ranges::begin(__r);
auto __last = ranges::end(__r);
__glibcxx_assert(__first != __last);
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
while (++__first != __last)
{
- auto __tmp = *__first;
- if (std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.min)))
- __result.min = std::move(__tmp);
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.max)))
- __result.max = std::move(__tmp);
+ // Process two elements at a time so that we perform at most
+ // 3*N/2 many comparisons in total (each of the N/2 iterations
+ // of this loop performs three comparisions).
+ auto __val1 = *__first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform a just one
+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ else if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ break;
+ }
+ auto __val2 = *__first;
+ if (!__comp_proj(__val2, __val1))
+ {
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ if (!__comp_proj(__val2, __result.max))
+ __result.max = std::move(__val2);
+ }
+ else
+ {
+ if (__comp_proj(__val2, __result.min))
+ __result.min = std::move(__val2);
+ if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ }
}
return __result;
}
@@ -3408,21 +3429,40 @@ namespace ranges
operator()(_Iter __first, _Sent __last,
_Comp __comp = {}, _Proj __proj = {}) const
{
- if (__first == __last)
- return {__first, __first};
-
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_element_result<_Iter> __result = {__first, __first};
- auto __i = __first;
- while (++__i != __last)
+ if (__first == __last)
+ return __result;
+ while (++__first != __last)
{
- if (std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.min)))
- __result.min = __i;
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.max)))
- __result.max = __i;
+ // Process two elements at a time so that we perform at most
+ // 3*N/2 many comparisons in total (each of the N/2 iterations
+ // of this loop performs three comparisions).
+ auto __prev = __first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform a just one
+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ else if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ break;
+ }
+ if (!__comp_proj(*__first, *__prev))
+ {
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ if (!__comp_proj(*__first, *__result.max))
+ __result.max = __first;
+ }
+ else
+ {
+ if (__comp_proj(*__first, *__result.min))
+ __result.min = __first;
+ if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ }
}
return __result;
}
@@ -3749,8 +3789,7 @@ namespace ranges
// i.e. we are shifting out at least half of the range. In
// this case we can safely perform the shift with a single
// move.
- std::move(std::move(__first), std::move(__dest_head),
- std::move(__result));
+ std::move(std::move(__first), std::move(__dest_head), __result);
return __result;
}
++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..2373c15a9d2 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
// { dg-do run { target c++2a } }
#include <algorithm>
+#include <string>
+#include <vector>
#include <testsuite_hooks.h>
#include <testsuite_iterators.h>
@@ -89,10 +91,39 @@ test03()
== res_t(1,4) );
}
+void
+test04()
+{
+ // Verify we perform most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+ VERIFY( counter <= 15 );
+
+ counter = 0;
+ ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+ // PR libstdc++/100387
+ std::vector<std::string> v{"b", "a"};
+ auto [min, max] = ranges::minmax(v, [](const auto& a, const auto& b) {
+ return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+ });
+ VERIFY( min == "a" && max == "b" );
+}
+
int
main()
{
test01();
test02();
test03();
+ test04();
+ test05();
}
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..89044181530 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,27 @@ test01()
static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
}
+void
+test02()
+{
+ // Verify we perform most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ int x[] = {1,2,3,4,5,6,7,8,9,10};
+ ranges::minmax(x, counted_less{});
+ VERIFY( counter <= 15 );
+
+ ranges::reverse(x);
+ counter = 0;
+ ranges::minmax(x, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
int
main()
{
test01();
+ test02();
}
--
2.31.1.442.g7e39198978
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-05-05 1:42 [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity Patrick Palka
@ 2021-05-05 9:39 ` Jonathan Wakely
2021-05-05 9:46 ` Jonathan Wakely
2021-05-05 15:33 ` Patrick Palka
0 siblings, 2 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-05-05 9:39 UTC (permalink / raw)
To: Patrick Palka; +Cc: gcc-patches, libstdc++
On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
>This rewrites ranges::minmax and ranges::minmax_element so that it
>performs at most 3*N/2 many comparisons, as required by the standard.
>In passing, this also fixes PR100387 by avoiding a premature std::move
>in ranges::minmax and in std::shift_right.
>
>Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
>10/11?
>
>libstdc++-v3/ChangeLog:
>
> PR libstdc++/100387
> * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> to limit comparison complexity to 3*N/2. Avoid premature std::move.
> (__minmax_element_fn::operator()): Likewise.
> (shift_right): Avoid premature std::move of __result.
> * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> New tests.
> * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> Likewise.
>---
> libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
> .../25_algorithms/minmax/constrained.cc | 31 +++++++
> .../minmax_element/constrained.cc | 19 ++++
> 3 files changed, 113 insertions(+), 24 deletions(-)
>
>diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
>index cda3042c11f..bbd29127e89 100644
>--- a/libstdc++-v3/include/bits/ranges_algo.h
>+++ b/libstdc++-v3/include/bits/ranges_algo.h
>@@ -3291,18 +3291,39 @@ namespace ranges
> auto __first = ranges::begin(__r);
> auto __last = ranges::end(__r);
> __glibcxx_assert(__first != __last);
>+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> while (++__first != __last)
> {
>- auto __tmp = *__first;
>- if (std::__invoke(__comp,
>- std::__invoke(__proj, __tmp),
>- std::__invoke(__proj, __result.min)))
>- __result.min = std::move(__tmp);
>- if (!(bool)std::__invoke(__comp,
>- std::__invoke(__proj, __tmp),
>- std::__invoke(__proj, __result.max)))
>- __result.max = std::move(__tmp);
>+ // Process two elements at a time so that we perform at most
>+ // 3*N/2 many comparisons in total (each of the N/2 iterations
Is "many" a typo here?
>+ // of this loop performs three comparisions).
>+ auto __val1 = *__first;
Can we avoid making this copy if the range satisfies forward_range, by
keeping copies of the min/max iterators, or just forwarding to
ranges::minmax_element?
>+ if (++__first == __last)
>+ {
>+ // N is odd; in this final iteration, we perform a just one
s/perform a just one/perform just one/
>+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
I find this a bit hard to parse with the inequality there.
>+ if (__comp_proj(__val1, __result.min))
>+ __result.min = std::move(__val1);
>+ else if (!__comp_proj(__val1, __result.max))
>+ __result.max = std::move(__val1);
This can be two comparisons, can't it? Would this be better...
// N is odd; in this final iteration, we perform at most two
// comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
// which is not more than 3*N/2, as required.
?
>+ break;
>+ }
>+ auto __val2 = *__first;
>+ if (!__comp_proj(__val2, __val1))
>+ {
>+ if (__comp_proj(__val1, __result.min))
>+ __result.min = std::move(__val1);
>+ if (!__comp_proj(__val2, __result.max))
>+ __result.max = std::move(__val2);
>+ }
>+ else
>+ {
>+ if (__comp_proj(__val2, __result.min))
>+ __result.min = std::move(__val2);
>+ if (!__comp_proj(__val1, __result.max))
>+ __result.max = std::move(__val1);
>+ }
I thought we might be able to simplify this to something like:
auto __val2 = *__first;
auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
if (__comp_proj(__min, __result.min))
__result.min = __min;
if (__comp_proj(__result.max, __max))
__result.max = __max;
But it doesn't work because we need to move from __min and __max, but
the (*this)(...) returns minmax_result<const T&> and can't be moved
from.
We could get around that but it's not much of a simplification:
range_value_t<Range> __val2 = *__first;
auto [__min, __max] = (*this)(std::addressof(__val1),
std::addressof(__val2),
__comp,
[](auto __p) -> const auto& {
return *__p;
});
if (__comp_proj(*__min, __result.min))
__result.min = std::move(*__min);
if (__comp_proj(__result.max, *__max))
__result.max = std::move(*__max);
> }
> return __result;
> }
>@@ -3408,21 +3429,40 @@ namespace ranges
> operator()(_Iter __first, _Sent __last,
> _Comp __comp = {}, _Proj __proj = {}) const
> {
>- if (__first == __last)
>- return {__first, __first};
>-
>+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> minmax_element_result<_Iter> __result = {__first, __first};
>- auto __i = __first;
>- while (++__i != __last)
>+ if (__first == __last)
>+ return __result;
>+ while (++__first != __last)
> {
>- if (std::__invoke(__comp,
>- std::__invoke(__proj, *__i),
>- std::__invoke(__proj, *__result.min)))
>- __result.min = __i;
>- if (!(bool)std::__invoke(__comp,
>- std::__invoke(__proj, *__i),
>- std::__invoke(__proj, *__result.max)))
>- __result.max = __i;
>+ // Process two elements at a time so that we perform at most
>+ // 3*N/2 many comparisons in total (each of the N/2 iterations
>+ // of this loop performs three comparisions).
>+ auto __prev = __first;
>+ if (++__first == __last)
>+ {
>+ // N is odd; in this final iteration, we perform a just one
>+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
Same comments on the comments as above.
>+ if (__comp_proj(*__prev, *__result.min))
>+ __result.min = __prev;
>+ else if (!__comp_proj(*__prev, *__result.max))
>+ __result.max = __prev;
>+ break;
>+ }
>+ if (!__comp_proj(*__first, *__prev))
>+ {
>+ if (__comp_proj(*__prev, *__result.min))
>+ __result.min = __prev;
>+ if (!__comp_proj(*__first, *__result.max))
>+ __result.max = __first;
>+ }
>+ else
>+ {
>+ if (__comp_proj(*__first, *__result.min))
>+ __result.min = __first;
>+ if (!__comp_proj(*__prev, *__result.max))
>+ __result.max = __prev;
>+ }
We don't need to move anything here, so this could be written using
ranges::minmax. I'm not sure it is an improvement though (except for
being slightly fewer lines of code):
auto __mm = minmax(__prev, __first, __comp,
[](auto&& __it) -> auto&& { return *__it; });
if (__comp_proj(*__mm.min, *__result.min))
__result.min = __mm.min;
if (__comp_proj(*__result.max, *__mm.max))
__result.max = __mm.max;
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-05-05 9:39 ` Jonathan Wakely
@ 2021-05-05 9:46 ` Jonathan Wakely
2021-05-05 15:33 ` Patrick Palka
1 sibling, 0 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-05-05 9:46 UTC (permalink / raw)
To: Patrick Palka; +Cc: gcc-patches, libstdc++
On 05/05/21 10:39 +0100, Jonathan Wakely wrote:
>On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
>>This rewrites ranges::minmax and ranges::minmax_element so that it
>>performs at most 3*N/2 many comparisons, as required by the standard.
>>In passing, this also fixes PR100387 by avoiding a premature std::move
>>in ranges::minmax and in std::shift_right.
>>
>>Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
>>10/11?
>>
>>libstdc++-v3/ChangeLog:
>>
>> PR libstdc++/100387
>> * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
>> to limit comparison complexity to 3*N/2. Avoid premature std::move.
>> (__minmax_element_fn::operator()): Likewise.
>> (shift_right): Avoid premature std::move of __result.
>> * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
>> New tests.
>> * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
>> Likewise.
>>---
>>libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
>>.../25_algorithms/minmax/constrained.cc | 31 +++++++
>>.../minmax_element/constrained.cc | 19 ++++
>>3 files changed, 113 insertions(+), 24 deletions(-)
>>
>>diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
>>index cda3042c11f..bbd29127e89 100644
>>--- a/libstdc++-v3/include/bits/ranges_algo.h
>>+++ b/libstdc++-v3/include/bits/ranges_algo.h
>>@@ -3291,18 +3291,39 @@ namespace ranges
>> auto __first = ranges::begin(__r);
>> auto __last = ranges::end(__r);
>> __glibcxx_assert(__first != __last);
>>+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
>> minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
>> while (++__first != __last)
>> {
>>- auto __tmp = *__first;
>>- if (std::__invoke(__comp,
>>- std::__invoke(__proj, __tmp),
>>- std::__invoke(__proj, __result.min)))
>>- __result.min = std::move(__tmp);
>>- if (!(bool)std::__invoke(__comp,
>>- std::__invoke(__proj, __tmp),
>>- std::__invoke(__proj, __result.max)))
>>- __result.max = std::move(__tmp);
>>+ // Process two elements at a time so that we perform at most
>>+ // 3*N/2 many comparisons in total (each of the N/2 iterations
>
>Is "many" a typo here?
>
>>+ // of this loop performs three comparisions).
>>+ auto __val1 = *__first;
>
>Can we avoid making this copy if the range satisfies forward_range, by
>keeping copies of the min/max iterators, or just forwarding to
>ranges::minmax_element?
Hmm, on the other hand, for a forward range of ints we're probably
better off jsut making the copy here and not going through the
indirections done by minmax_element.
Maybe something like:
if constexpr (forward_range<_Range> && is_scalar_v<range_value_t<_Range>>)
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-05-05 9:39 ` Jonathan Wakely
2021-05-05 9:46 ` Jonathan Wakely
@ 2021-05-05 15:33 ` Patrick Palka
2021-05-05 15:48 ` Patrick Palka
1 sibling, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05 15:33 UTC (permalink / raw)
To: Jonathan Wakely; +Cc: Patrick Palka, gcc-patches, libstdc++
On Wed, 5 May 2021, Jonathan Wakely wrote:
> On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > This rewrites ranges::minmax and ranges::minmax_element so that it
> > performs at most 3*N/2 many comparisons, as required by the standard.
> > In passing, this also fixes PR100387 by avoiding a premature std::move
> > in ranges::minmax and in std::shift_right.
> >
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > 10/11?
> >
> > libstdc++-v3/ChangeLog:
> >
> > PR libstdc++/100387
> > * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > to limit comparison complexity to 3*N/2. Avoid premature std::move.
> > (__minmax_element_fn::operator()): Likewise.
> > (shift_right): Avoid premature std::move of __result.
> > * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > New tests.
> > * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > Likewise.
> > ---
> > libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
> > .../25_algorithms/minmax/constrained.cc | 31 +++++++
> > .../minmax_element/constrained.cc | 19 ++++
> > 3 files changed, 113 insertions(+), 24 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > b/libstdc++-v3/include/bits/ranges_algo.h
> > index cda3042c11f..bbd29127e89 100644
> > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > @@ -3291,18 +3291,39 @@ namespace ranges
> > auto __first = ranges::begin(__r);
> > auto __last = ranges::end(__r);
> > __glibcxx_assert(__first != __last);
> > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > while (++__first != __last)
> > {
> > - auto __tmp = *__first;
> > - if (std::__invoke(__comp,
> > - std::__invoke(__proj, __tmp),
> > - std::__invoke(__proj, __result.min)))
> > - __result.min = std::move(__tmp);
> > - if (!(bool)std::__invoke(__comp,
> > - std::__invoke(__proj, __tmp),
> > - std::__invoke(__proj, __result.max)))
> > - __result.max = std::move(__tmp);
> > + // Process two elements at a time so that we perform at most
> > + // 3*N/2 many comparisons in total (each of the N/2 iterations
>
> Is "many" a typo here?
Just a bad habit of mine to usually write "<count> many" instead of just
"<count>" :) Consider the "many" removed.
>
> > + // of this loop performs three comparisions).
> > + auto __val1 = *__first;
>
> Can we avoid making this copy if the range satisfies forward_range, by
> keeping copies of the min/max iterators, or just forwarding to
> ranges::minmax_element?
Maybe we can make __val1 and __val2 universal references? Ah, but then
__val1 would potentially be invalidated after incrementing __first. I
think it should be safe to make __val2 a universal reference though.
I've done this in v2 below.
Forwarding to ranges::minmax_element seems like it would be profitable
in some situations, e.g if the value type isn't trivially copyable. I
can do this in a followup patch for ranges::max/max_element and
ranges::min/min_element too, they should all use the same heuristic.
>
>
> > + if (++__first == __last)
> > + {
> > + // N is odd; in this final iteration, we perform a just one
>
> s/perform a just one/perform just one/
Fixed.
>
> > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > comparisons.
>
> I find this a bit hard to parse with the inequality there.
Removed.
>
> > + if (__comp_proj(__val1, __result.min))
> > + __result.min = std::move(__val1);
> > + else if (!__comp_proj(__val1, __result.max))
> > + __result.max = std::move(__val1);
>
> This can be two comparisons, can't it? Would this be better...
Whoops, yeah...
>
> // N is odd; in this final iteration, we perform at most two
> // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
> // which is not more than 3*N/2, as required.
>
> ?
Ah, but then the total is more than 3*N/2 :( And I think we reach this
case really when N is even, not odd (sorry, I really botched this
patch).
And when N=2 in particular, we perform up two comparisons instead of
three, but actually a single comparison should suffice in this case. I
think all this is fixed in v2 below by handling the second element in
the range specially.
>
> > + break;
> > + }
> > + auto __val2 = *__first;
> > + if (!__comp_proj(__val2, __val1))
> > + {
> > + if (__comp_proj(__val1, __result.min))
> > + __result.min = std::move(__val1);
> > + if (!__comp_proj(__val2, __result.max))
> > + __result.max = std::move(__val2);
> > + }
> > + else
> > + {
> > + if (__comp_proj(__val2, __result.min))
> > + __result.min = std::move(__val2);
> > + if (!__comp_proj(__val1, __result.max))
> > + __result.max = std::move(__val1);
> > + }
>
> I thought we might be able to simplify this to something like:
>
> auto __val2 = *__first;
> auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
> if (__comp_proj(__min, __result.min))
> __result.min = __min;
> if (__comp_proj(__result.max, __max))
> __result.max = __max;
>
> But it doesn't work because we need to move from __min and __max, but
> the (*this)(...) returns minmax_result<const T&> and can't be moved
> from.
>
> We could get around that but it's not much of a simplification:
>
> range_value_t<Range> __val2 = *__first;
> auto [__min, __max] = (*this)(std::addressof(__val1),
> std::addressof(__val2),
> __comp,
> [](auto __p) -> const auto& {
> return *__p;
> });
> if (__comp_proj(*__min, __result.min))
> __result.min = std::move(*__min);
> if (__comp_proj(__result.max, *__max))
> __result.max = std::move(*__max);
Hmm, now that __val2 was made a universal reference, this simplifcation
might not work.
>
> > }
> > return __result;
> > }
> > @@ -3408,21 +3429,40 @@ namespace ranges
> > operator()(_Iter __first, _Sent __last,
> > _Comp __comp = {}, _Proj __proj = {}) const
> > {
> > - if (__first == __last)
> > - return {__first, __first};
> > -
> > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > minmax_element_result<_Iter> __result = {__first, __first};
> > - auto __i = __first;
> > - while (++__i != __last)
> > + if (__first == __last)
> > + return __result;
> > + while (++__first != __last)
> > {
> > - if (std::__invoke(__comp,
> > - std::__invoke(__proj, *__i),
> > - std::__invoke(__proj, *__result.min)))
> > - __result.min = __i;
> > - if (!(bool)std::__invoke(__comp,
> > - std::__invoke(__proj, *__i),
> > - std::__invoke(__proj, *__result.max)))
> > - __result.max = __i;
> > + // Process two elements at a time so that we perform at most
> > + // 3*N/2 many comparisons in total (each of the N/2 iterations
> > + // of this loop performs three comparisions).
> > + auto __prev = __first;
> > + if (++__first == __last)
> > + {
> > + // N is odd; in this final iteration, we perform a just one
> > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > comparisons.
>
> Same comments on the comments as above.
Fixed.
>
> > + if (__comp_proj(*__prev, *__result.min))
> > + __result.min = __prev;
> > + else if (!__comp_proj(*__prev, *__result.max))
> > + __result.max = __prev;
> > + break;
> > + }
> > + if (!__comp_proj(*__first, *__prev))
> > + {
> > + if (__comp_proj(*__prev, *__result.min))
> > + __result.min = __prev;
> > + if (!__comp_proj(*__first, *__result.max))
> > + __result.max = __first;
> > + }
> > + else
> > + {
> > + if (__comp_proj(*__first, *__result.min))
> > + __result.min = __first;
> > + if (!__comp_proj(*__prev, *__result.max))
> > + __result.max = __prev;
> > + }
>
> We don't need to move anything here, so this could be written using
> ranges::minmax. I'm not sure it is an improvement though (except for
> being slightly fewer lines of code):
>
> auto __mm = minmax(__prev, __first, __comp,
> [](auto&& __it) -> auto&& { return *__it; });
>
> if (__comp_proj(*__mm.min, *__result.min))
> __result.min = __mm.min;
> if (__comp_proj(*__result.max, *__mm.max))
> __result.max = __mm.max;
Hmm, it's shorter, but on the other hand it'd make the implementations
of minmax and minmax_element no longer mirror each other as closely. So
I didn't make this change for now.
Here's v2, which makes the following additional changes:
* Fixes comment typos, and some nearby indentation in the signature
of minmax
* Makes both minmax and minmax_element handle the second element
specially in order to optimally perform just a single comparison
when N=2 and to always stay below 3*N/2 comparisons
* Avoids using a deduced return type for __val1
* Makes __val2 a universal reference to avoid a move
* Adds comparison complexity tests for the N=2,3 cases
-- >8 --
Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
complexity
This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?
libstdc++-v3/ChangeLog:
PR libstdc++/100387
* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
to limit comparison complexity to 3*N/2.
(__minmax_element_fn::operator()): Likewise.
(shift_right): Avoid premature std::move of __result.
* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
New tests.
* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
Likewise.
---
libstdc++-v3/include/bits/ranges_algo.h | 113 ++++++++++++++----
.../25_algorithms/minmax/constrained.cc | 42 +++++++
.../minmax_element/constrained.cc | 27 +++++
3 files changed, 156 insertions(+), 26 deletions(-)
diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index cda3042c11f..2091cbf5b4e 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3283,26 +3283,59 @@ namespace ranges
template<input_range _Range, typename _Proj = identity,
indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
_Comp = ranges::less>
- requires indirectly_copyable_storable<iterator_t<_Range>,
- range_value_t<_Range>*>
+ requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
constexpr minmax_result<range_value_t<_Range>>
operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
{
auto __first = ranges::begin(__r);
auto __last = ranges::end(__r);
__glibcxx_assert(__first != __last);
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
+ if (++__first == __last)
+ return __result;
+ else
+ {
+ // At this point __result.min == __result.max, so a single
+ // comparison with the next element suffices.
+ auto&& __val = *__first;
+ if (__comp_proj(__val, __result.min))
+ __result.min = std::forward<decltype(__val)>(__val);
+ else
+ __result.max = std::forward<decltype(__val)>(__val);
+ }
while (++__first != __last)
{
- auto __tmp = *__first;
- if (std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.min)))
- __result.min = std::move(__tmp);
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.max)))
- __result.max = std::move(__tmp);
+ // Now process two elements at a time so that we perform at most
+ // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
+ // of this loop performs three comparisons).
+ range_value_t<_Range> __val1 = *__first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform at most two
+ // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+ // which is not more than 3*N/2, as required.
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ else if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ break;
+ }
+ auto&& __val2 = *__first;
+ if (!__comp_proj(__val2, __val1))
+ {
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ if (!__comp_proj(__val2, __result.max))
+ __result.max = std::forward<decltype(__val2)>(__val2);
+ }
+ else
+ {
+ if (__comp_proj(__val2, __result.min))
+ __result.min = std::forward<decltype(__val2)>(__val2);
+ if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ }
}
return __result;
}
@@ -3408,21 +3441,50 @@ namespace ranges
operator()(_Iter __first, _Sent __last,
_Comp __comp = {}, _Proj __proj = {}) const
{
- if (__first == __last)
- return {__first, __first};
-
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_element_result<_Iter> __result = {__first, __first};
- auto __i = __first;
- while (++__i != __last)
+ if (__first == __last || ++__first == __last)
+ return __result;
+ else
{
- if (std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.min)))
- __result.min = __i;
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.max)))
- __result.max = __i;
+ // At this point __result.min == __result.max, so a single
+ // comparison with the next element suffices.
+ if (__comp_proj(*__first, *__result.min))
+ __result.min = __first;
+ else
+ __result.max = __first;
+ }
+ while (++__first != __last)
+ {
+ // Now process two elements at a time so that we perform at most
+ // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
+ // of this loop performs three comparisons).
+ auto __prev = __first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform at most two
+ // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+ // which is not more than 3*N/2, as required.
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ else if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ break;
+ }
+ if (!__comp_proj(*__first, *__prev))
+ {
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ if (!__comp_proj(*__first, *__result.max))
+ __result.max = __first;
+ }
+ else
+ {
+ if (__comp_proj(*__first, *__result.min))
+ __result.min = __first;
+ if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ }
}
return __result;
}
@@ -3749,8 +3811,7 @@ namespace ranges
// i.e. we are shifting out at least half of the range. In
// this case we can safely perform the shift with a single
// move.
- std::move(std::move(__first), std::move(__dest_head),
- std::move(__result));
+ std::move(std::move(__first), std::move(__dest_head), __result);
return __result;
}
++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..c365152bf2b 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
// { dg-do run { target c++2a } }
#include <algorithm>
+#include <string>
+#include <vector>
#include <testsuite_hooks.h>
#include <testsuite_iterators.h>
@@ -89,10 +91,50 @@ test03()
== res_t(1,4) );
}
+void
+test04()
+{
+ // Verify we perform at most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ ranges::minmax({1,2}, counted_less{});
+ VERIFY( counter == 1 );
+
+ counter = 0;
+ ranges::minmax({1,2,3}, counted_less{});
+ VERIFY( counter == 3 );
+
+ counter = 0;
+ ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+ VERIFY( counter <= 15 );
+
+ counter = 0;
+ ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+ // PR libstdc++/100387
+ using namespace std::literals::string_literals;
+ auto comp = [](const auto& a, const auto& b) {
+ return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+ };
+ auto result = ranges::minmax({"b"s, "a"s}, comp);
+ VERIFY( result.min == "a"s && result.max == "b"s );
+ result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
+ VERIFY( result.min == "a"s && result.max == "c"s );
+}
+
int
main()
{
test01();
test02();
test03();
+ test04();
+ test05();
}
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..0919f7dda8f 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,35 @@ test01()
static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
}
+void
+test02()
+{
+ // Verify we perform at most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ int x[] = {1,2,3,4,5,6,7,8,9,10};
+ ranges::minmax_element(x, x+2, counted_less{});
+ VERIFY( counter == 1 );
+
+ counter = 0;
+ ranges::minmax_element(x, x+3, counted_less{});
+ VERIFY( counter == 3 );
+
+ counter = 0;
+ ranges::minmax_element(x, counted_less{});
+ VERIFY( counter <= 15 );
+
+ ranges::reverse(x);
+ counter = 0;
+ ranges::minmax_element(x, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
int
main()
{
test01();
+ test02();
}
--
2.31.1.442.g7e39198978
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-05-05 15:33 ` Patrick Palka
@ 2021-05-05 15:48 ` Patrick Palka
2021-06-18 17:44 ` Patrick Palka
0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05 15:48 UTC (permalink / raw)
To: Patrick Palka; +Cc: Jonathan Wakely, gcc-patches, libstdc++
On Wed, 5 May 2021, Patrick Palka wrote:
> On Wed, 5 May 2021, Jonathan Wakely wrote:
>
> > On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > > This rewrites ranges::minmax and ranges::minmax_element so that it
> > > performs at most 3*N/2 many comparisons, as required by the standard.
> > > In passing, this also fixes PR100387 by avoiding a premature std::move
> > > in ranges::minmax and in std::shift_right.
> > >
> > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > > 10/11?
> > >
> > > libstdc++-v3/ChangeLog:
> > >
> > > PR libstdc++/100387
> > > * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > > to limit comparison complexity to 3*N/2. Avoid premature std::move.
> > > (__minmax_element_fn::operator()): Likewise.
> > > (shift_right): Avoid premature std::move of __result.
> > > * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > > New tests.
> > > * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > > Likewise.
> > > ---
> > > libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
> > > .../25_algorithms/minmax/constrained.cc | 31 +++++++
> > > .../minmax_element/constrained.cc | 19 ++++
> > > 3 files changed, 113 insertions(+), 24 deletions(-)
> > >
> > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > > b/libstdc++-v3/include/bits/ranges_algo.h
> > > index cda3042c11f..bbd29127e89 100644
> > > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > > @@ -3291,18 +3291,39 @@ namespace ranges
> > > auto __first = ranges::begin(__r);
> > > auto __last = ranges::end(__r);
> > > __glibcxx_assert(__first != __last);
> > > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > > while (++__first != __last)
> > > {
> > > - auto __tmp = *__first;
> > > - if (std::__invoke(__comp,
> > > - std::__invoke(__proj, __tmp),
> > > - std::__invoke(__proj, __result.min)))
> > > - __result.min = std::move(__tmp);
> > > - if (!(bool)std::__invoke(__comp,
> > > - std::__invoke(__proj, __tmp),
> > > - std::__invoke(__proj, __result.max)))
> > > - __result.max = std::move(__tmp);
> > > + // Process two elements at a time so that we perform at most
> > > + // 3*N/2 many comparisons in total (each of the N/2 iterations
> >
> > Is "many" a typo here?
>
> Just a bad habit of mine to usually write "<count> many" instead of just
> "<count>" :) Consider the "many" removed.
>
> >
> > > + // of this loop performs three comparisions).
> > > + auto __val1 = *__first;
> >
> > Can we avoid making this copy if the range satisfies forward_range, by
> > keeping copies of the min/max iterators, or just forwarding to
> > ranges::minmax_element?
>
> Maybe we can make __val1 and __val2 universal references? Ah, but then
> __val1 would potentially be invalidated after incrementing __first. I
> think it should be safe to make __val2 a universal reference though.
> I've done this in v2 below.
>
> Forwarding to ranges::minmax_element seems like it would be profitable
> in some situations, e.g if the value type isn't trivially copyable. I
> can do this in a followup patch for ranges::max/max_element and
> ranges::min/min_element too, they should all use the same heuristic.
>
> >
> >
> > > + if (++__first == __last)
> > > + {
> > > + // N is odd; in this final iteration, we perform a just one
> >
> > s/perform a just one/perform just one/
>
> Fixed.
>
> >
> > > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > comparisons.
> >
> > I find this a bit hard to parse with the inequality there.
>
> Removed.
>
> >
> > > + if (__comp_proj(__val1, __result.min))
> > > + __result.min = std::move(__val1);
> > > + else if (!__comp_proj(__val1, __result.max))
> > > + __result.max = std::move(__val1);
> >
> > This can be two comparisons, can't it? Would this be better...
>
> Whoops, yeah...
>
> >
> > // N is odd; in this final iteration, we perform at most two
> > // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
> > // which is not more than 3*N/2, as required.
> >
> > ?
>
> Ah, but then the total is more than 3*N/2 :( And I think we reach this
> case really when N is even, not odd (sorry, I really botched this
> patch).
>
> And when N=2 in particular, we perform up two comparisons instead of
> three, but actually a single comparison should suffice in this case. I
> think all this is fixed in v2 below by handling the second element in
> the range specially.
>
> >
> > > + break;
> > > + }
> > > + auto __val2 = *__first;
> > > + if (!__comp_proj(__val2, __val1))
> > > + {
> > > + if (__comp_proj(__val1, __result.min))
> > > + __result.min = std::move(__val1);
> > > + if (!__comp_proj(__val2, __result.max))
> > > + __result.max = std::move(__val2);
> > > + }
> > > + else
> > > + {
> > > + if (__comp_proj(__val2, __result.min))
> > > + __result.min = std::move(__val2);
> > > + if (!__comp_proj(__val1, __result.max))
> > > + __result.max = std::move(__val1);
> > > + }
> >
> > I thought we might be able to simplify this to something like:
> >
> > auto __val2 = *__first;
> > auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
> > if (__comp_proj(__min, __result.min))
> > __result.min = __min;
> > if (__comp_proj(__result.max, __max))
> > __result.max = __max;
> >
> > But it doesn't work because we need to move from __min and __max, but
> > the (*this)(...) returns minmax_result<const T&> and can't be moved
> > from.
> >
> > We could get around that but it's not much of a simplification:
> >
> > range_value_t<Range> __val2 = *__first;
> > auto [__min, __max] = (*this)(std::addressof(__val1),
> > std::addressof(__val2),
> > __comp,
> > [](auto __p) -> const auto& {
> > return *__p;
> > });
> > if (__comp_proj(*__min, __result.min))
> > __result.min = std::move(*__min);
> > if (__comp_proj(__result.max, *__max))
> > __result.max = std::move(*__max);
>
> Hmm, now that __val2 was made a universal reference, this simplifcation
> might not work.
>
> >
> > > }
> > > return __result;
> > > }
> > > @@ -3408,21 +3429,40 @@ namespace ranges
> > > operator()(_Iter __first, _Sent __last,
> > > _Comp __comp = {}, _Proj __proj = {}) const
> > > {
> > > - if (__first == __last)
> > > - return {__first, __first};
> > > -
> > > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > minmax_element_result<_Iter> __result = {__first, __first};
> > > - auto __i = __first;
> > > - while (++__i != __last)
> > > + if (__first == __last)
> > > + return __result;
> > > + while (++__first != __last)
> > > {
> > > - if (std::__invoke(__comp,
> > > - std::__invoke(__proj, *__i),
> > > - std::__invoke(__proj, *__result.min)))
> > > - __result.min = __i;
> > > - if (!(bool)std::__invoke(__comp,
> > > - std::__invoke(__proj, *__i),
> > > - std::__invoke(__proj, *__result.max)))
> > > - __result.max = __i;
> > > + // Process two elements at a time so that we perform at most
> > > + // 3*N/2 many comparisons in total (each of the N/2 iterations
> > > + // of this loop performs three comparisions).
> > > + auto __prev = __first;
> > > + if (++__first == __last)
> > > + {
> > > + // N is odd; in this final iteration, we perform a just one
> > > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > comparisons.
> >
> > Same comments on the comments as above.
>
> Fixed.
>
> >
> > > + if (__comp_proj(*__prev, *__result.min))
> > > + __result.min = __prev;
> > > + else if (!__comp_proj(*__prev, *__result.max))
> > > + __result.max = __prev;
> > > + break;
> > > + }
> > > + if (!__comp_proj(*__first, *__prev))
> > > + {
> > > + if (__comp_proj(*__prev, *__result.min))
> > > + __result.min = __prev;
> > > + if (!__comp_proj(*__first, *__result.max))
> > > + __result.max = __first;
> > > + }
> > > + else
> > > + {
> > > + if (__comp_proj(*__first, *__result.min))
> > > + __result.min = __first;
> > > + if (!__comp_proj(*__prev, *__result.max))
> > > + __result.max = __prev;
> > > + }
> >
> > We don't need to move anything here, so this could be written using
> > ranges::minmax. I'm not sure it is an improvement though (except for
> > being slightly fewer lines of code):
> >
> > auto __mm = minmax(__prev, __first, __comp,
> > [](auto&& __it) -> auto&& { return *__it; });
> >
> > if (__comp_proj(*__mm.min, *__result.min))
> > __result.min = __mm.min;
> > if (__comp_proj(*__result.max, *__mm.max))
> > __result.max = __mm.max;
>
> Hmm, it's shorter, but on the other hand it'd make the implementations
> of minmax and minmax_element no longer mirror each other as closely. So
> I didn't make this change for now.
>
> Here's v2, which makes the following additional changes:
>
> * Fixes comment typos, and some nearby indentation in the signature
> of minmax
> * Makes both minmax and minmax_element handle the second element
> specially in order to optimally perform just a single comparison
> when N=2 and to always stay below 3*N/2 comparisons
> * Avoids using a deduced return type for __val1
> * Makes __val2 a universal reference to avoid a move
> * Adds comparison complexity tests for the N=2,3 cases
>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
> complexity
>
> This rewrites ranges::minmax and ranges::minmax_element so that it
> performs at most 3*N/2 many comparisons, as required by the standard.
> In passing, this also fixes PR100387 by avoiding a premature std::move
> in ranges::minmax and in std::shift_right.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 10/11?
>
> libstdc++-v3/ChangeLog:
>
> PR libstdc++/100387
> * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> to limit comparison complexity to 3*N/2.
> (__minmax_element_fn::operator()): Likewise.
> (shift_right): Avoid premature std::move of __result.
> * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> New tests.
> * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> Likewise.
> ---
> libstdc++-v3/include/bits/ranges_algo.h | 113 ++++++++++++++----
> .../25_algorithms/minmax/constrained.cc | 42 +++++++
> .../minmax_element/constrained.cc | 27 +++++
> 3 files changed, 156 insertions(+), 26 deletions(-)
>
> diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
> index cda3042c11f..2091cbf5b4e 100644
> --- a/libstdc++-v3/include/bits/ranges_algo.h
> +++ b/libstdc++-v3/include/bits/ranges_algo.h
> @@ -3283,26 +3283,59 @@ namespace ranges
> template<input_range _Range, typename _Proj = identity,
> indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
> _Comp = ranges::less>
> - requires indirectly_copyable_storable<iterator_t<_Range>,
> - range_value_t<_Range>*>
> + requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
> constexpr minmax_result<range_value_t<_Range>>
> operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
> {
> auto __first = ranges::begin(__r);
> auto __last = ranges::end(__r);
> __glibcxx_assert(__first != __last);
> + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> + if (++__first == __last)
> + return __result;
> + else
> + {
> + // At this point __result.min == __result.max, so a single
> + // comparison with the next element suffices.
> + auto&& __val = *__first;
> + if (__comp_proj(__val, __result.min))
> + __result.min = std::forward<decltype(__val)>(__val);
> + else
> + __result.max = std::forward<decltype(__val)>(__val);
> + }
> while (++__first != __last)
> {
> - auto __tmp = *__first;
> - if (std::__invoke(__comp,
> - std::__invoke(__proj, __tmp),
> - std::__invoke(__proj, __result.min)))
> - __result.min = std::move(__tmp);
> - if (!(bool)std::__invoke(__comp,
> - std::__invoke(__proj, __tmp),
> - std::__invoke(__proj, __result.max)))
> - __result.max = std::move(__tmp);
> + // Now process two elements at a time so that we perform at most
> + // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
Oops, this should now say "1 + 3*(N-2)/2 comparisons" because of the
initial comparison with the second element above.
> + // of this loop performs three comparisons).
> + range_value_t<_Range> __val1 = *__first;
> + if (++__first == __last)
> + {
> + // N is odd; in this final iteration, we perform at most two
> + // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
> + // which is not more than 3*N/2, as required.
> + if (__comp_proj(__val1, __result.min))
> + __result.min = std::move(__val1);
> + else if (!__comp_proj(__val1, __result.max))
> + __result.max = std::move(__val1);
> + break;
> + }
> + auto&& __val2 = *__first;
> + if (!__comp_proj(__val2, __val1))
> + {
> + if (__comp_proj(__val1, __result.min))
> + __result.min = std::move(__val1);
> + if (!__comp_proj(__val2, __result.max))
> + __result.max = std::forward<decltype(__val2)>(__val2);
> + }
> + else
> + {
> + if (__comp_proj(__val2, __result.min))
> + __result.min = std::forward<decltype(__val2)>(__val2);
> + if (!__comp_proj(__val1, __result.max))
> + __result.max = std::move(__val1);
> + }
> }
> return __result;
> }
> @@ -3408,21 +3441,50 @@ namespace ranges
> operator()(_Iter __first, _Sent __last,
> _Comp __comp = {}, _Proj __proj = {}) const
> {
> - if (__first == __last)
> - return {__first, __first};
> -
> + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> minmax_element_result<_Iter> __result = {__first, __first};
> - auto __i = __first;
> - while (++__i != __last)
> + if (__first == __last || ++__first == __last)
> + return __result;
> + else
> {
> - if (std::__invoke(__comp,
> - std::__invoke(__proj, *__i),
> - std::__invoke(__proj, *__result.min)))
> - __result.min = __i;
> - if (!(bool)std::__invoke(__comp,
> - std::__invoke(__proj, *__i),
> - std::__invoke(__proj, *__result.max)))
> - __result.max = __i;
> + // At this point __result.min == __result.max, so a single
> + // comparison with the next element suffices.
> + if (__comp_proj(*__first, *__result.min))
> + __result.min = __first;
> + else
> + __result.max = __first;
> + }
> + while (++__first != __last)
> + {
> + // Now process two elements at a time so that we perform at most
> + // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
Here too; consider this fixed.
> + // of this loop performs three comparisons).
> + auto __prev = __first;
> + if (++__first == __last)
> + {
> + // N is odd; in this final iteration, we perform at most two
> + // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
> + // which is not more than 3*N/2, as required.
> + if (__comp_proj(*__prev, *__result.min))
> + __result.min = __prev;
> + else if (!__comp_proj(*__prev, *__result.max))
> + __result.max = __prev;
> + break;
> + }
> + if (!__comp_proj(*__first, *__prev))
> + {
> + if (__comp_proj(*__prev, *__result.min))
> + __result.min = __prev;
> + if (!__comp_proj(*__first, *__result.max))
> + __result.max = __first;
> + }
> + else
> + {
> + if (__comp_proj(*__first, *__result.min))
> + __result.min = __first;
> + if (!__comp_proj(*__prev, *__result.max))
> + __result.max = __prev;
> + }
> }
> return __result;
> }
> @@ -3749,8 +3811,7 @@ namespace ranges
> // i.e. we are shifting out at least half of the range. In
> // this case we can safely perform the shift with a single
> // move.
> - std::move(std::move(__first), std::move(__dest_head),
> - std::move(__result));
> + std::move(std::move(__first), std::move(__dest_head), __result);
> return __result;
> }
> ++__dest_head;
> diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> index 786922414b5..c365152bf2b 100644
> --- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> +++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> @@ -19,6 +19,8 @@
> // { dg-do run { target c++2a } }
>
> #include <algorithm>
> +#include <string>
> +#include <vector>
> #include <testsuite_hooks.h>
> #include <testsuite_iterators.h>
>
> @@ -89,10 +91,50 @@ test03()
> == res_t(1,4) );
> }
>
> +void
> +test04()
> +{
> + // Verify we perform at most 3*N/2 applications of the comparison predicate.
> + static int counter;
> + struct counted_less
> + { bool operator()(int a, int b) { ++counter; return a < b; } };
> +
> + ranges::minmax({1,2}, counted_less{});
> + VERIFY( counter == 1 );
> +
> + counter = 0;
> + ranges::minmax({1,2,3}, counted_less{});
> + VERIFY( counter == 3 );
> +
> + counter = 0;
> + ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
> + VERIFY( counter <= 15 );
> +
> + counter = 0;
> + ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
> + VERIFY( counter <= 15 );
> +}
> +
> +void
> +test05()
> +{
> + // PR libstdc++/100387
> + using namespace std::literals::string_literals;
> + auto comp = [](const auto& a, const auto& b) {
> + return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
> + };
> + auto result = ranges::minmax({"b"s, "a"s}, comp);
> + VERIFY( result.min == "a"s && result.max == "b"s );
> + result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
> + VERIFY( result.min == "a"s && result.max == "c"s );
> +}
> +
> int
> main()
> {
> test01();
> test02();
> test03();
> + test04();
> + test05();
> }
> diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> index 3b11c0dd96c..0919f7dda8f 100644
> --- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> +++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> @@ -61,8 +61,35 @@ test01()
> static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
> }
>
> +void
> +test02()
> +{
> + // Verify we perform at most 3*N/2 applications of the comparison predicate.
> + static int counter;
> + struct counted_less
> + { bool operator()(int a, int b) { ++counter; return a < b; } };
> +
> + int x[] = {1,2,3,4,5,6,7,8,9,10};
> + ranges::minmax_element(x, x+2, counted_less{});
> + VERIFY( counter == 1 );
> +
> + counter = 0;
> + ranges::minmax_element(x, x+3, counted_less{});
> + VERIFY( counter == 3 );
> +
> + counter = 0;
> + ranges::minmax_element(x, counted_less{});
> + VERIFY( counter <= 15 );
> +
> + ranges::reverse(x);
> + counter = 0;
> + ranges::minmax_element(x, counted_less{});
> + VERIFY( counter <= 15 );
> +}
> +
> int
> main()
> {
> test01();
> + test02();
> }
> --
> 2.31.1.442.g7e39198978
>
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-05-05 15:48 ` Patrick Palka
@ 2021-06-18 17:44 ` Patrick Palka
2021-06-18 21:19 ` Jonathan Wakely
0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-06-18 17:44 UTC (permalink / raw)
To: Patrick Palka; +Cc: Jonathan Wakely, gcc-patches, libstdc++
On Wed, 5 May 2021, Patrick Palka wrote:
> On Wed, 5 May 2021, Patrick Palka wrote:
>
> > On Wed, 5 May 2021, Jonathan Wakely wrote:
> >
> > > On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > > > This rewrites ranges::minmax and ranges::minmax_element so that it
> > > > performs at most 3*N/2 many comparisons, as required by the standard.
> > > > In passing, this also fixes PR100387 by avoiding a premature std::move
> > > > in ranges::minmax and in std::shift_right.
> > > >
> > > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > > > 10/11?
> > > >
> > > > libstdc++-v3/ChangeLog:
> > > >
> > > > PR libstdc++/100387
> > > > * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > > > to limit comparison complexity to 3*N/2. Avoid premature std::move.
> > > > (__minmax_element_fn::operator()): Likewise.
> > > > (shift_right): Avoid premature std::move of __result.
> > > > * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > > > New tests.
> > > > * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > > > Likewise.
> > > > ---
> > > > libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++-----
> > > > .../25_algorithms/minmax/constrained.cc | 31 +++++++
> > > > .../minmax_element/constrained.cc | 19 ++++
> > > > 3 files changed, 113 insertions(+), 24 deletions(-)
> > > >
> > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > > > b/libstdc++-v3/include/bits/ranges_algo.h
> > > > index cda3042c11f..bbd29127e89 100644
> > > > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > > > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > > > @@ -3291,18 +3291,39 @@ namespace ranges
> > > > auto __first = ranges::begin(__r);
> > > > auto __last = ranges::end(__r);
> > > > __glibcxx_assert(__first != __last);
> > > > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > > minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > > > while (++__first != __last)
> > > > {
> > > > - auto __tmp = *__first;
> > > > - if (std::__invoke(__comp,
> > > > - std::__invoke(__proj, __tmp),
> > > > - std::__invoke(__proj, __result.min)))
> > > > - __result.min = std::move(__tmp);
> > > > - if (!(bool)std::__invoke(__comp,
> > > > - std::__invoke(__proj, __tmp),
> > > > - std::__invoke(__proj, __result.max)))
> > > > - __result.max = std::move(__tmp);
> > > > + // Process two elements at a time so that we perform at most
> > > > + // 3*N/2 many comparisons in total (each of the N/2 iterations
> > >
> > > Is "many" a typo here?
> >
> > Just a bad habit of mine to usually write "<count> many" instead of just
> > "<count>" :) Consider the "many" removed.
> >
> > >
> > > > + // of this loop performs three comparisions).
> > > > + auto __val1 = *__first;
> > >
> > > Can we avoid making this copy if the range satisfies forward_range, by
> > > keeping copies of the min/max iterators, or just forwarding to
> > > ranges::minmax_element?
> >
> > Maybe we can make __val1 and __val2 universal references? Ah, but then
> > __val1 would potentially be invalidated after incrementing __first. I
> > think it should be safe to make __val2 a universal reference though.
> > I've done this in v2 below.
> >
> > Forwarding to ranges::minmax_element seems like it would be profitable
> > in some situations, e.g if the value type isn't trivially copyable. I
> > can do this in a followup patch for ranges::max/max_element and
> > ranges::min/min_element too, they should all use the same heuristic.
> >
> > >
> > >
> > > > + if (++__first == __last)
> > > > + {
> > > > + // N is odd; in this final iteration, we perform a just one
> > >
> > > s/perform a just one/perform just one/
> >
> > Fixed.
> >
> > >
> > > > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > > comparisons.
> > >
> > > I find this a bit hard to parse with the inequality there.
> >
> > Removed.
> >
> > >
> > > > + if (__comp_proj(__val1, __result.min))
> > > > + __result.min = std::move(__val1);
> > > > + else if (!__comp_proj(__val1, __result.max))
> > > > + __result.max = std::move(__val1);
> > >
> > > This can be two comparisons, can't it? Would this be better...
> >
> > Whoops, yeah...
> >
> > >
> > > // N is odd; in this final iteration, we perform at most two
> > > // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
> > > // which is not more than 3*N/2, as required.
> > >
> > > ?
> >
> > Ah, but then the total is more than 3*N/2 :( And I think we reach this
> > case really when N is even, not odd (sorry, I really botched this
> > patch).
> >
> > And when N=2 in particular, we perform up two comparisons instead of
> > three, but actually a single comparison should suffice in this case. I
> > think all this is fixed in v2 below by handling the second element in
> > the range specially.
> >
> > >
> > > > + break;
> > > > + }
> > > > + auto __val2 = *__first;
> > > > + if (!__comp_proj(__val2, __val1))
> > > > + {
> > > > + if (__comp_proj(__val1, __result.min))
> > > > + __result.min = std::move(__val1);
> > > > + if (!__comp_proj(__val2, __result.max))
> > > > + __result.max = std::move(__val2);
> > > > + }
> > > > + else
> > > > + {
> > > > + if (__comp_proj(__val2, __result.min))
> > > > + __result.min = std::move(__val2);
> > > > + if (!__comp_proj(__val1, __result.max))
> > > > + __result.max = std::move(__val1);
> > > > + }
> > >
> > > I thought we might be able to simplify this to something like:
> > >
> > > auto __val2 = *__first;
> > > auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
> > > if (__comp_proj(__min, __result.min))
> > > __result.min = __min;
> > > if (__comp_proj(__result.max, __max))
> > > __result.max = __max;
> > >
> > > But it doesn't work because we need to move from __min and __max, but
> > > the (*this)(...) returns minmax_result<const T&> and can't be moved
> > > from.
> > >
> > > We could get around that but it's not much of a simplification:
> > >
> > > range_value_t<Range> __val2 = *__first;
> > > auto [__min, __max] = (*this)(std::addressof(__val1),
> > > std::addressof(__val2),
> > > __comp,
> > > [](auto __p) -> const auto& {
> > > return *__p;
> > > });
> > > if (__comp_proj(*__min, __result.min))
> > > __result.min = std::move(*__min);
> > > if (__comp_proj(__result.max, *__max))
> > > __result.max = std::move(*__max);
> >
> > Hmm, now that __val2 was made a universal reference, this simplifcation
> > might not work.
> >
> > >
> > > > }
> > > > return __result;
> > > > }
> > > > @@ -3408,21 +3429,40 @@ namespace ranges
> > > > operator()(_Iter __first, _Sent __last,
> > > > _Comp __comp = {}, _Proj __proj = {}) const
> > > > {
> > > > - if (__first == __last)
> > > > - return {__first, __first};
> > > > -
> > > > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > > minmax_element_result<_Iter> __result = {__first, __first};
> > > > - auto __i = __first;
> > > > - while (++__i != __last)
> > > > + if (__first == __last)
> > > > + return __result;
> > > > + while (++__first != __last)
> > > > {
> > > > - if (std::__invoke(__comp,
> > > > - std::__invoke(__proj, *__i),
> > > > - std::__invoke(__proj, *__result.min)))
> > > > - __result.min = __i;
> > > > - if (!(bool)std::__invoke(__comp,
> > > > - std::__invoke(__proj, *__i),
> > > > - std::__invoke(__proj, *__result.max)))
> > > > - __result.max = __i;
> > > > + // Process two elements at a time so that we perform at most
> > > > + // 3*N/2 many comparisons in total (each of the N/2 iterations
> > > > + // of this loop performs three comparisions).
> > > > + auto __prev = __first;
> > > > + if (++__first == __last)
> > > > + {
> > > > + // N is odd; in this final iteration, we perform a just one
> > > > + // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > > comparisons.
> > >
> > > Same comments on the comments as above.
> >
> > Fixed.
> >
> > >
> > > > + if (__comp_proj(*__prev, *__result.min))
> > > > + __result.min = __prev;
> > > > + else if (!__comp_proj(*__prev, *__result.max))
> > > > + __result.max = __prev;
> > > > + break;
> > > > + }
> > > > + if (!__comp_proj(*__first, *__prev))
> > > > + {
> > > > + if (__comp_proj(*__prev, *__result.min))
> > > > + __result.min = __prev;
> > > > + if (!__comp_proj(*__first, *__result.max))
> > > > + __result.max = __first;
> > > > + }
> > > > + else
> > > > + {
> > > > + if (__comp_proj(*__first, *__result.min))
> > > > + __result.min = __first;
> > > > + if (!__comp_proj(*__prev, *__result.max))
> > > > + __result.max = __prev;
> > > > + }
> > >
> > > We don't need to move anything here, so this could be written using
> > > ranges::minmax. I'm not sure it is an improvement though (except for
> > > being slightly fewer lines of code):
> > >
> > > auto __mm = minmax(__prev, __first, __comp,
> > > [](auto&& __it) -> auto&& { return *__it; });
> > >
> > > if (__comp_proj(*__mm.min, *__result.min))
> > > __result.min = __mm.min;
> > > if (__comp_proj(*__result.max, *__mm.max))
> > > __result.max = __mm.max;
> >
> > Hmm, it's shorter, but on the other hand it'd make the implementations
> > of minmax and minmax_element no longer mirror each other as closely. So
> > I didn't make this change for now.
> >
> > Here's v2, which makes the following additional changes:
> >
> > * Fixes comment typos, and some nearby indentation in the signature
> > of minmax
> > * Makes both minmax and minmax_element handle the second element
> > specially in order to optimally perform just a single comparison
> > when N=2 and to always stay below 3*N/2 comparisons
> > * Avoids using a deduced return type for __val1
> > * Makes __val2 a universal reference to avoid a move
> > * Adds comparison complexity tests for the N=2,3 cases
> >
> > -- >8 --
> >
> > Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
> > complexity
> >
> > This rewrites ranges::minmax and ranges::minmax_element so that it
> > performs at most 3*N/2 many comparisons, as required by the standard.
> > In passing, this also fixes PR100387 by avoiding a premature std::move
> > in ranges::minmax and in std::shift_right.
> >
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > 10/11?
> >
> > libstdc++-v3/ChangeLog:
> >
> > PR libstdc++/100387
> > * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > to limit comparison complexity to 3*N/2.
> > (__minmax_element_fn::operator()): Likewise.
> > (shift_right): Avoid premature std::move of __result.
> > * testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > New tests.
> > * testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > Likewise.
> > ---
> > libstdc++-v3/include/bits/ranges_algo.h | 113 ++++++++++++++----
> > .../25_algorithms/minmax/constrained.cc | 42 +++++++
> > .../minmax_element/constrained.cc | 27 +++++
> > 3 files changed, 156 insertions(+), 26 deletions(-)
> >
> > diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
> > index cda3042c11f..2091cbf5b4e 100644
> > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > @@ -3283,26 +3283,59 @@ namespace ranges
> > template<input_range _Range, typename _Proj = identity,
> > indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
> > _Comp = ranges::less>
> > - requires indirectly_copyable_storable<iterator_t<_Range>,
> > - range_value_t<_Range>*>
> > + requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
> > constexpr minmax_result<range_value_t<_Range>>
> > operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
> > {
> > auto __first = ranges::begin(__r);
> > auto __last = ranges::end(__r);
> > __glibcxx_assert(__first != __last);
> > + auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > + if (++__first == __last)
> > + return __result;
> > + else
> > + {
> > + // At this point __result.min == __result.max, so a single
> > + // comparison with the next element suffices.
> > + auto&& __val = *__first;
> > + if (__comp_proj(__val, __result.min))
> > + __result.min = std::forward<decltype(__val)>(__val);
> > + else
> > + __result.max = std::forward<decltype(__val)>(__val);
> > + }
> > while (++__first != __last)
> > {
> > - auto __tmp = *__first;
> > - if (std::__invoke(__comp,
> > - std::__invoke(__proj, __tmp),
> > - std::__invoke(__proj, __result.min)))
> > - __result.min = std::move(__tmp);
> > - if (!(bool)std::__invoke(__comp,
> > - std::__invoke(__proj, __tmp),
> > - std::__invoke(__proj, __result.max)))
> > - __result.max = std::move(__tmp);
> > + // Now process two elements at a time so that we perform at most
> > + // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
>
> Oops, this should now say "1 + 3*(N-2)/2 comparisons" because of the
> initial comparison with the second element above.
Ping; here's the same patch with the above comment corrected.
-- >8 --
Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
complexity
This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.
Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?
libstdc++-v3/ChangeLog:
PR libstdc++/100387
* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
to limit comparison complexity to 3*N/2.
(__minmax_element_fn::operator()): Likewise.
(shift_right): Avoid premature std::move of __result.
* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
New tests.
* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
Likewise.
---
libstdc++-v3/include/bits/ranges_algo.h | 113 ++++++++++++++----
.../25_algorithms/minmax/constrained.cc | 42 +++++++
.../minmax_element/constrained.cc | 27 +++++
3 files changed, 156 insertions(+), 26 deletions(-)
diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index ecf1378742d..7cf5fb3f187 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3287,26 +3287,59 @@ namespace ranges
template<input_range _Range, typename _Proj = identity,
indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
_Comp = ranges::less>
- requires indirectly_copyable_storable<iterator_t<_Range>,
- range_value_t<_Range>*>
+ requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
constexpr minmax_result<range_value_t<_Range>>
operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
{
auto __first = ranges::begin(__r);
auto __last = ranges::end(__r);
__glibcxx_assert(__first != __last);
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
+ if (++__first == __last)
+ return __result;
+ else
+ {
+ // At this point __result.min == __result.max, so a single
+ // comparison with the next element suffices.
+ auto&& __val = *__first;
+ if (__comp_proj(__val, __result.min))
+ __result.min = std::forward<decltype(__val)>(__val);
+ else
+ __result.max = std::forward<decltype(__val)>(__val);
+ }
while (++__first != __last)
{
- auto __tmp = *__first;
- if (std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.min)))
- __result.min = std::move(__tmp);
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, __tmp),
- std::__invoke(__proj, __result.max)))
- __result.max = std::move(__tmp);
+ // Now process two elements at a time so that we perform at most
+ // 1 + 3*(N-2)/2 comparisons in total (each of the (N-2)/2
+ // iterations of this loop performs three comparisons).
+ range_value_t<_Range> __val1 = *__first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform at most two
+ // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+ // which is not more than 3*N/2, as required.
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ else if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ break;
+ }
+ auto&& __val2 = *__first;
+ if (!__comp_proj(__val2, __val1))
+ {
+ if (__comp_proj(__val1, __result.min))
+ __result.min = std::move(__val1);
+ if (!__comp_proj(__val2, __result.max))
+ __result.max = std::forward<decltype(__val2)>(__val2);
+ }
+ else
+ {
+ if (__comp_proj(__val2, __result.min))
+ __result.min = std::forward<decltype(__val2)>(__val2);
+ if (!__comp_proj(__val1, __result.max))
+ __result.max = std::move(__val1);
+ }
}
return __result;
}
@@ -3412,21 +3445,50 @@ namespace ranges
operator()(_Iter __first, _Sent __last,
_Comp __comp = {}, _Proj __proj = {}) const
{
- if (__first == __last)
- return {__first, __first};
-
+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
minmax_element_result<_Iter> __result = {__first, __first};
- auto __i = __first;
- while (++__i != __last)
+ if (__first == __last || ++__first == __last)
+ return __result;
+ else
{
- if (std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.min)))
- __result.min = __i;
- if (!(bool)std::__invoke(__comp,
- std::__invoke(__proj, *__i),
- std::__invoke(__proj, *__result.max)))
- __result.max = __i;
+ // At this point __result.min == __result.max, so a single
+ // comparison with the next element suffices.
+ if (__comp_proj(*__first, *__result.min))
+ __result.min = __first;
+ else
+ __result.max = __first;
+ }
+ while (++__first != __last)
+ {
+ // Now process two elements at a time so that we perform at most
+ // 1 + 3*(N-2)/2 comparisons in total (each of the (N-2)/2
+ // iterations of this loop performs three comparisons).
+ auto __prev = __first;
+ if (++__first == __last)
+ {
+ // N is odd; in this final iteration, we perform at most two
+ // comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+ // which is not more than 3*N/2, as required.
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ else if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ break;
+ }
+ if (!__comp_proj(*__first, *__prev))
+ {
+ if (__comp_proj(*__prev, *__result.min))
+ __result.min = __prev;
+ if (!__comp_proj(*__first, *__result.max))
+ __result.max = __first;
+ }
+ else
+ {
+ if (__comp_proj(*__first, *__result.min))
+ __result.min = __first;
+ if (!__comp_proj(*__prev, *__result.max))
+ __result.max = __prev;
+ }
}
return __result;
}
@@ -3753,8 +3815,7 @@ namespace ranges
// i.e. we are shifting out at least half of the range. In
// this case we can safely perform the shift with a single
// move.
- std::move(std::move(__first), std::move(__dest_head),
- std::move(__result));
+ std::move(std::move(__first), std::move(__dest_head), __result);
return __result;
}
++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..c365152bf2b 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
// { dg-do run { target c++2a } }
#include <algorithm>
+#include <string>
+#include <vector>
#include <testsuite_hooks.h>
#include <testsuite_iterators.h>
@@ -89,10 +91,50 @@ test03()
== res_t(1,4) );
}
+void
+test04()
+{
+ // Verify we perform at most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ ranges::minmax({1,2}, counted_less{});
+ VERIFY( counter == 1 );
+
+ counter = 0;
+ ranges::minmax({1,2,3}, counted_less{});
+ VERIFY( counter == 3 );
+
+ counter = 0;
+ ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+ VERIFY( counter <= 15 );
+
+ counter = 0;
+ ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+ // PR libstdc++/100387
+ using namespace std::literals::string_literals;
+ auto comp = [](const auto& a, const auto& b) {
+ return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+ };
+ auto result = ranges::minmax({"b"s, "a"s}, comp);
+ VERIFY( result.min == "a"s && result.max == "b"s );
+ result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
+ VERIFY( result.min == "a"s && result.max == "c"s );
+}
+
int
main()
{
test01();
test02();
test03();
+ test04();
+ test05();
}
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..0919f7dda8f 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,35 @@ test01()
static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
}
+void
+test02()
+{
+ // Verify we perform at most 3*N/2 applications of the comparison predicate.
+ static int counter;
+ struct counted_less
+ { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+ int x[] = {1,2,3,4,5,6,7,8,9,10};
+ ranges::minmax_element(x, x+2, counted_less{});
+ VERIFY( counter == 1 );
+
+ counter = 0;
+ ranges::minmax_element(x, x+3, counted_less{});
+ VERIFY( counter == 3 );
+
+ counter = 0;
+ ranges::minmax_element(x, counted_less{});
+ VERIFY( counter <= 15 );
+
+ ranges::reverse(x);
+ counter = 0;
+ ranges::minmax_element(x, counted_less{});
+ VERIFY( counter <= 15 );
+}
+
int
main()
{
test01();
+ test02();
}
--
2.32.0.93.g670b81a890
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
2021-06-18 17:44 ` Patrick Palka
@ 2021-06-18 21:19 ` Jonathan Wakely
0 siblings, 0 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-06-18 21:19 UTC (permalink / raw)
To: Patrick Palka; +Cc: gcc Patches, libstdc++
On Fri, 18 Jun 2021 at 18:44, Patrick Palka wrote:
>
> Ping; here's the same patch with the above comment corrected.
>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
> complexity
>
> This rewrites ranges::minmax and ranges::minmax_element so that it
> performs at most 3*N/2 many comparisons, as required by the standard.
> In passing, this also fixes PR100387 by avoiding a premature std::move
> in ranges::minmax and in std::shift_right.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 10/11?
OK, for all branches, thanks.
^ permalink raw reply [flat|nested] 7+ messages in thread
end of thread, other threads:[~2021-06-18 21:19 UTC | newest]
Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-05 1:42 [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity Patrick Palka
2021-05-05 9:39 ` Jonathan Wakely
2021-05-05 9:46 ` Jonathan Wakely
2021-05-05 15:33 ` Patrick Palka
2021-05-05 15:48 ` Patrick Palka
2021-06-18 17:44 ` Patrick Palka
2021-06-18 21:19 ` Jonathan Wakely
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).