public inbox for libstdc++@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
@ 2021-05-05  1:42 Patrick Palka
  2021-05-05  9:39 ` Jonathan Wakely
  0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05  1:42 UTC (permalink / raw)
  To: gcc-patches; +Cc: libstdc++, Patrick Palka

This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?

libstdc++-v3/ChangeLog:

	PR libstdc++/100387
	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
	(__minmax_element_fn::operator()): Likewise.
	(shift_right): Avoid premature std::move of __result.
	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
	New tests.
	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
	Likewise.
---
 libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
 .../25_algorithms/minmax/constrained.cc       | 31 +++++++
 .../minmax_element/constrained.cc             | 19 ++++
 3 files changed, 113 insertions(+), 24 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index cda3042c11f..bbd29127e89 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3291,18 +3291,39 @@ namespace ranges
 	auto __first = ranges::begin(__r);
 	auto __last = ranges::end(__r);
 	__glibcxx_assert(__first != __last);
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
 	while (++__first != __last)
 	  {
-	    auto __tmp = *__first;
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, __tmp),
-			      std::__invoke(__proj, __result.min)))
-	      __result.min = std::move(__tmp);
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, __tmp),
-				     std::__invoke(__proj, __result.max)))
-	      __result.max = std::move(__tmp);
+	    // Process two elements at a time so that we perform at most
+	    // 3*N/2 many comparisons in total (each of the N/2 iterations
+	    // of this loop performs three comparisions).
+	    auto __val1 = *__first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform a just one
+		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		else if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+		break;
+	      }
+	    auto __val2 = *__first;
+	    if (!__comp_proj(__val2, __val1))
+	      {
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		if (!__comp_proj(__val2, __result.max))
+		  __result.max = std::move(__val2);
+	      }
+	    else
+	      {
+		if (__comp_proj(__val2, __result.min))
+		  __result.min = std::move(__val2);
+		if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+	      }
 	  }
 	return __result;
       }
@@ -3408,21 +3429,40 @@ namespace ranges
       operator()(_Iter __first, _Sent __last,
 		 _Comp __comp = {}, _Proj __proj = {}) const
       {
-	if (__first == __last)
-	  return {__first, __first};
-
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_element_result<_Iter> __result = {__first, __first};
-	auto __i = __first;
-	while (++__i != __last)
+	if (__first == __last)
+	  return __result;
+	while (++__first != __last)
 	  {
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, *__i),
-			      std::__invoke(__proj, *__result.min)))
-	      __result.min = __i;
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, *__i),
-				     std::__invoke(__proj, *__result.max)))
-	      __result.max = __i;
+	    // Process two elements at a time so that we perform at most
+	    // 3*N/2 many comparisons in total (each of the N/2 iterations
+	    // of this loop performs three comparisions).
+	    auto __prev = __first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform a just one
+		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		else if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+		break;
+	      }
+	    if (!__comp_proj(*__first, *__prev))
+	      {
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		if (!__comp_proj(*__first, *__result.max))
+		  __result.max = __first;
+	      }
+	    else
+	      {
+		if (__comp_proj(*__first, *__result.min))
+		  __result.min = __first;
+		if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+	      }
 	  }
 	return __result;
       }
@@ -3749,8 +3789,7 @@ namespace ranges
 		  // i.e. we are shifting out at least half of the range.  In
 		  // this case we can safely perform the shift with a single
 		  // move.
-		  std::move(std::move(__first), std::move(__dest_head),
-			    std::move(__result));
+		  std::move(std::move(__first), std::move(__dest_head), __result);
 		  return __result;
 		}
 	      ++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..2373c15a9d2 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
 // { dg-do run { target c++2a } }
 
 #include <algorithm>
+#include <string>
+#include <vector>
 #include <testsuite_hooks.h>
 #include <testsuite_iterators.h>
 
@@ -89,10 +91,39 @@ test03()
 	  == res_t(1,4) );
 }
 
+void
+test04()
+{
+  // Verify we perform most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+  VERIFY( counter <= 15 );
+
+  counter = 0;
+  ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+  // PR libstdc++/100387
+  std::vector<std::string> v{"b", "a"};
+  auto [min, max] = ranges::minmax(v, [](const auto& a, const auto& b) {
+    return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+  });
+  VERIFY( min == "a" && max == "b" );
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
+  test05();
 }
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..89044181530 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,27 @@ test01()
   static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
 }
 
+void
+test02()
+{
+  // Verify we perform most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  int x[] = {1,2,3,4,5,6,7,8,9,10};
+  ranges::minmax(x, counted_less{});
+  VERIFY( counter <= 15 );
+
+  ranges::reverse(x);
+  counter = 0;
+  ranges::minmax(x, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
 int
 main()
 {
   test01();
+  test02();
 }
-- 
2.31.1.442.g7e39198978


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-05-05  1:42 [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity Patrick Palka
@ 2021-05-05  9:39 ` Jonathan Wakely
  2021-05-05  9:46   ` Jonathan Wakely
  2021-05-05 15:33   ` Patrick Palka
  0 siblings, 2 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-05-05  9:39 UTC (permalink / raw)
  To: Patrick Palka; +Cc: gcc-patches, libstdc++

On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
>This rewrites ranges::minmax and ranges::minmax_element so that it
>performs at most 3*N/2 many comparisons, as required by the standard.
>In passing, this also fixes PR100387 by avoiding a premature std::move
>in ranges::minmax and in std::shift_right.
>
>Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
>10/11?
>
>libstdc++-v3/ChangeLog:
>
>	PR libstdc++/100387
>	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
>	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
>	(__minmax_element_fn::operator()): Likewise.
>	(shift_right): Avoid premature std::move of __result.
>	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
>	New tests.
>	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
>	Likewise.
>---
> libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
> .../25_algorithms/minmax/constrained.cc       | 31 +++++++
> .../minmax_element/constrained.cc             | 19 ++++
> 3 files changed, 113 insertions(+), 24 deletions(-)
>
>diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
>index cda3042c11f..bbd29127e89 100644
>--- a/libstdc++-v3/include/bits/ranges_algo.h
>+++ b/libstdc++-v3/include/bits/ranges_algo.h
>@@ -3291,18 +3291,39 @@ namespace ranges
> 	auto __first = ranges::begin(__r);
> 	auto __last = ranges::end(__r);
> 	__glibcxx_assert(__first != __last);
>+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> 	while (++__first != __last)
> 	  {
>-	    auto __tmp = *__first;
>-	    if (std::__invoke(__comp,
>-			      std::__invoke(__proj, __tmp),
>-			      std::__invoke(__proj, __result.min)))
>-	      __result.min = std::move(__tmp);
>-	    if (!(bool)std::__invoke(__comp,
>-				     std::__invoke(__proj, __tmp),
>-				     std::__invoke(__proj, __result.max)))
>-	      __result.max = std::move(__tmp);
>+	    // Process two elements at a time so that we perform at most
>+	    // 3*N/2 many comparisons in total (each of the N/2 iterations

Is "many" a typo here?

>+	    // of this loop performs three comparisions).
>+	    auto __val1 = *__first;

Can we avoid making this copy if the range satisfies forward_range, by
keeping copies of the min/max iterators, or just forwarding to
ranges::minmax_element?


>+	    if (++__first == __last)
>+	      {
>+		// N is odd; in this final iteration, we perform a just one

s/perform a just one/perform just one/

>+		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.

I find this a bit hard to parse with the inequality there.

>+		if (__comp_proj(__val1, __result.min))
>+		  __result.min = std::move(__val1);
>+		else if (!__comp_proj(__val1, __result.max))
>+		  __result.max = std::move(__val1);

This can be two comparisons, can't it? Would this be better...

   // N is odd; in this final iteration, we perform at most two
   // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
   // which is not more than 3*N/2, as required.

?

>+		break;
>+	      }
>+	    auto __val2 = *__first;
>+	    if (!__comp_proj(__val2, __val1))
>+	      {
>+		if (__comp_proj(__val1, __result.min))
>+		  __result.min = std::move(__val1);
>+		if (!__comp_proj(__val2, __result.max))
>+		  __result.max = std::move(__val2);
>+	      }
>+	    else
>+	      {
>+		if (__comp_proj(__val2, __result.min))
>+		  __result.min = std::move(__val2);
>+		if (!__comp_proj(__val1, __result.max))
>+		  __result.max = std::move(__val1);
>+	      }

I thought we might be able to simplify this to something like:

     auto __val2 = *__first;
     auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
     if (__comp_proj(__min, __result.min))
       __result.min = __min;
     if (__comp_proj(__result.max, __max))
       __result.max = __max;

But it doesn't work because we need to move from __min and __max, but
the (*this)(...) returns minmax_result<const T&> and can't be moved
from.

We could get around that but it's not much of a simplification:

     range_value_t<Range> __val2 = *__first;
     auto [__min, __max] = (*this)(std::addressof(__val1),
                                   std::addressof(__val2),
                                   __comp,
                                   [](auto __p) -> const auto& {
                                     return *__p;
                                   });
     if (__comp_proj(*__min, __result.min))
       __result.min = std::move(*__min);
     if (__comp_proj(__result.max, *__max))
       __result.max = std::move(*__max);

> 	  }
> 	return __result;
>       }
>@@ -3408,21 +3429,40 @@ namespace ranges
>       operator()(_Iter __first, _Sent __last,
> 		 _Comp __comp = {}, _Proj __proj = {}) const
>       {
>-	if (__first == __last)
>-	  return {__first, __first};
>-
>+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> 	minmax_element_result<_Iter> __result = {__first, __first};
>-	auto __i = __first;
>-	while (++__i != __last)
>+	if (__first == __last)
>+	  return __result;
>+	while (++__first != __last)
> 	  {
>-	    if (std::__invoke(__comp,
>-			      std::__invoke(__proj, *__i),
>-			      std::__invoke(__proj, *__result.min)))
>-	      __result.min = __i;
>-	    if (!(bool)std::__invoke(__comp,
>-				     std::__invoke(__proj, *__i),
>-				     std::__invoke(__proj, *__result.max)))
>-	      __result.max = __i;
>+	    // Process two elements at a time so that we perform at most
>+	    // 3*N/2 many comparisons in total (each of the N/2 iterations
>+	    // of this loop performs three comparisions).
>+	    auto __prev = __first;
>+	    if (++__first == __last)
>+	      {
>+		// N is odd; in this final iteration, we perform a just one
>+		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons.

Same comments on the comments as above.

>+		if (__comp_proj(*__prev, *__result.min))
>+		  __result.min = __prev;
>+		else if (!__comp_proj(*__prev, *__result.max))
>+		  __result.max = __prev;
>+		break;
>+	      }
>+	    if (!__comp_proj(*__first, *__prev))
>+	      {
>+		if (__comp_proj(*__prev, *__result.min))
>+		  __result.min = __prev;
>+		if (!__comp_proj(*__first, *__result.max))
>+		  __result.max = __first;
>+	      }
>+	    else
>+	      {
>+		if (__comp_proj(*__first, *__result.min))
>+		  __result.min = __first;
>+		if (!__comp_proj(*__prev, *__result.max))
>+		  __result.max = __prev;
>+	      }

We don't need to move anything here, so this could be written using
ranges::minmax. I'm not sure it is an improvement though (except for
being slightly fewer lines of code):

     auto __mm = minmax(__prev, __first, __comp,
                        [](auto&& __it) -> auto&& { return *__it; });

     if (__comp_proj(*__mm.min, *__result.min))
       __result.min = __mm.min;
     if (__comp_proj(*__result.max, *__mm.max))
       __result.max = __mm.max;


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-05-05  9:39 ` Jonathan Wakely
@ 2021-05-05  9:46   ` Jonathan Wakely
  2021-05-05 15:33   ` Patrick Palka
  1 sibling, 0 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-05-05  9:46 UTC (permalink / raw)
  To: Patrick Palka; +Cc: gcc-patches, libstdc++

On 05/05/21 10:39 +0100, Jonathan Wakely wrote:
>On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
>>This rewrites ranges::minmax and ranges::minmax_element so that it
>>performs at most 3*N/2 many comparisons, as required by the standard.
>>In passing, this also fixes PR100387 by avoiding a premature std::move
>>in ranges::minmax and in std::shift_right.
>>
>>Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
>>10/11?
>>
>>libstdc++-v3/ChangeLog:
>>
>>	PR libstdc++/100387
>>	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
>>	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
>>	(__minmax_element_fn::operator()): Likewise.
>>	(shift_right): Avoid premature std::move of __result.
>>	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
>>	New tests.
>>	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
>>	Likewise.
>>---
>>libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
>>.../25_algorithms/minmax/constrained.cc       | 31 +++++++
>>.../minmax_element/constrained.cc             | 19 ++++
>>3 files changed, 113 insertions(+), 24 deletions(-)
>>
>>diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
>>index cda3042c11f..bbd29127e89 100644
>>--- a/libstdc++-v3/include/bits/ranges_algo.h
>>+++ b/libstdc++-v3/include/bits/ranges_algo.h
>>@@ -3291,18 +3291,39 @@ namespace ranges
>>	auto __first = ranges::begin(__r);
>>	auto __last = ranges::end(__r);
>>	__glibcxx_assert(__first != __last);
>>+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
>>	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
>>	while (++__first != __last)
>>	  {
>>-	    auto __tmp = *__first;
>>-	    if (std::__invoke(__comp,
>>-			      std::__invoke(__proj, __tmp),
>>-			      std::__invoke(__proj, __result.min)))
>>-	      __result.min = std::move(__tmp);
>>-	    if (!(bool)std::__invoke(__comp,
>>-				     std::__invoke(__proj, __tmp),
>>-				     std::__invoke(__proj, __result.max)))
>>-	      __result.max = std::move(__tmp);
>>+	    // Process two elements at a time so that we perform at most
>>+	    // 3*N/2 many comparisons in total (each of the N/2 iterations
>
>Is "many" a typo here?
>
>>+	    // of this loop performs three comparisions).
>>+	    auto __val1 = *__first;
>
>Can we avoid making this copy if the range satisfies forward_range, by
>keeping copies of the min/max iterators, or just forwarding to
>ranges::minmax_element?

Hmm, on the other hand, for a forward range of ints we're probably
better off jsut making the copy here and not going through the
indirections done by minmax_element.

Maybe something like:

   if constexpr (forward_range<_Range> && is_scalar_v<range_value_t<_Range>>)



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-05-05  9:39 ` Jonathan Wakely
  2021-05-05  9:46   ` Jonathan Wakely
@ 2021-05-05 15:33   ` Patrick Palka
  2021-05-05 15:48     ` Patrick Palka
  1 sibling, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05 15:33 UTC (permalink / raw)
  To: Jonathan Wakely; +Cc: Patrick Palka, gcc-patches, libstdc++

On Wed, 5 May 2021, Jonathan Wakely wrote:

> On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > This rewrites ranges::minmax and ranges::minmax_element so that it
> > performs at most 3*N/2 many comparisons, as required by the standard.
> > In passing, this also fixes PR100387 by avoiding a premature std::move
> > in ranges::minmax and in std::shift_right.
> > 
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > 10/11?
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > 	PR libstdc++/100387
> > 	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > 	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
> > 	(__minmax_element_fn::operator()): Likewise.
> > 	(shift_right): Avoid premature std::move of __result.
> > 	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > 	New tests.
> > 	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > 	Likewise.
> > ---
> > libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
> > .../25_algorithms/minmax/constrained.cc       | 31 +++++++
> > .../minmax_element/constrained.cc             | 19 ++++
> > 3 files changed, 113 insertions(+), 24 deletions(-)
> > 
> > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > b/libstdc++-v3/include/bits/ranges_algo.h
> > index cda3042c11f..bbd29127e89 100644
> > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > @@ -3291,18 +3291,39 @@ namespace ranges
> > 	auto __first = ranges::begin(__r);
> > 	auto __last = ranges::end(__r);
> > 	__glibcxx_assert(__first != __last);
> > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > 	while (++__first != __last)
> > 	  {
> > -	    auto __tmp = *__first;
> > -	    if (std::__invoke(__comp,
> > -			      std::__invoke(__proj, __tmp),
> > -			      std::__invoke(__proj, __result.min)))
> > -	      __result.min = std::move(__tmp);
> > -	    if (!(bool)std::__invoke(__comp,
> > -				     std::__invoke(__proj, __tmp),
> > -				     std::__invoke(__proj, __result.max)))
> > -	      __result.max = std::move(__tmp);
> > +	    // Process two elements at a time so that we perform at most
> > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> 
> Is "many" a typo here?

Just a bad habit of mine to usually write "<count> many" instead of just
"<count>" :) Consider the "many" removed.

> 
> > +	    // of this loop performs three comparisions).
> > +	    auto __val1 = *__first;
> 
> Can we avoid making this copy if the range satisfies forward_range, by
> keeping copies of the min/max iterators, or just forwarding to
> ranges::minmax_element?

Maybe we can make __val1 and __val2 universal references?  Ah, but then
__val1 would potentially be invalidated after incrementing __first.  I
think it should be safe to make __val2 a universal reference though.
I've done this in v2 below.

Forwarding to ranges::minmax_element seems like it would be profitable
in some situations, e.g if the value type isn't trivially copyable.  I
can do this in a followup patch for ranges::max/max_element and
ranges::min/min_element too, they should all use the same heuristic.

> 
> 
> > +	    if (++__first == __last)
> > +	      {
> > +		// N is odd; in this final iteration, we perform a just one
> 
> s/perform a just one/perform just one/

Fixed.

> 
> > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > comparisons.
> 
> I find this a bit hard to parse with the inequality there.

Removed.

> 
> > +		if (__comp_proj(__val1, __result.min))
> > +		  __result.min = std::move(__val1);
> > +		else if (!__comp_proj(__val1, __result.max))
> > +		  __result.max = std::move(__val1);
> 
> This can be two comparisons, can't it? Would this be better...

Whoops, yeah...

> 
>   // N is odd; in this final iteration, we perform at most two
>   // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
>   // which is not more than 3*N/2, as required.
> 
> ?

Ah, but then the total is more than 3*N/2 :(  And I think we reach this
case really when N is even, not odd (sorry, I really botched this
patch).

And when N=2 in particular, we perform up two comparisons instead of
three, but actually a single comparison should suffice in this case.  I
think all this is fixed in v2 below by handling the second element in
the range specially.

> 
> > +		break;
> > +	      }
> > +	    auto __val2 = *__first;
> > +	    if (!__comp_proj(__val2, __val1))
> > +	      {
> > +		if (__comp_proj(__val1, __result.min))
> > +		  __result.min = std::move(__val1);
> > +		if (!__comp_proj(__val2, __result.max))
> > +		  __result.max = std::move(__val2);
> > +	      }
> > +	    else
> > +	      {
> > +		if (__comp_proj(__val2, __result.min))
> > +		  __result.min = std::move(__val2);
> > +		if (!__comp_proj(__val1, __result.max))
> > +		  __result.max = std::move(__val1);
> > +	      }
> 
> I thought we might be able to simplify this to something like:
> 
>     auto __val2 = *__first;
>     auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
>     if (__comp_proj(__min, __result.min))
>       __result.min = __min;
>     if (__comp_proj(__result.max, __max))
>       __result.max = __max;
> 
> But it doesn't work because we need to move from __min and __max, but
> the (*this)(...) returns minmax_result<const T&> and can't be moved
> from.
> 
> We could get around that but it's not much of a simplification:
> 
>     range_value_t<Range> __val2 = *__first;
>     auto [__min, __max] = (*this)(std::addressof(__val1),
>                                   std::addressof(__val2),
>                                   __comp,
>                                   [](auto __p) -> const auto& {
>                                     return *__p;
>                                   });
>     if (__comp_proj(*__min, __result.min))
>       __result.min = std::move(*__min);
>     if (__comp_proj(__result.max, *__max))
>       __result.max = std::move(*__max);

Hmm, now that __val2 was made a universal reference, this simplifcation
might not work.

> 
> > 	  }
> > 	return __result;
> >       }
> > @@ -3408,21 +3429,40 @@ namespace ranges
> >       operator()(_Iter __first, _Sent __last,
> > 		 _Comp __comp = {}, _Proj __proj = {}) const
> >       {
> > -	if (__first == __last)
> > -	  return {__first, __first};
> > -
> > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > 	minmax_element_result<_Iter> __result = {__first, __first};
> > -	auto __i = __first;
> > -	while (++__i != __last)
> > +	if (__first == __last)
> > +	  return __result;
> > +	while (++__first != __last)
> > 	  {
> > -	    if (std::__invoke(__comp,
> > -			      std::__invoke(__proj, *__i),
> > -			      std::__invoke(__proj, *__result.min)))
> > -	      __result.min = __i;
> > -	    if (!(bool)std::__invoke(__comp,
> > -				     std::__invoke(__proj, *__i),
> > -				     std::__invoke(__proj, *__result.max)))
> > -	      __result.max = __i;
> > +	    // Process two elements at a time so that we perform at most
> > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> > +	    // of this loop performs three comparisions).
> > +	    auto __prev = __first;
> > +	    if (++__first == __last)
> > +	      {
> > +		// N is odd; in this final iteration, we perform a just one
> > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > comparisons.
> 
> Same comments on the comments as above.

Fixed.

> 
> > +		if (__comp_proj(*__prev, *__result.min))
> > +		  __result.min = __prev;
> > +		else if (!__comp_proj(*__prev, *__result.max))
> > +		  __result.max = __prev;
> > +		break;
> > +	      }
> > +	    if (!__comp_proj(*__first, *__prev))
> > +	      {
> > +		if (__comp_proj(*__prev, *__result.min))
> > +		  __result.min = __prev;
> > +		if (!__comp_proj(*__first, *__result.max))
> > +		  __result.max = __first;
> > +	      }
> > +	    else
> > +	      {
> > +		if (__comp_proj(*__first, *__result.min))
> > +		  __result.min = __first;
> > +		if (!__comp_proj(*__prev, *__result.max))
> > +		  __result.max = __prev;
> > +	      }
> 
> We don't need to move anything here, so this could be written using
> ranges::minmax. I'm not sure it is an improvement though (except for
> being slightly fewer lines of code):
> 
>     auto __mm = minmax(__prev, __first, __comp,
>                        [](auto&& __it) -> auto&& { return *__it; });
> 
>     if (__comp_proj(*__mm.min, *__result.min))
>       __result.min = __mm.min;
>     if (__comp_proj(*__result.max, *__mm.max))
>       __result.max = __mm.max;

Hmm, it's shorter, but on the other hand it'd make the implementations
of minmax and minmax_element no longer mirror each other as closely.  So
I didn't make this change for now.

Here's v2, which makes the following additional changes:

  * Fixes comment typos, and some nearby indentation in the signature
    of minmax
  * Makes both minmax and minmax_element handle the second element
    specially in order to optimally perform just a single comparison
    when N=2 and to always stay below 3*N/2 comparisons
  * Avoids using a deduced return type for __val1
  * Makes __val2 a universal reference to avoid a move
  * Adds comparison complexity tests for the N=2,3 cases

-- >8 --

Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
 complexity

This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?

libstdc++-v3/ChangeLog:

	PR libstdc++/100387
	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
	to limit comparison complexity to 3*N/2.
	(__minmax_element_fn::operator()): Likewise.
	(shift_right): Avoid premature std::move of __result.
	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
	New tests.
	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
	Likewise.
---
 libstdc++-v3/include/bits/ranges_algo.h       | 113 ++++++++++++++----
 .../25_algorithms/minmax/constrained.cc       |  42 +++++++
 .../minmax_element/constrained.cc             |  27 +++++
 3 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index cda3042c11f..2091cbf5b4e 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3283,26 +3283,59 @@ namespace ranges
     template<input_range _Range, typename _Proj = identity,
 	     indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
 	       _Comp = ranges::less>
-      requires indirectly_copyable_storable<iterator_t<_Range>,
-      range_value_t<_Range>*>
+      requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
       constexpr minmax_result<range_value_t<_Range>>
       operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
       {
 	auto __first = ranges::begin(__r);
 	auto __last = ranges::end(__r);
 	__glibcxx_assert(__first != __last);
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
+	if (++__first == __last)
+	  return __result;
+	else
+	  {
+	    // At this point __result.min == __result.max, so a single
+	    // comparison with the next element suffices.
+	    auto&& __val = *__first;
+	    if (__comp_proj(__val, __result.min))
+	      __result.min = std::forward<decltype(__val)>(__val);
+	    else
+	      __result.max = std::forward<decltype(__val)>(__val);
+	  }
 	while (++__first != __last)
 	  {
-	    auto __tmp = *__first;
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, __tmp),
-			      std::__invoke(__proj, __result.min)))
-	      __result.min = std::move(__tmp);
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, __tmp),
-				     std::__invoke(__proj, __result.max)))
-	      __result.max = std::move(__tmp);
+	    // Now process two elements at a time so that we perform at most
+	    // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
+	    // of this loop performs three comparisons).
+	    range_value_t<_Range> __val1 = *__first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform at most two
+		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+		// which is not more than 3*N/2, as required.
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		else if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+		break;
+	      }
+	    auto&& __val2 = *__first;
+	    if (!__comp_proj(__val2, __val1))
+	      {
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		if (!__comp_proj(__val2, __result.max))
+		  __result.max = std::forward<decltype(__val2)>(__val2);
+	      }
+	    else
+	      {
+		if (__comp_proj(__val2, __result.min))
+		  __result.min = std::forward<decltype(__val2)>(__val2);
+		if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+	      }
 	  }
 	return __result;
       }
@@ -3408,21 +3441,50 @@ namespace ranges
       operator()(_Iter __first, _Sent __last,
 		 _Comp __comp = {}, _Proj __proj = {}) const
       {
-	if (__first == __last)
-	  return {__first, __first};
-
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_element_result<_Iter> __result = {__first, __first};
-	auto __i = __first;
-	while (++__i != __last)
+	if (__first == __last || ++__first == __last)
+	  return __result;
+	else
 	  {
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, *__i),
-			      std::__invoke(__proj, *__result.min)))
-	      __result.min = __i;
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, *__i),
-				     std::__invoke(__proj, *__result.max)))
-	      __result.max = __i;
+	    // At this point __result.min == __result.max, so a single
+	    // comparison with the next element suffices.
+	    if (__comp_proj(*__first, *__result.min))
+	      __result.min = __first;
+	    else
+	      __result.max = __first;
+	  }
+	while (++__first != __last)
+	  {
+	    // Now process two elements at a time so that we perform at most
+	    // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
+	    // of this loop performs three comparisons).
+	    auto __prev = __first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform at most two
+		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+		// which is not more than 3*N/2, as required.
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		else if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+		break;
+	      }
+	    if (!__comp_proj(*__first, *__prev))
+	      {
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		if (!__comp_proj(*__first, *__result.max))
+		  __result.max = __first;
+	      }
+	    else
+	      {
+		if (__comp_proj(*__first, *__result.min))
+		  __result.min = __first;
+		if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+	      }
 	  }
 	return __result;
       }
@@ -3749,8 +3811,7 @@ namespace ranges
 		  // i.e. we are shifting out at least half of the range.  In
 		  // this case we can safely perform the shift with a single
 		  // move.
-		  std::move(std::move(__first), std::move(__dest_head),
-			    std::move(__result));
+		  std::move(std::move(__first), std::move(__dest_head), __result);
 		  return __result;
 		}
 	      ++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..c365152bf2b 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
 // { dg-do run { target c++2a } }
 
 #include <algorithm>
+#include <string>
+#include <vector>
 #include <testsuite_hooks.h>
 #include <testsuite_iterators.h>
 
@@ -89,10 +91,50 @@ test03()
 	  == res_t(1,4) );
 }
 
+void
+test04()
+{
+  // Verify we perform at most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  ranges::minmax({1,2}, counted_less{});
+  VERIFY( counter == 1 );
+
+  counter = 0;
+  ranges::minmax({1,2,3}, counted_less{});
+  VERIFY( counter == 3 );
+
+  counter = 0;
+  ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+  VERIFY( counter <= 15 );
+
+  counter = 0;
+  ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+  // PR libstdc++/100387
+  using namespace std::literals::string_literals;
+  auto comp = [](const auto& a, const auto& b) {
+    return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+  };
+  auto result = ranges::minmax({"b"s, "a"s}, comp);
+  VERIFY( result.min == "a"s && result.max == "b"s );
+  result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
+  VERIFY( result.min == "a"s && result.max == "c"s );
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
+  test05();
 }
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..0919f7dda8f 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,35 @@ test01()
   static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
 }
 
+void
+test02()
+{
+  // Verify we perform at most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  int x[] = {1,2,3,4,5,6,7,8,9,10};
+  ranges::minmax_element(x, x+2, counted_less{});
+  VERIFY( counter == 1 );
+
+  counter = 0;
+  ranges::minmax_element(x, x+3, counted_less{});
+  VERIFY( counter == 3 );
+
+  counter = 0;
+  ranges::minmax_element(x, counted_less{});
+  VERIFY( counter <= 15 );
+
+  ranges::reverse(x);
+  counter = 0;
+  ranges::minmax_element(x, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
 int
 main()
 {
   test01();
+  test02();
 }
-- 
2.31.1.442.g7e39198978


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-05-05 15:33   ` Patrick Palka
@ 2021-05-05 15:48     ` Patrick Palka
  2021-06-18 17:44       ` Patrick Palka
  0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-05-05 15:48 UTC (permalink / raw)
  To: Patrick Palka; +Cc: Jonathan Wakely, gcc-patches, libstdc++

On Wed, 5 May 2021, Patrick Palka wrote:

> On Wed, 5 May 2021, Jonathan Wakely wrote:
> 
> > On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > > This rewrites ranges::minmax and ranges::minmax_element so that it
> > > performs at most 3*N/2 many comparisons, as required by the standard.
> > > In passing, this also fixes PR100387 by avoiding a premature std::move
> > > in ranges::minmax and in std::shift_right.
> > > 
> > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > > 10/11?
> > > 
> > > libstdc++-v3/ChangeLog:
> > > 
> > > 	PR libstdc++/100387
> > > 	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > > 	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
> > > 	(__minmax_element_fn::operator()): Likewise.
> > > 	(shift_right): Avoid premature std::move of __result.
> > > 	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > > 	New tests.
> > > 	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > > 	Likewise.
> > > ---
> > > libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
> > > .../25_algorithms/minmax/constrained.cc       | 31 +++++++
> > > .../minmax_element/constrained.cc             | 19 ++++
> > > 3 files changed, 113 insertions(+), 24 deletions(-)
> > > 
> > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > > b/libstdc++-v3/include/bits/ranges_algo.h
> > > index cda3042c11f..bbd29127e89 100644
> > > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > > @@ -3291,18 +3291,39 @@ namespace ranges
> > > 	auto __first = ranges::begin(__r);
> > > 	auto __last = ranges::end(__r);
> > > 	__glibcxx_assert(__first != __last);
> > > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > > 	while (++__first != __last)
> > > 	  {
> > > -	    auto __tmp = *__first;
> > > -	    if (std::__invoke(__comp,
> > > -			      std::__invoke(__proj, __tmp),
> > > -			      std::__invoke(__proj, __result.min)))
> > > -	      __result.min = std::move(__tmp);
> > > -	    if (!(bool)std::__invoke(__comp,
> > > -				     std::__invoke(__proj, __tmp),
> > > -				     std::__invoke(__proj, __result.max)))
> > > -	      __result.max = std::move(__tmp);
> > > +	    // Process two elements at a time so that we perform at most
> > > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> > 
> > Is "many" a typo here?
> 
> Just a bad habit of mine to usually write "<count> many" instead of just
> "<count>" :) Consider the "many" removed.
> 
> > 
> > > +	    // of this loop performs three comparisions).
> > > +	    auto __val1 = *__first;
> > 
> > Can we avoid making this copy if the range satisfies forward_range, by
> > keeping copies of the min/max iterators, or just forwarding to
> > ranges::minmax_element?
> 
> Maybe we can make __val1 and __val2 universal references?  Ah, but then
> __val1 would potentially be invalidated after incrementing __first.  I
> think it should be safe to make __val2 a universal reference though.
> I've done this in v2 below.
> 
> Forwarding to ranges::minmax_element seems like it would be profitable
> in some situations, e.g if the value type isn't trivially copyable.  I
> can do this in a followup patch for ranges::max/max_element and
> ranges::min/min_element too, they should all use the same heuristic.
> 
> > 
> > 
> > > +	    if (++__first == __last)
> > > +	      {
> > > +		// N is odd; in this final iteration, we perform a just one
> > 
> > s/perform a just one/perform just one/
> 
> Fixed.
> 
> > 
> > > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > comparisons.
> > 
> > I find this a bit hard to parse with the inequality there.
> 
> Removed.
> 
> > 
> > > +		if (__comp_proj(__val1, __result.min))
> > > +		  __result.min = std::move(__val1);
> > > +		else if (!__comp_proj(__val1, __result.max))
> > > +		  __result.max = std::move(__val1);
> > 
> > This can be two comparisons, can't it? Would this be better...
> 
> Whoops, yeah...
> 
> > 
> >   // N is odd; in this final iteration, we perform at most two
> >   // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
> >   // which is not more than 3*N/2, as required.
> > 
> > ?
> 
> Ah, but then the total is more than 3*N/2 :(  And I think we reach this
> case really when N is even, not odd (sorry, I really botched this
> patch).
> 
> And when N=2 in particular, we perform up two comparisons instead of
> three, but actually a single comparison should suffice in this case.  I
> think all this is fixed in v2 below by handling the second element in
> the range specially.
> 
> > 
> > > +		break;
> > > +	      }
> > > +	    auto __val2 = *__first;
> > > +	    if (!__comp_proj(__val2, __val1))
> > > +	      {
> > > +		if (__comp_proj(__val1, __result.min))
> > > +		  __result.min = std::move(__val1);
> > > +		if (!__comp_proj(__val2, __result.max))
> > > +		  __result.max = std::move(__val2);
> > > +	      }
> > > +	    else
> > > +	      {
> > > +		if (__comp_proj(__val2, __result.min))
> > > +		  __result.min = std::move(__val2);
> > > +		if (!__comp_proj(__val1, __result.max))
> > > +		  __result.max = std::move(__val1);
> > > +	      }
> > 
> > I thought we might be able to simplify this to something like:
> > 
> >     auto __val2 = *__first;
> >     auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
> >     if (__comp_proj(__min, __result.min))
> >       __result.min = __min;
> >     if (__comp_proj(__result.max, __max))
> >       __result.max = __max;
> > 
> > But it doesn't work because we need to move from __min and __max, but
> > the (*this)(...) returns minmax_result<const T&> and can't be moved
> > from.
> > 
> > We could get around that but it's not much of a simplification:
> > 
> >     range_value_t<Range> __val2 = *__first;
> >     auto [__min, __max] = (*this)(std::addressof(__val1),
> >                                   std::addressof(__val2),
> >                                   __comp,
> >                                   [](auto __p) -> const auto& {
> >                                     return *__p;
> >                                   });
> >     if (__comp_proj(*__min, __result.min))
> >       __result.min = std::move(*__min);
> >     if (__comp_proj(__result.max, *__max))
> >       __result.max = std::move(*__max);
> 
> Hmm, now that __val2 was made a universal reference, this simplifcation
> might not work.
> 
> > 
> > > 	  }
> > > 	return __result;
> > >       }
> > > @@ -3408,21 +3429,40 @@ namespace ranges
> > >       operator()(_Iter __first, _Sent __last,
> > > 		 _Comp __comp = {}, _Proj __proj = {}) const
> > >       {
> > > -	if (__first == __last)
> > > -	  return {__first, __first};
> > > -
> > > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > 	minmax_element_result<_Iter> __result = {__first, __first};
> > > -	auto __i = __first;
> > > -	while (++__i != __last)
> > > +	if (__first == __last)
> > > +	  return __result;
> > > +	while (++__first != __last)
> > > 	  {
> > > -	    if (std::__invoke(__comp,
> > > -			      std::__invoke(__proj, *__i),
> > > -			      std::__invoke(__proj, *__result.min)))
> > > -	      __result.min = __i;
> > > -	    if (!(bool)std::__invoke(__comp,
> > > -				     std::__invoke(__proj, *__i),
> > > -				     std::__invoke(__proj, *__result.max)))
> > > -	      __result.max = __i;
> > > +	    // Process two elements at a time so that we perform at most
> > > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> > > +	    // of this loop performs three comparisions).
> > > +	    auto __prev = __first;
> > > +	    if (++__first == __last)
> > > +	      {
> > > +		// N is odd; in this final iteration, we perform a just one
> > > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > comparisons.
> > 
> > Same comments on the comments as above.
> 
> Fixed.
> 
> > 
> > > +		if (__comp_proj(*__prev, *__result.min))
> > > +		  __result.min = __prev;
> > > +		else if (!__comp_proj(*__prev, *__result.max))
> > > +		  __result.max = __prev;
> > > +		break;
> > > +	      }
> > > +	    if (!__comp_proj(*__first, *__prev))
> > > +	      {
> > > +		if (__comp_proj(*__prev, *__result.min))
> > > +		  __result.min = __prev;
> > > +		if (!__comp_proj(*__first, *__result.max))
> > > +		  __result.max = __first;
> > > +	      }
> > > +	    else
> > > +	      {
> > > +		if (__comp_proj(*__first, *__result.min))
> > > +		  __result.min = __first;
> > > +		if (!__comp_proj(*__prev, *__result.max))
> > > +		  __result.max = __prev;
> > > +	      }
> > 
> > We don't need to move anything here, so this could be written using
> > ranges::minmax. I'm not sure it is an improvement though (except for
> > being slightly fewer lines of code):
> > 
> >     auto __mm = minmax(__prev, __first, __comp,
> >                        [](auto&& __it) -> auto&& { return *__it; });
> > 
> >     if (__comp_proj(*__mm.min, *__result.min))
> >       __result.min = __mm.min;
> >     if (__comp_proj(*__result.max, *__mm.max))
> >       __result.max = __mm.max;
> 
> Hmm, it's shorter, but on the other hand it'd make the implementations
> of minmax and minmax_element no longer mirror each other as closely.  So
> I didn't make this change for now.
> 
> Here's v2, which makes the following additional changes:
> 
>   * Fixes comment typos, and some nearby indentation in the signature
>     of minmax
>   * Makes both minmax and minmax_element handle the second element
>     specially in order to optimally perform just a single comparison
>     when N=2 and to always stay below 3*N/2 comparisons
>   * Avoids using a deduced return type for __val1
>   * Makes __val2 a universal reference to avoid a move
>   * Adds comparison complexity tests for the N=2,3 cases
> 
> -- >8 --
> 
> Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
>  complexity
> 
> This rewrites ranges::minmax and ranges::minmax_element so that it
> performs at most 3*N/2 many comparisons, as required by the standard.
> In passing, this also fixes PR100387 by avoiding a premature std::move
> in ranges::minmax and in std::shift_right.
> 
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 10/11?
> 
> libstdc++-v3/ChangeLog:
> 
> 	PR libstdc++/100387
> 	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> 	to limit comparison complexity to 3*N/2.
> 	(__minmax_element_fn::operator()): Likewise.
> 	(shift_right): Avoid premature std::move of __result.
> 	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> 	New tests.
> 	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> 	Likewise.
> ---
>  libstdc++-v3/include/bits/ranges_algo.h       | 113 ++++++++++++++----
>  .../25_algorithms/minmax/constrained.cc       |  42 +++++++
>  .../minmax_element/constrained.cc             |  27 +++++
>  3 files changed, 156 insertions(+), 26 deletions(-)
> 
> diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
> index cda3042c11f..2091cbf5b4e 100644
> --- a/libstdc++-v3/include/bits/ranges_algo.h
> +++ b/libstdc++-v3/include/bits/ranges_algo.h
> @@ -3283,26 +3283,59 @@ namespace ranges
>      template<input_range _Range, typename _Proj = identity,
>  	     indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
>  	       _Comp = ranges::less>
> -      requires indirectly_copyable_storable<iterator_t<_Range>,
> -      range_value_t<_Range>*>
> +      requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
>        constexpr minmax_result<range_value_t<_Range>>
>        operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
>        {
>  	auto __first = ranges::begin(__r);
>  	auto __last = ranges::end(__r);
>  	__glibcxx_assert(__first != __last);
> +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
>  	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> +	if (++__first == __last)
> +	  return __result;
> +	else
> +	  {
> +	    // At this point __result.min == __result.max, so a single
> +	    // comparison with the next element suffices.
> +	    auto&& __val = *__first;
> +	    if (__comp_proj(__val, __result.min))
> +	      __result.min = std::forward<decltype(__val)>(__val);
> +	    else
> +	      __result.max = std::forward<decltype(__val)>(__val);
> +	  }
>  	while (++__first != __last)
>  	  {
> -	    auto __tmp = *__first;
> -	    if (std::__invoke(__comp,
> -			      std::__invoke(__proj, __tmp),
> -			      std::__invoke(__proj, __result.min)))
> -	      __result.min = std::move(__tmp);
> -	    if (!(bool)std::__invoke(__comp,
> -				     std::__invoke(__proj, __tmp),
> -				     std::__invoke(__proj, __result.max)))
> -	      __result.max = std::move(__tmp);
> +	    // Now process two elements at a time so that we perform at most
> +	    // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations

Oops, this should now say "1 + 3*(N-2)/2 comparisons" because of the
initial comparison with the second element above.

> +	    // of this loop performs three comparisons).
> +	    range_value_t<_Range> __val1 = *__first;
> +	    if (++__first == __last)
> +	      {
> +		// N is odd; in this final iteration, we perform at most two
> +		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
> +		// which is not more than 3*N/2, as required.
> +		if (__comp_proj(__val1, __result.min))
> +		  __result.min = std::move(__val1);
> +		else if (!__comp_proj(__val1, __result.max))
> +		  __result.max = std::move(__val1);
> +		break;
> +	      }
> +	    auto&& __val2 = *__first;
> +	    if (!__comp_proj(__val2, __val1))
> +	      {
> +		if (__comp_proj(__val1, __result.min))
> +		  __result.min = std::move(__val1);
> +		if (!__comp_proj(__val2, __result.max))
> +		  __result.max = std::forward<decltype(__val2)>(__val2);
> +	      }
> +	    else
> +	      {
> +		if (__comp_proj(__val2, __result.min))
> +		  __result.min = std::forward<decltype(__val2)>(__val2);
> +		if (!__comp_proj(__val1, __result.max))
> +		  __result.max = std::move(__val1);
> +	      }
>  	  }
>  	return __result;
>        }
> @@ -3408,21 +3441,50 @@ namespace ranges
>        operator()(_Iter __first, _Sent __last,
>  		 _Comp __comp = {}, _Proj __proj = {}) const
>        {
> -	if (__first == __last)
> -	  return {__first, __first};
> -
> +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
>  	minmax_element_result<_Iter> __result = {__first, __first};
> -	auto __i = __first;
> -	while (++__i != __last)
> +	if (__first == __last || ++__first == __last)
> +	  return __result;
> +	else
>  	  {
> -	    if (std::__invoke(__comp,
> -			      std::__invoke(__proj, *__i),
> -			      std::__invoke(__proj, *__result.min)))
> -	      __result.min = __i;
> -	    if (!(bool)std::__invoke(__comp,
> -				     std::__invoke(__proj, *__i),
> -				     std::__invoke(__proj, *__result.max)))
> -	      __result.max = __i;
> +	    // At this point __result.min == __result.max, so a single
> +	    // comparison with the next element suffices.
> +	    if (__comp_proj(*__first, *__result.min))
> +	      __result.min = __first;
> +	    else
> +	      __result.max = __first;
> +	  }
> +	while (++__first != __last)
> +	  {
> +	    // Now process two elements at a time so that we perform at most
> +	    // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations

Here too; consider this fixed.

> +	    // of this loop performs three comparisons).
> +	    auto __prev = __first;
> +	    if (++__first == __last)
> +	      {
> +		// N is odd; in this final iteration, we perform at most two
> +		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
> +		// which is not more than 3*N/2, as required.
> +		if (__comp_proj(*__prev, *__result.min))
> +		  __result.min = __prev;
> +		else if (!__comp_proj(*__prev, *__result.max))
> +		  __result.max = __prev;
> +		break;
> +	      }
> +	    if (!__comp_proj(*__first, *__prev))
> +	      {
> +		if (__comp_proj(*__prev, *__result.min))
> +		  __result.min = __prev;
> +		if (!__comp_proj(*__first, *__result.max))
> +		  __result.max = __first;
> +	      }
> +	    else
> +	      {
> +		if (__comp_proj(*__first, *__result.min))
> +		  __result.min = __first;
> +		if (!__comp_proj(*__prev, *__result.max))
> +		  __result.max = __prev;
> +	      }
>  	  }
>  	return __result;
>        }
> @@ -3749,8 +3811,7 @@ namespace ranges
>  		  // i.e. we are shifting out at least half of the range.  In
>  		  // this case we can safely perform the shift with a single
>  		  // move.
> -		  std::move(std::move(__first), std::move(__dest_head),
> -			    std::move(__result));
> +		  std::move(std::move(__first), std::move(__dest_head), __result);
>  		  return __result;
>  		}
>  	      ++__dest_head;
> diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> index 786922414b5..c365152bf2b 100644
> --- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> +++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
> @@ -19,6 +19,8 @@
>  // { dg-do run { target c++2a } }
>  
>  #include <algorithm>
> +#include <string>
> +#include <vector>
>  #include <testsuite_hooks.h>
>  #include <testsuite_iterators.h>
>  
> @@ -89,10 +91,50 @@ test03()
>  	  == res_t(1,4) );
>  }
>  
> +void
> +test04()
> +{
> +  // Verify we perform at most 3*N/2 applications of the comparison predicate.
> +  static int counter;
> +  struct counted_less
> +  { bool operator()(int a, int b) { ++counter; return a < b; } };
> +
> +  ranges::minmax({1,2}, counted_less{});
> +  VERIFY( counter == 1 );
> +
> +  counter = 0;
> +  ranges::minmax({1,2,3}, counted_less{});
> +  VERIFY( counter == 3 );
> +
> +  counter = 0;
> +  ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
> +  VERIFY( counter <= 15 );
> +
> +  counter = 0;
> +  ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
> +  VERIFY( counter <= 15 );
> +}
> +
> +void
> +test05()
> +{
> +  // PR libstdc++/100387
> +  using namespace std::literals::string_literals;
> +  auto comp = [](const auto& a, const auto& b) {
> +    return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
> +  };
> +  auto result = ranges::minmax({"b"s, "a"s}, comp);
> +  VERIFY( result.min == "a"s && result.max == "b"s );
> +  result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
> +  VERIFY( result.min == "a"s && result.max == "c"s );
> +}
> +
>  int
>  main()
>  {
>    test01();
>    test02();
>    test03();
> +  test04();
> +  test05();
>  }
> diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> index 3b11c0dd96c..0919f7dda8f 100644
> --- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> +++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
> @@ -61,8 +61,35 @@ test01()
>    static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
>  }
>  
> +void
> +test02()
> +{
> +  // Verify we perform at most 3*N/2 applications of the comparison predicate.
> +  static int counter;
> +  struct counted_less
> +  { bool operator()(int a, int b) { ++counter; return a < b; } };
> +
> +  int x[] = {1,2,3,4,5,6,7,8,9,10};
> +  ranges::minmax_element(x, x+2, counted_less{});
> +  VERIFY( counter == 1 );
> +
> +  counter = 0;
> +  ranges::minmax_element(x, x+3, counted_less{});
> +  VERIFY( counter == 3 );
> +
> +  counter = 0;
> +  ranges::minmax_element(x, counted_less{});
> +  VERIFY( counter <= 15 );
> +
> +  ranges::reverse(x);
> +  counter = 0;
> +  ranges::minmax_element(x, counted_less{});
> +  VERIFY( counter <= 15 );
> +}
> +
>  int
>  main()
>  {
>    test01();
> +  test02();
>  }
> -- 
> 2.31.1.442.g7e39198978
> 
> 


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-05-05 15:48     ` Patrick Palka
@ 2021-06-18 17:44       ` Patrick Palka
  2021-06-18 21:19         ` Jonathan Wakely
  0 siblings, 1 reply; 7+ messages in thread
From: Patrick Palka @ 2021-06-18 17:44 UTC (permalink / raw)
  To: Patrick Palka; +Cc: Jonathan Wakely, gcc-patches, libstdc++

On Wed, 5 May 2021, Patrick Palka wrote:

> On Wed, 5 May 2021, Patrick Palka wrote:
> 
> > On Wed, 5 May 2021, Jonathan Wakely wrote:
> > 
> > > On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote:
> > > > This rewrites ranges::minmax and ranges::minmax_element so that it
> > > > performs at most 3*N/2 many comparisons, as required by the standard.
> > > > In passing, this also fixes PR100387 by avoiding a premature std::move
> > > > in ranges::minmax and in std::shift_right.
> > > > 
> > > > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > > > 10/11?
> > > > 
> > > > libstdc++-v3/ChangeLog:
> > > > 
> > > > 	PR libstdc++/100387
> > > > 	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > > > 	to limit comparison complexity to 3*N/2.  Avoid premature std::move.
> > > > 	(__minmax_element_fn::operator()): Likewise.
> > > > 	(shift_right): Avoid premature std::move of __result.
> > > > 	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > > > 	New tests.
> > > > 	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > > > 	Likewise.
> > > > ---
> > > > libstdc++-v3/include/bits/ranges_algo.h       | 87 ++++++++++++++-----
> > > > .../25_algorithms/minmax/constrained.cc       | 31 +++++++
> > > > .../minmax_element/constrained.cc             | 19 ++++
> > > > 3 files changed, 113 insertions(+), 24 deletions(-)
> > > > 
> > > > diff --git a/libstdc++-v3/include/bits/ranges_algo.h
> > > > b/libstdc++-v3/include/bits/ranges_algo.h
> > > > index cda3042c11f..bbd29127e89 100644
> > > > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > > > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > > > @@ -3291,18 +3291,39 @@ namespace ranges
> > > > 	auto __first = ranges::begin(__r);
> > > > 	auto __last = ranges::end(__r);
> > > > 	__glibcxx_assert(__first != __last);
> > > > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > > 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > > > 	while (++__first != __last)
> > > > 	  {
> > > > -	    auto __tmp = *__first;
> > > > -	    if (std::__invoke(__comp,
> > > > -			      std::__invoke(__proj, __tmp),
> > > > -			      std::__invoke(__proj, __result.min)))
> > > > -	      __result.min = std::move(__tmp);
> > > > -	    if (!(bool)std::__invoke(__comp,
> > > > -				     std::__invoke(__proj, __tmp),
> > > > -				     std::__invoke(__proj, __result.max)))
> > > > -	      __result.max = std::move(__tmp);
> > > > +	    // Process two elements at a time so that we perform at most
> > > > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> > > 
> > > Is "many" a typo here?
> > 
> > Just a bad habit of mine to usually write "<count> many" instead of just
> > "<count>" :) Consider the "many" removed.
> > 
> > > 
> > > > +	    // of this loop performs three comparisions).
> > > > +	    auto __val1 = *__first;
> > > 
> > > Can we avoid making this copy if the range satisfies forward_range, by
> > > keeping copies of the min/max iterators, or just forwarding to
> > > ranges::minmax_element?
> > 
> > Maybe we can make __val1 and __val2 universal references?  Ah, but then
> > __val1 would potentially be invalidated after incrementing __first.  I
> > think it should be safe to make __val2 a universal reference though.
> > I've done this in v2 below.
> > 
> > Forwarding to ranges::minmax_element seems like it would be profitable
> > in some situations, e.g if the value type isn't trivially copyable.  I
> > can do this in a followup patch for ranges::max/max_element and
> > ranges::min/min_element too, they should all use the same heuristic.
> > 
> > > 
> > > 
> > > > +	    if (++__first == __last)
> > > > +	      {
> > > > +		// N is odd; in this final iteration, we perform a just one
> > > 
> > > s/perform a just one/perform just one/
> > 
> > Fixed.
> > 
> > > 
> > > > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > > comparisons.
> > > 
> > > I find this a bit hard to parse with the inequality there.
> > 
> > Removed.
> > 
> > > 
> > > > +		if (__comp_proj(__val1, __result.min))
> > > > +		  __result.min = std::move(__val1);
> > > > +		else if (!__comp_proj(__val1, __result.max))
> > > > +		  __result.max = std::move(__val1);
> > > 
> > > This can be two comparisons, can't it? Would this be better...
> > 
> > Whoops, yeah...
> > 
> > > 
> > >   // N is odd; in this final iteration, we perform at most two
> > >   // comparisons, for a total of 3*(N-1)/2 + 2 comparisons,
> > >   // which is not more than 3*N/2, as required.
> > > 
> > > ?
> > 
> > Ah, but then the total is more than 3*N/2 :(  And I think we reach this
> > case really when N is even, not odd (sorry, I really botched this
> > patch).
> > 
> > And when N=2 in particular, we perform up two comparisons instead of
> > three, but actually a single comparison should suffice in this case.  I
> > think all this is fixed in v2 below by handling the second element in
> > the range specially.
> > 
> > > 
> > > > +		break;
> > > > +	      }
> > > > +	    auto __val2 = *__first;
> > > > +	    if (!__comp_proj(__val2, __val1))
> > > > +	      {
> > > > +		if (__comp_proj(__val1, __result.min))
> > > > +		  __result.min = std::move(__val1);
> > > > +		if (!__comp_proj(__val2, __result.max))
> > > > +		  __result.max = std::move(__val2);
> > > > +	      }
> > > > +	    else
> > > > +	      {
> > > > +		if (__comp_proj(__val2, __result.min))
> > > > +		  __result.min = std::move(__val2);
> > > > +		if (!__comp_proj(__val1, __result.max))
> > > > +		  __result.max = std::move(__val1);
> > > > +	      }
> > > 
> > > I thought we might be able to simplify this to something like:
> > > 
> > >     auto __val2 = *__first;
> > >     auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj);
> > >     if (__comp_proj(__min, __result.min))
> > >       __result.min = __min;
> > >     if (__comp_proj(__result.max, __max))
> > >       __result.max = __max;
> > > 
> > > But it doesn't work because we need to move from __min and __max, but
> > > the (*this)(...) returns minmax_result<const T&> and can't be moved
> > > from.
> > > 
> > > We could get around that but it's not much of a simplification:
> > > 
> > >     range_value_t<Range> __val2 = *__first;
> > >     auto [__min, __max] = (*this)(std::addressof(__val1),
> > >                                   std::addressof(__val2),
> > >                                   __comp,
> > >                                   [](auto __p) -> const auto& {
> > >                                     return *__p;
> > >                                   });
> > >     if (__comp_proj(*__min, __result.min))
> > >       __result.min = std::move(*__min);
> > >     if (__comp_proj(__result.max, *__max))
> > >       __result.max = std::move(*__max);
> > 
> > Hmm, now that __val2 was made a universal reference, this simplifcation
> > might not work.
> > 
> > > 
> > > > 	  }
> > > > 	return __result;
> > > >       }
> > > > @@ -3408,21 +3429,40 @@ namespace ranges
> > > >       operator()(_Iter __first, _Sent __last,
> > > > 		 _Comp __comp = {}, _Proj __proj = {}) const
> > > >       {
> > > > -	if (__first == __last)
> > > > -	  return {__first, __first};
> > > > -
> > > > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> > > > 	minmax_element_result<_Iter> __result = {__first, __first};
> > > > -	auto __i = __first;
> > > > -	while (++__i != __last)
> > > > +	if (__first == __last)
> > > > +	  return __result;
> > > > +	while (++__first != __last)
> > > > 	  {
> > > > -	    if (std::__invoke(__comp,
> > > > -			      std::__invoke(__proj, *__i),
> > > > -			      std::__invoke(__proj, *__result.min)))
> > > > -	      __result.min = __i;
> > > > -	    if (!(bool)std::__invoke(__comp,
> > > > -				     std::__invoke(__proj, *__i),
> > > > -				     std::__invoke(__proj, *__result.max)))
> > > > -	      __result.max = __i;
> > > > +	    // Process two elements at a time so that we perform at most
> > > > +	    // 3*N/2 many comparisons in total (each of the N/2 iterations
> > > > +	    // of this loop performs three comparisions).
> > > > +	    auto __prev = __first;
> > > > +	    if (++__first == __last)
> > > > +	      {
> > > > +		// N is odd; in this final iteration, we perform a just one
> > > > +		// comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2
> > > > comparisons.
> > > 
> > > Same comments on the comments as above.
> > 
> > Fixed.
> > 
> > > 
> > > > +		if (__comp_proj(*__prev, *__result.min))
> > > > +		  __result.min = __prev;
> > > > +		else if (!__comp_proj(*__prev, *__result.max))
> > > > +		  __result.max = __prev;
> > > > +		break;
> > > > +	      }
> > > > +	    if (!__comp_proj(*__first, *__prev))
> > > > +	      {
> > > > +		if (__comp_proj(*__prev, *__result.min))
> > > > +		  __result.min = __prev;
> > > > +		if (!__comp_proj(*__first, *__result.max))
> > > > +		  __result.max = __first;
> > > > +	      }
> > > > +	    else
> > > > +	      {
> > > > +		if (__comp_proj(*__first, *__result.min))
> > > > +		  __result.min = __first;
> > > > +		if (!__comp_proj(*__prev, *__result.max))
> > > > +		  __result.max = __prev;
> > > > +	      }
> > > 
> > > We don't need to move anything here, so this could be written using
> > > ranges::minmax. I'm not sure it is an improvement though (except for
> > > being slightly fewer lines of code):
> > > 
> > >     auto __mm = minmax(__prev, __first, __comp,
> > >                        [](auto&& __it) -> auto&& { return *__it; });
> > > 
> > >     if (__comp_proj(*__mm.min, *__result.min))
> > >       __result.min = __mm.min;
> > >     if (__comp_proj(*__result.max, *__mm.max))
> > >       __result.max = __mm.max;
> > 
> > Hmm, it's shorter, but on the other hand it'd make the implementations
> > of minmax and minmax_element no longer mirror each other as closely.  So
> > I didn't make this change for now.
> > 
> > Here's v2, which makes the following additional changes:
> > 
> >   * Fixes comment typos, and some nearby indentation in the signature
> >     of minmax
> >   * Makes both minmax and minmax_element handle the second element
> >     specially in order to optimally perform just a single comparison
> >     when N=2 and to always stay below 3*N/2 comparisons
> >   * Avoids using a deduced return type for __val1
> >   * Makes __val2 a universal reference to avoid a move
> >   * Adds comparison complexity tests for the N=2,3 cases
> > 
> > -- >8 --
> > 
> > Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
> >  complexity
> > 
> > This rewrites ranges::minmax and ranges::minmax_element so that it
> > performs at most 3*N/2 many comparisons, as required by the standard.
> > In passing, this also fixes PR100387 by avoiding a premature std::move
> > in ranges::minmax and in std::shift_right.
> > 
> > Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> > 10/11?
> > 
> > libstdc++-v3/ChangeLog:
> > 
> > 	PR libstdc++/100387
> > 	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
> > 	to limit comparison complexity to 3*N/2.
> > 	(__minmax_element_fn::operator()): Likewise.
> > 	(shift_right): Avoid premature std::move of __result.
> > 	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
> > 	New tests.
> > 	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
> > 	Likewise.
> > ---
> >  libstdc++-v3/include/bits/ranges_algo.h       | 113 ++++++++++++++----
> >  .../25_algorithms/minmax/constrained.cc       |  42 +++++++
> >  .../minmax_element/constrained.cc             |  27 +++++
> >  3 files changed, 156 insertions(+), 26 deletions(-)
> > 
> > diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
> > index cda3042c11f..2091cbf5b4e 100644
> > --- a/libstdc++-v3/include/bits/ranges_algo.h
> > +++ b/libstdc++-v3/include/bits/ranges_algo.h
> > @@ -3283,26 +3283,59 @@ namespace ranges
> >      template<input_range _Range, typename _Proj = identity,
> >  	     indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
> >  	       _Comp = ranges::less>
> > -      requires indirectly_copyable_storable<iterator_t<_Range>,
> > -      range_value_t<_Range>*>
> > +      requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
> >        constexpr minmax_result<range_value_t<_Range>>
> >        operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
> >        {
> >  	auto __first = ranges::begin(__r);
> >  	auto __last = ranges::end(__r);
> >  	__glibcxx_assert(__first != __last);
> > +	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
> >  	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
> > +	if (++__first == __last)
> > +	  return __result;
> > +	else
> > +	  {
> > +	    // At this point __result.min == __result.max, so a single
> > +	    // comparison with the next element suffices.
> > +	    auto&& __val = *__first;
> > +	    if (__comp_proj(__val, __result.min))
> > +	      __result.min = std::forward<decltype(__val)>(__val);
> > +	    else
> > +	      __result.max = std::forward<decltype(__val)>(__val);
> > +	  }
> >  	while (++__first != __last)
> >  	  {
> > -	    auto __tmp = *__first;
> > -	    if (std::__invoke(__comp,
> > -			      std::__invoke(__proj, __tmp),
> > -			      std::__invoke(__proj, __result.min)))
> > -	      __result.min = std::move(__tmp);
> > -	    if (!(bool)std::__invoke(__comp,
> > -				     std::__invoke(__proj, __tmp),
> > -				     std::__invoke(__proj, __result.max)))
> > -	      __result.max = std::move(__tmp);
> > +	    // Now process two elements at a time so that we perform at most
> > +	    // 3*(N-2)/2 comparisons in total (each of the (N-2)/2 iterations
> 
> Oops, this should now say "1 + 3*(N-2)/2 comparisons" because of the
> initial comparison with the second element above.

Ping; here's the same patch with the above comment corrected.

-- >8 --

Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
 complexity

This rewrites ranges::minmax and ranges::minmax_element so that it
performs at most 3*N/2 many comparisons, as required by the standard.
In passing, this also fixes PR100387 by avoiding a premature std::move
in ranges::minmax and in std::shift_right.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
10/11?

libstdc++-v3/ChangeLog:

	PR libstdc++/100387
	* include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite
	to limit comparison complexity to 3*N/2.
	(__minmax_element_fn::operator()): Likewise.
	(shift_right): Avoid premature std::move of __result.
	* testsuite/25_algorithms/minmax/constrained.cc (test04, test05):
	New tests.
	* testsuite/25_algorithms/minmax_element/constrained.cc (test02):
	Likewise.
---
 libstdc++-v3/include/bits/ranges_algo.h       | 113 ++++++++++++++----
 .../25_algorithms/minmax/constrained.cc       |  42 +++++++
 .../minmax_element/constrained.cc             |  27 +++++
 3 files changed, 156 insertions(+), 26 deletions(-)

diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h
index ecf1378742d..7cf5fb3f187 100644
--- a/libstdc++-v3/include/bits/ranges_algo.h
+++ b/libstdc++-v3/include/bits/ranges_algo.h
@@ -3287,26 +3287,59 @@ namespace ranges
     template<input_range _Range, typename _Proj = identity,
 	     indirect_strict_weak_order<projected<iterator_t<_Range>, _Proj>>
 	       _Comp = ranges::less>
-      requires indirectly_copyable_storable<iterator_t<_Range>,
-      range_value_t<_Range>*>
+      requires indirectly_copyable_storable<iterator_t<_Range>, range_value_t<_Range>*>
       constexpr minmax_result<range_value_t<_Range>>
       operator()(_Range&& __r, _Comp __comp = {}, _Proj __proj = {}) const
       {
 	auto __first = ranges::begin(__r);
 	auto __last = ranges::end(__r);
 	__glibcxx_assert(__first != __last);
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_result<range_value_t<_Range>> __result = {*__first, *__first};
+	if (++__first == __last)
+	  return __result;
+	else
+	  {
+	    // At this point __result.min == __result.max, so a single
+	    // comparison with the next element suffices.
+	    auto&& __val = *__first;
+	    if (__comp_proj(__val, __result.min))
+	      __result.min = std::forward<decltype(__val)>(__val);
+	    else
+	      __result.max = std::forward<decltype(__val)>(__val);
+	  }
 	while (++__first != __last)
 	  {
-	    auto __tmp = *__first;
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, __tmp),
-			      std::__invoke(__proj, __result.min)))
-	      __result.min = std::move(__tmp);
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, __tmp),
-				     std::__invoke(__proj, __result.max)))
-	      __result.max = std::move(__tmp);
+	    // Now process two elements at a time so that we perform at most
+	    // 1 + 3*(N-2)/2 comparisons in total (each of the (N-2)/2
+	    // iterations of this loop performs three comparisons).
+	    range_value_t<_Range> __val1 = *__first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform at most two
+		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+		// which is not more than 3*N/2, as required.
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		else if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+		break;
+	      }
+	    auto&& __val2 = *__first;
+	    if (!__comp_proj(__val2, __val1))
+	      {
+		if (__comp_proj(__val1, __result.min))
+		  __result.min = std::move(__val1);
+		if (!__comp_proj(__val2, __result.max))
+		  __result.max = std::forward<decltype(__val2)>(__val2);
+	      }
+	    else
+	      {
+		if (__comp_proj(__val2, __result.min))
+		  __result.min = std::forward<decltype(__val2)>(__val2);
+		if (!__comp_proj(__val1, __result.max))
+		  __result.max = std::move(__val1);
+	      }
 	  }
 	return __result;
       }
@@ -3412,21 +3445,50 @@ namespace ranges
       operator()(_Iter __first, _Sent __last,
 		 _Comp __comp = {}, _Proj __proj = {}) const
       {
-	if (__first == __last)
-	  return {__first, __first};
-
+	auto __comp_proj = __detail::__make_comp_proj(__comp, __proj);
 	minmax_element_result<_Iter> __result = {__first, __first};
-	auto __i = __first;
-	while (++__i != __last)
+	if (__first == __last || ++__first == __last)
+	  return __result;
+	else
 	  {
-	    if (std::__invoke(__comp,
-			      std::__invoke(__proj, *__i),
-			      std::__invoke(__proj, *__result.min)))
-	      __result.min = __i;
-	    if (!(bool)std::__invoke(__comp,
-				     std::__invoke(__proj, *__i),
-				     std::__invoke(__proj, *__result.max)))
-	      __result.max = __i;
+	    // At this point __result.min == __result.max, so a single
+	    // comparison with the next element suffices.
+	    if (__comp_proj(*__first, *__result.min))
+	      __result.min = __first;
+	    else
+	      __result.max = __first;
+	  }
+	while (++__first != __last)
+	  {
+	    // Now process two elements at a time so that we perform at most
+	    // 1 + 3*(N-2)/2 comparisons in total (each of the (N-2)/2
+	    // iterations of this loop performs three comparisons).
+	    auto __prev = __first;
+	    if (++__first == __last)
+	      {
+		// N is odd; in this final iteration, we perform at most two
+		// comparisons, for a total of 1 + 3*(N-3)/2 + 2 comparisons,
+		// which is not more than 3*N/2, as required.
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		else if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+		break;
+	      }
+	    if (!__comp_proj(*__first, *__prev))
+	      {
+		if (__comp_proj(*__prev, *__result.min))
+		  __result.min = __prev;
+		if (!__comp_proj(*__first, *__result.max))
+		  __result.max = __first;
+	      }
+	    else
+	      {
+		if (__comp_proj(*__first, *__result.min))
+		  __result.min = __first;
+		if (!__comp_proj(*__prev, *__result.max))
+		  __result.max = __prev;
+	      }
 	  }
 	return __result;
       }
@@ -3753,8 +3815,7 @@ namespace ranges
 		  // i.e. we are shifting out at least half of the range.  In
 		  // this case we can safely perform the shift with a single
 		  // move.
-		  std::move(std::move(__first), std::move(__dest_head),
-			    std::move(__result));
+		  std::move(std::move(__first), std::move(__dest_head), __result);
 		  return __result;
 		}
 	      ++__dest_head;
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
index 786922414b5..c365152bf2b 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax/constrained.cc
@@ -19,6 +19,8 @@
 // { dg-do run { target c++2a } }
 
 #include <algorithm>
+#include <string>
+#include <vector>
 #include <testsuite_hooks.h>
 #include <testsuite_iterators.h>
 
@@ -89,10 +91,50 @@ test03()
 	  == res_t(1,4) );
 }
 
+void
+test04()
+{
+  // Verify we perform at most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  ranges::minmax({1,2}, counted_less{});
+  VERIFY( counter == 1 );
+
+  counter = 0;
+  ranges::minmax({1,2,3}, counted_less{});
+  VERIFY( counter == 3 );
+
+  counter = 0;
+  ranges::minmax({1,2,3,4,5,6,7,8,9,10}, counted_less{});
+  VERIFY( counter <= 15 );
+
+  counter = 0;
+  ranges::minmax({10,9,8,7,6,5,4,3,2,1}, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
+void
+test05()
+{
+  // PR libstdc++/100387
+  using namespace std::literals::string_literals;
+  auto comp = [](const auto& a, const auto& b) {
+    return a.size() == b.size() ? a.front() < b.front() : a.size() > b.size();
+  };
+  auto result = ranges::minmax({"b"s, "a"s}, comp);
+  VERIFY( result.min == "a"s && result.max == "b"s );
+  result = ranges::minmax({"c"s, "b"s, "a"s}, comp);
+  VERIFY( result.min == "a"s && result.max == "c"s );
+}
+
 int
 main()
 {
   test01();
   test02();
   test03();
+  test04();
+  test05();
 }
diff --git a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
index 3b11c0dd96c..0919f7dda8f 100644
--- a/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
+++ b/libstdc++-v3/testsuite/25_algorithms/minmax_element/constrained.cc
@@ -61,8 +61,35 @@ test01()
   static_assert(ranges::minmax_element(y, y+3, {}, &X::i).max->j == 3);
 }
 
+void
+test02()
+{
+  // Verify we perform at most 3*N/2 applications of the comparison predicate.
+  static int counter;
+  struct counted_less
+  { bool operator()(int a, int b) { ++counter; return a < b; } };
+
+  int x[] = {1,2,3,4,5,6,7,8,9,10};
+  ranges::minmax_element(x, x+2, counted_less{});
+  VERIFY( counter == 1 );
+
+  counter = 0;
+  ranges::minmax_element(x, x+3, counted_less{});
+  VERIFY( counter == 3 );
+
+  counter = 0;
+  ranges::minmax_element(x, counted_less{});
+  VERIFY( counter <= 15 );
+
+  ranges::reverse(x);
+  counter = 0;
+  ranges::minmax_element(x, counted_less{});
+  VERIFY( counter <= 15 );
+}
+
 int
 main()
 {
   test01();
+  test02();
 }
-- 
2.32.0.93.g670b81a890


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity
  2021-06-18 17:44       ` Patrick Palka
@ 2021-06-18 21:19         ` Jonathan Wakely
  0 siblings, 0 replies; 7+ messages in thread
From: Jonathan Wakely @ 2021-06-18 21:19 UTC (permalink / raw)
  To: Patrick Palka; +Cc: gcc Patches, libstdc++

On Fri, 18 Jun 2021 at 18:44, Patrick Palka wrote:
>
> Ping; here's the same patch with the above comment corrected.
>
> -- >8 --
>
> Subject: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison
>  complexity
>
> This rewrites ranges::minmax and ranges::minmax_element so that it
> performs at most 3*N/2 many comparisons, as required by the standard.
> In passing, this also fixes PR100387 by avoiding a premature std::move
> in ranges::minmax and in std::shift_right.
>
> Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps
> 10/11?

OK, for all branches, thanks.


^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2021-06-18 21:19 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-05  1:42 [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity Patrick Palka
2021-05-05  9:39 ` Jonathan Wakely
2021-05-05  9:46   ` Jonathan Wakely
2021-05-05 15:33   ` Patrick Palka
2021-05-05 15:48     ` Patrick Palka
2021-06-18 17:44       ` Patrick Palka
2021-06-18 21:19         ` Jonathan Wakely

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).