From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: from us-smtp-delivery-124.mimecast.com (us-smtp-delivery-124.mimecast.com [170.10.133.124]) by sourceware.org (Postfix) with ESMTP id 2435038515DA for ; Wed, 5 May 2021 09:39:37 +0000 (GMT) DMARC-Filter: OpenDMARC Filter v1.3.2 sourceware.org 2435038515DA Received: from mimecast-mx01.redhat.com (mimecast-mx01.redhat.com [209.132.183.4]) (Using TLS) by relay.mimecast.com with ESMTP id us-mta-300-lXHZNnpJNKmuRmHUQI66OQ-1; Wed, 05 May 2021 05:39:35 -0400 X-MC-Unique: lXHZNnpJNKmuRmHUQI66OQ-1 Received: from smtp.corp.redhat.com (int-mx06.intmail.prod.int.phx2.redhat.com [10.5.11.16]) (using TLSv1.2 with cipher AECDH-AES256-SHA (256/256 bits)) (No client certificate requested) by mimecast-mx01.redhat.com (Postfix) with ESMTPS id 5987A107ACC7; Wed, 5 May 2021 09:39:34 +0000 (UTC) Received: from localhost (unknown [10.33.36.164]) by smtp.corp.redhat.com (Postfix) with ESMTP id E7C095C5B5; Wed, 5 May 2021 09:39:32 +0000 (UTC) Date: Wed, 5 May 2021 10:39:31 +0100 From: Jonathan Wakely To: Patrick Palka Cc: gcc-patches@gcc.gnu.org, libstdc++@gcc.gnu.org Subject: Re: [PATCH] libstdc++: Reduce ranges::minmax/minmax_element comparison complexity Message-ID: <20210505093931.GP3008@redhat.com> References: <20210505014220.1216746-1-ppalka@redhat.com> MIME-Version: 1.0 In-Reply-To: <20210505014220.1216746-1-ppalka@redhat.com> X-Clacks-Overhead: GNU Terry Pratchett X-Scanned-By: MIMEDefang 2.79 on 10.5.11.16 X-Mimecast-Spam-Score: 0 X-Mimecast-Originator: redhat.com Content-Type: text/plain; charset=us-ascii; format=flowed Content-Disposition: inline X-Spam-Status: No, score=-14.3 required=5.0 tests=BAYES_00, DKIMWL_WL_HIGH, DKIM_SIGNED, DKIM_VALID, DKIM_VALID_AU, DKIM_VALID_EF, GIT_PATCH_0, RCVD_IN_DNSWL_LOW, RCVD_IN_MSPIKE_H4, RCVD_IN_MSPIKE_WL, SPF_HELO_NONE, SPF_PASS, TXREP autolearn=ham autolearn_force=no version=3.4.2 X-Spam-Checker-Version: SpamAssassin 3.4.2 (2018-09-13) on server2.sourceware.org X-BeenThere: gcc-patches@gcc.gnu.org X-Mailman-Version: 2.1.29 Precedence: list List-Id: Gcc-patches mailing list List-Unsubscribe: , List-Archive: List-Post: List-Help: List-Subscribe: , X-List-Received-Date: Wed, 05 May 2021 09:39:38 -0000 On 04/05/21 21:42 -0400, Patrick Palka via Libstdc++ wrote: >This rewrites ranges::minmax and ranges::minmax_element so that it >performs at most 3*N/2 many comparisons, as required by the standard. >In passing, this also fixes PR100387 by avoiding a premature std::move >in ranges::minmax and in std::shift_right. > >Tested on x86_64-pc-linux-gnu, does this look OK for trunk and perhaps >10/11? > >libstdc++-v3/ChangeLog: > > PR libstdc++/100387 > * include/bits/ranges_algo.h (__minmax_fn::operator()): Rewrite > to limit comparison complexity to 3*N/2. Avoid premature std::move. > (__minmax_element_fn::operator()): Likewise. > (shift_right): Avoid premature std::move of __result. > * testsuite/25_algorithms/minmax/constrained.cc (test04, test05): > New tests. > * testsuite/25_algorithms/minmax_element/constrained.cc (test02): > Likewise. >--- > libstdc++-v3/include/bits/ranges_algo.h | 87 ++++++++++++++----- > .../25_algorithms/minmax/constrained.cc | 31 +++++++ > .../minmax_element/constrained.cc | 19 ++++ > 3 files changed, 113 insertions(+), 24 deletions(-) > >diff --git a/libstdc++-v3/include/bits/ranges_algo.h b/libstdc++-v3/include/bits/ranges_algo.h >index cda3042c11f..bbd29127e89 100644 >--- a/libstdc++-v3/include/bits/ranges_algo.h >+++ b/libstdc++-v3/include/bits/ranges_algo.h >@@ -3291,18 +3291,39 @@ namespace ranges > auto __first = ranges::begin(__r); > auto __last = ranges::end(__r); > __glibcxx_assert(__first != __last); >+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj); > minmax_result> __result = {*__first, *__first}; > while (++__first != __last) > { >- auto __tmp = *__first; >- if (std::__invoke(__comp, >- std::__invoke(__proj, __tmp), >- std::__invoke(__proj, __result.min))) >- __result.min = std::move(__tmp); >- if (!(bool)std::__invoke(__comp, >- std::__invoke(__proj, __tmp), >- std::__invoke(__proj, __result.max))) >- __result.max = std::move(__tmp); >+ // Process two elements at a time so that we perform at most >+ // 3*N/2 many comparisons in total (each of the N/2 iterations Is "many" a typo here? >+ // of this loop performs three comparisions). >+ auto __val1 = *__first; Can we avoid making this copy if the range satisfies forward_range, by keeping copies of the min/max iterators, or just forwarding to ranges::minmax_element? >+ if (++__first == __last) >+ { >+ // N is odd; in this final iteration, we perform a just one s/perform a just one/perform just one/ >+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons. I find this a bit hard to parse with the inequality there. >+ if (__comp_proj(__val1, __result.min)) >+ __result.min = std::move(__val1); >+ else if (!__comp_proj(__val1, __result.max)) >+ __result.max = std::move(__val1); This can be two comparisons, can't it? Would this be better... // N is odd; in this final iteration, we perform at most two // comparisons, for a total of 3*(N-1)/2 + 2 comparisons, // which is not more than 3*N/2, as required. ? >+ break; >+ } >+ auto __val2 = *__first; >+ if (!__comp_proj(__val2, __val1)) >+ { >+ if (__comp_proj(__val1, __result.min)) >+ __result.min = std::move(__val1); >+ if (!__comp_proj(__val2, __result.max)) >+ __result.max = std::move(__val2); >+ } >+ else >+ { >+ if (__comp_proj(__val2, __result.min)) >+ __result.min = std::move(__val2); >+ if (!__comp_proj(__val1, __result.max)) >+ __result.max = std::move(__val1); >+ } I thought we might be able to simplify this to something like: auto __val2 = *__first; auto&& [__min, __max] = (*this)(__val1, __val2, __comp, __proj); if (__comp_proj(__min, __result.min)) __result.min = __min; if (__comp_proj(__result.max, __max)) __result.max = __max; But it doesn't work because we need to move from __min and __max, but the (*this)(...) returns minmax_result and can't be moved from. We could get around that but it's not much of a simplification: range_value_t __val2 = *__first; auto [__min, __max] = (*this)(std::addressof(__val1), std::addressof(__val2), __comp, [](auto __p) -> const auto& { return *__p; }); if (__comp_proj(*__min, __result.min)) __result.min = std::move(*__min); if (__comp_proj(__result.max, *__max)) __result.max = std::move(*__max); > } > return __result; > } >@@ -3408,21 +3429,40 @@ namespace ranges > operator()(_Iter __first, _Sent __last, > _Comp __comp = {}, _Proj __proj = {}) const > { >- if (__first == __last) >- return {__first, __first}; >- >+ auto __comp_proj = __detail::__make_comp_proj(__comp, __proj); > minmax_element_result<_Iter> __result = {__first, __first}; >- auto __i = __first; >- while (++__i != __last) >+ if (__first == __last) >+ return __result; >+ while (++__first != __last) > { >- if (std::__invoke(__comp, >- std::__invoke(__proj, *__i), >- std::__invoke(__proj, *__result.min))) >- __result.min = __i; >- if (!(bool)std::__invoke(__comp, >- std::__invoke(__proj, *__i), >- std::__invoke(__proj, *__result.max))) >- __result.max = __i; >+ // Process two elements at a time so that we perform at most >+ // 3*N/2 many comparisons in total (each of the N/2 iterations >+ // of this loop performs three comparisions). >+ auto __prev = __first; >+ if (++__first == __last) >+ { >+ // N is odd; in this final iteration, we perform a just one >+ // comparison, for a total of 3*(N-1)/2 + 1 < 3*N/2 comparisons. Same comments on the comments as above. >+ if (__comp_proj(*__prev, *__result.min)) >+ __result.min = __prev; >+ else if (!__comp_proj(*__prev, *__result.max)) >+ __result.max = __prev; >+ break; >+ } >+ if (!__comp_proj(*__first, *__prev)) >+ { >+ if (__comp_proj(*__prev, *__result.min)) >+ __result.min = __prev; >+ if (!__comp_proj(*__first, *__result.max)) >+ __result.max = __first; >+ } >+ else >+ { >+ if (__comp_proj(*__first, *__result.min)) >+ __result.min = __first; >+ if (!__comp_proj(*__prev, *__result.max)) >+ __result.max = __prev; >+ } We don't need to move anything here, so this could be written using ranges::minmax. I'm not sure it is an improvement though (except for being slightly fewer lines of code): auto __mm = minmax(__prev, __first, __comp, [](auto&& __it) -> auto&& { return *__it; }); if (__comp_proj(*__mm.min, *__result.min)) __result.min = __mm.min; if (__comp_proj(*__result.max, *__mm.max)) __result.max = __mm.max;