On Thu, 8 Jun 2023 at 09:58, Maxim Kuvyrkov wrote: > Hi Jonathan, > > Interestingly, this increases code-size of -O3 code on aarch64-linux-gnu > on SPEC CPU2017's 641.leela_s benchmark [1]. > > In particular, FastBoard::get_nearby_enemies() grew from 1444 to 2212 > bytes. This seems like a corner-case; the rest of SPEC CPU2017 is, mostly, > neutral to this patch. Is this something you may be interested in > investigating? I'll be happy to assist. > > Looking at assembly, one of the differences I see is that the "after" > version has calls to realloc_insert(), while "before" version seems to have > them inlined [2]. > Was the size of that function stable at (approximately) 1444 bytes prior to my most recent change? Is it possible that r14-1452-gfb409a15d9babc caused the size to shrink, and then r14-1470-gb7b255e77a2719 caused it to grow again? > > [1] > https://git.linaro.org/toolchain/ci/interesting-commits.git/tree/gcc/sha1/b7b255e77a271974479c34d1db3daafc04b920bc/tcwg_bmk-code_size-cpu2017fast/status.txt > > [2] 641.leela_s is non-GPL/non-BSD benchmark, and I'm not sure if I can > post its compiled and/or preprocessed code publicly. I assume RedHat has > SPEC CPU2017 license, and I can post details to you privately. > > Kind regards, > > -- > Maxim Kuvyrkov > https://www.linaro.org > > > > > > On Jun 1, 2023, at 19:09, Jonathan Wakely via Gcc-patches < > gcc-patches@gcc.gnu.org> wrote: > > > > Tested powerpc64le-linux. Pusshed to trunk. > > > > -- >8 -- > > > > My r14-1452-gfb409a15d9babc change to add optimization hints to > > std::vector causes regressions because it makes std::vector::size() and > > std::vector::capacity() too big to inline. That's the opposite of what > > I wanted, so revert the changes to those functions. > > > > To achieve the original aim of optimizing vec.assign(vec.size(), x) we > > can add a local optimization hint to _M_fill_assign, so that it doesn't > > affect all other uses of size() and capacity(). > > > > Additionally, add the same hint to the _M_assign_aux overload for > > forward iterators and add that to the testcase. > > > > It would be nice to similarly optimize: > > if (vec1.size() == vec2.size()) vec1 = vec2; > > but adding hints to operator=(const vector&) doesn't help. Presumably > > the relationships between the two sizes and two capacities are too > > complex to track effectively. > > > > libstdc++-v3/ChangeLog: > > > > PR libstdc++/110060 > > * include/bits/stl_vector.h (_Vector_base::_M_invariant): > > Remove. > > (vector::size, vector::capacity): Remove calls to _M_invariant. > > * include/bits/vector.tcc (vector::_M_fill_assign): Add > > optimization hint to reallocating path. > > (vector::_M_assign_aux(FwdIter, FwdIter, forward_iterator_tag)): > > Likewise. > > * testsuite/23_containers/vector/capacity/invariant.cc: Moved > > to... > > * testsuite/23_containers/vector/modifiers/assign/no_realloc.cc: > > ...here. Check assign(FwdIter, FwdIter) too. > > * testsuite/23_containers/vector/types/1.cc: Revert addition > > of -Wno-stringop-overread option. > > --- > > libstdc++-v3/include/bits/stl_vector.h | 23 +------------------ > > libstdc++-v3/include/bits/vector.tcc | 17 ++++++++++---- > > .../assign/no_realloc.cc} | 6 +++++ > > .../testsuite/23_containers/vector/types/1.cc | 2 +- > > 4 files changed, 20 insertions(+), 28 deletions(-) > > rename > libstdc++-v3/testsuite/23_containers/vector/{capacity/invariant.cc => > modifiers/assign/no_realloc.cc} (70%) > > > > diff --git a/libstdc++-v3/include/bits/stl_vector.h > b/libstdc++-v3/include/bits/stl_vector.h > > index e593be443bc..70ced3d101f 100644 > > --- a/libstdc++-v3/include/bits/stl_vector.h > > +++ b/libstdc++-v3/include/bits/stl_vector.h > > @@ -389,23 +389,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > > > protected: > > > > - __attribute__((__always_inline__)) > > - _GLIBCXX20_CONSTEXPR void > > - _M_invariant() const > > - { > > -#if __OPTIMIZE__ > > - if (this->_M_impl._M_finish < this->_M_impl._M_start) > > - __builtin_unreachable(); > > - if (this->_M_impl._M_finish > this->_M_impl._M_end_of_storage) > > - __builtin_unreachable(); > > - > > - size_t __sz = this->_M_impl._M_finish - this->_M_impl._M_start; > > - size_t __cap = this->_M_impl._M_end_of_storage - > this->_M_impl._M_start; > > - if (__sz > __cap) > > - __builtin_unreachable(); > > -#endif > > - } > > - > > _GLIBCXX20_CONSTEXPR > > void > > _M_create_storage(size_t __n) > > @@ -1005,10 +988,7 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > size_type > > size() const _GLIBCXX_NOEXCEPT > > - { > > - _Base::_M_invariant(); > > - return size_type(this->_M_impl._M_finish - this->_M_impl._M_start); > > - } > > + { return size_type(this->_M_impl._M_finish - > this->_M_impl._M_start); } > > > > /** Returns the size() of the largest possible %vector. */ > > _GLIBCXX_NODISCARD _GLIBCXX20_CONSTEXPR > > @@ -1095,7 +1075,6 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > size_type > > capacity() const _GLIBCXX_NOEXCEPT > > { > > - _Base::_M_invariant(); > > return size_type(this->_M_impl._M_end_of_storage > > - this->_M_impl._M_start); > > } > > diff --git a/libstdc++-v3/include/bits/vector.tcc > b/libstdc++-v3/include/bits/vector.tcc > > index d6fdea2dd01..acd11e2dc68 100644 > > --- a/libstdc++-v3/include/bits/vector.tcc > > +++ b/libstdc++-v3/include/bits/vector.tcc > > @@ -270,15 +270,18 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > vector<_Tp, _Alloc>:: > > _M_fill_assign(size_t __n, const value_type& __val) > > { > > + const size_type __sz = size(); > > if (__n > capacity()) > > { > > + if (__n <= __sz) > > + __builtin_unreachable(); > > vector __tmp(__n, __val, _M_get_Tp_allocator()); > > __tmp._M_impl._M_swap_data(this->_M_impl); > > } > > - else if (__n > size()) > > + else if (__n > __sz) > > { > > std::fill(begin(), end(), __val); > > - const size_type __add = __n - size(); > > + const size_type __add = __n - __sz; > > _GLIBCXX_ASAN_ANNOTATE_GROW(__add); > > this->_M_impl._M_finish = > > std::__uninitialized_fill_n_a(this->_M_impl._M_finish, > > @@ -316,10 +319,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > _M_assign_aux(_ForwardIterator __first, _ForwardIterator __last, > > std::forward_iterator_tag) > > { > > + const size_type __sz = size(); > > const size_type __len = std::distance(__first, __last); > > > > if (__len > capacity()) > > { > > + if (__len <= __sz) > > + __builtin_unreachable(); > > + > > _S_check_init_len(__len, _M_get_Tp_allocator()); > > pointer __tmp(_M_allocate_and_copy(__len, __first, __last)); > > std::_Destroy(this->_M_impl._M_start, this->_M_impl._M_finish, > > @@ -332,14 +339,14 @@ _GLIBCXX_BEGIN_NAMESPACE_CONTAINER > > this->_M_impl._M_finish = this->_M_impl._M_start + __len; > > this->_M_impl._M_end_of_storage = this->_M_impl._M_finish; > > } > > - else if (size() >= __len) > > + else if (__sz >= __len) > > _M_erase_at_end(std::copy(__first, __last, this->_M_impl._M_start)); > > else > > { > > _ForwardIterator __mid = __first; > > - std::advance(__mid, size()); > > + std::advance(__mid, __sz); > > std::copy(__first, __mid, this->_M_impl._M_start); > > - const size_type __attribute__((__unused__)) __n = __len - size(); > > + const size_type __attribute__((__unused__)) __n = __len - __sz; > > _GLIBCXX_ASAN_ANNOTATE_GROW(__n); > > this->_M_impl._M_finish = > > std::__uninitialized_copy_a(__mid, __last, > > diff --git > a/libstdc++-v3/testsuite/23_containers/vector/capacity/invariant.cc > b/libstdc++-v3/testsuite/23_containers/vector/modifiers/assign/no_realloc.cc > > similarity index 70% > > rename from > libstdc++-v3/testsuite/23_containers/vector/capacity/invariant.cc > > rename to > libstdc++-v3/testsuite/23_containers/vector/modifiers/assign/no_realloc.cc > > index d68db694add..659f18dba56 100644 > > --- a/libstdc++-v3/testsuite/23_containers/vector/capacity/invariant.cc > > +++ > b/libstdc++-v3/testsuite/23_containers/vector/modifiers/assign/no_realloc.cc > > @@ -1,5 +1,6 @@ > > // { dg-do compile } > > // { dg-options "-O3 -g0" } > > +// { dg-require-normal-mode "" } > > // { dg-final { scan-assembler-not "_Znw" } } > > // GCC should be able to optimize away the paths involving reallocation. > > > > @@ -14,3 +15,8 @@ void fill_val(std::vector& vec, int i) > > { > > vec.assign(vec.size(), i); > > } > > + > > +void fill_range(std::vector& vec, const int* first) > > +{ > > + vec.assign(first, first + vec.size()); > > +} > > diff --git a/libstdc++-v3/testsuite/23_containers/vector/types/1.cc > b/libstdc++-v3/testsuite/23_containers/vector/types/1.cc > > index 9be07d9fd5c..079e5af9556 100644 > > --- a/libstdc++-v3/testsuite/23_containers/vector/types/1.cc > > +++ b/libstdc++-v3/testsuite/23_containers/vector/types/1.cc > > @@ -18,7 +18,7 @@ > > // . > > > > // { dg-do compile } > > -// { dg-options "-Wno-unused-result -Wno-stringop-overread" } > > +// { dg-options "-Wno-unused-result" } > > > > #include > > #include > > -- > > 2.40.1 > > > >