From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 48) id DB81C3858D1E; Wed, 2 Aug 2023 19:26:20 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org DB81C3858D1E DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1691004380; bh=Ye6OQ69648NW8ft1Pi+QDRXYFEaemBJKGGtI758uIu4=; h=From:To:Subject:Date:From; b=DuK0AkOA8DQ+TJuZJ7KGTvbyfzeN+Jl++CTZNoi3uXWskKAS+5UYrFWv/4UuKq0eg +ASo8z/l/ne/UENGfgS+Lkg15rGl6AIF0LSMWDESIG1p691/HwNPf9/Cc+8Z5vYGVH wvQW4EbojA5NZA3zu3POcr34oJK9QqNYTGE6hKuw= From: "palevichva at gmail dot com" To: gcc-bugs@gcc.gnu.org Subject: [Bug c++/110879] New: Unnecessary reread from memory in a loop Date: Wed, 02 Aug 2023 19:26:20 +0000 X-Bugzilla-Reason: CC X-Bugzilla-Type: new X-Bugzilla-Watch-Reason: None X-Bugzilla-Product: gcc X-Bugzilla-Component: c++ X-Bugzilla-Version: 14.0 X-Bugzilla-Keywords: X-Bugzilla-Severity: normal X-Bugzilla-Who: palevichva at gmail dot com X-Bugzilla-Status: UNCONFIRMED X-Bugzilla-Resolution: X-Bugzilla-Priority: P3 X-Bugzilla-Assigned-To: unassigned at gcc dot gnu.org X-Bugzilla-Target-Milestone: --- X-Bugzilla-Flags: X-Bugzilla-Changed-Fields: bug_id short_desc product version bug_status bug_severity priority component assigned_to reporter target_milestone attachments.created Message-ID: Content-Type: text/plain; charset="UTF-8" Content-Transfer-Encoding: quoted-printable X-Bugzilla-URL: http://gcc.gnu.org/bugzilla/ Auto-Submitted: auto-generated MIME-Version: 1.0 List-Id: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=3D110879 Bug ID: 110879 Summary: Unnecessary reread from memory in a loop Product: gcc Version: 14.0 Status: UNCONFIRMED Severity: normal Priority: P3 Component: c++ Assignee: unassigned at gcc dot gnu.org Reporter: palevichva at gmail dot com Target Milestone: --- Created attachment 55678 --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=3D55678&action=3Dedit preprocessed file by g++ from revision dd2eb972a I've found 
a strange regression in optimization. The trunk version of g++ produces less optimal assembly. It rereads the same memory location in every iteration of a loop. More specifically, it rereads the fields _M_finish and _M_end_of_storage of a vector from memory on every push_back call, although this is not necessary. The released version 13.2 doesn't do that and just uses the values from registers. I'm compiling the following code: #include <vector> std::vector<int> f(std::size_t n) { std::vector<int> res; res.reserve(n); for (std::size_t i = 0; i < n; ++i) { res.push_back(i*i); } return res; } The main body of the loop looks like this: ~/.local/gcc/bin/g++ -S -fverbose-asm -O3 -std=c++20 pb.cpp >.L41: ># /home/scaiper/.local/gcc/include/c++/14.0.0/bits/stl_construct.h:97: { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } > movl %r15d, (%rbx) # _3, *prephitmp_51 ># /home/scaiper/.local/gcc/include/c++/14.0.0/bits/vector.tcc:119: ++this->_M_impl._M_finish; > addq $4, %rbx #, tmp135 > movq %rbx, 8(%rbp) # tmp135, res_8(D)->D.35756._M_impl.D.35067._M_finish >.L8: ># pb.cpp:6: for (std::size_t i = 0; i < n; ++i) { > addq $1, %r13 #, i ># pb.cpp:6: for (std::size_t i = 0; i < n; ++i) { > cmpq %r13, %r12 # i, n > je .L1 #, ># /home/scaiper/.local/gcc/include/c++/14.0.0/bits/vector.tcc:114: if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage) > movq 8(%rbp), %rbx # res_8(D)->D.35756._M_impl.D.35067._M_finish, prephitmp_51 ># /home/scaiper/.local/gcc/include/c++/14.0.0/bits/vector.tcc:114: if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage) > movq 16(%rbp), %rax # res_8(D)->D.35756._M_impl.D.35067._M_end_of_storage, pretmp_52 >.L16: ># pb.cpp:7: res.push_back(i*i); > movl %r13d, %r15d # i, _3 > imull %r13d, %r15d # i, _3 ># /home/scaiper/.local/gcc/include/c++/14.0.0/bits/vector.tcc:114: if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage) > cmpq %rax, %rbx # pretmp_52, prephitmp_51 > jne .L41 #, The same loop as produced by 13.2: 
~/.local/gcc-13.2/bin/g++ -v -S -fverbose-asm -O3 -std=c++20 pb.cpp >.L43: ># /home/scaiper/.local/gcc-13.2/include/c++/13.2.0/bits/stl_construct.h:97: { return ::new((void*)__location) _Tp(std::forward<_Args>(__args)...); } > movl %r12d, (%rcx) # _3, *prephitmp_4 ># /home/scaiper/.local/gcc-13.2/include/c++/13.2.0/bits/vector.tcc:119: ++this->_M_impl._M_finish; > addq $4, %rcx #, prephitmp_4 > movq %rcx, 8(%rbp) # prephitmp_4, res_8(D)->D.35699._M_impl.D.35010._M_finish >.L8: ># pb.cpp:6: for (std::size_t i = 0; i < n; ++i) { > addq $1, %rbx #, i ># pb.cpp:6: for (std::size_t i = 0; i < n; ++i) { > cmpq %rbx, %r13 # i, n > je .L1 #, >.L18: ># pb.cpp:7: res.push_back(i*i); > movl %ebx, %r12d # i, _3 > imull %ebx, %r12d # i, _3 ># /home/scaiper/.local/gcc-13.2/include/c++/13.2.0/bits/vector.tcc:114: if (this->_M_impl._M_finish != this->_M_impl._M_end_of_storage) > cmpq %r8, %rcx # prephitmp_74, prephitmp_4 > jne .L43 #, Notice these extra instructions in the first snippet: movq 8(%rbp), %rbx movq 16(%rbp), %rax I've bisected this problem to the commit dd2eb972a (libstdc++: Use RAII in std::vector::_M_realloc_insert) (https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dd2eb972a5b063e10c83878d5c9336a818fa8291). It doesn't look like the commit itself is the problem. The code looks pretty much equivalent. But for some reason the compiler produces a different result. 
I'm using a version built from the aforementioned commit dd2eb972a: Target: x86_64-pc-linux-gnu Configured with: ../gcc/configure --enable-languages=c++ --disable-multilib --prefix=/home/scaiper/.local/gcc gcc version 14.0.0 20230623 (experimental) (GCC) COLLECT_GCC_OPTIONS='-v' '-S' '-fverbose-asm' '-O3' '-std=c++20' '-shared-libgcc' '-mtune=generic' '-march=x86-64' Comparing with 13.2: Target: x86_64-pc-linux-gnu Configured with: ../gcc/configure --enable-languages=c++ --disable-multilib --prefix=/home/scaiper/.local/gcc-13.2 gcc version 13.2.0 (GCC) COLLECT_GCC_OPTIONS='-v' '-S' '-fverbose-asm' '-O3' '-std=c++20' '-shared-libgcc' '-mtune=generic' '-march=x86-64'