public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed
* [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions.
@ 2023-02-08  3:11 crazylht at gmail dot com
  2023-02-08  3:15 ` [Bug rtl-optimization/108707] " crazylht at gmail dot com
                   ` (7 more replies)
  0 siblings, 8 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-02-08  3:11 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

            Bug ID: 108707
           Summary: suboptimal allocation with same memory op for many
                    different instructions.
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: crazylht at gmail dot com
  Target Milestone: ---

#include<immintrin.h>

void
foo (__m512* pv, float* __restrict ps, int n, __m512* pdest,
__m512* p1, __m512* p2, __m512* p3)
{
    __m512 a = _mm512_setzero_ps ();
    __m512 b = a;
    __m512 c = a;
    for (int i = 0; i != n ;i++)
    {
        a = _mm512_fmadd_ps (p1[i], pv[i], a);
        b = _mm512_fmadd_ps (p2[i], pv[i], b);
        c = _mm512_fmadd_ps (p3[i], pv[i], c);
    }
    pdest[0] = a;
    pdest[1] = b;
    pdest[2] = c;
}

g++ -O2 -mavx512f -S

got 

.L3:
        vmovaps (%r8,%rax), %zmm3
        vmovaps (%r9,%rax), %zmm4
        vmovaps (%rsi,%rax), %zmm5
        vfmadd231ps     (%rdi,%rax), %zmm3, %zmm2
        vfmadd231ps     (%rdi,%rax), %zmm4, %zmm1
        vfmadd231ps     (%rdi,%rax), %zmm5, %zmm0
        addq    $64, %rax
        cmpq    %rax, %rdx
        jne     .L3

It would be better to load (%rdi,%rax) into a zmm register once and then reuse it:

.L3:
        vmovaps (%rdi,%rax), %zmm0
        vfmadd231ps     (%r8,%rax), %zmm0, %zmm3
        vfmadd231ps     (%r9,%rax), %zmm0, %zmm2
        vfmadd231ps     (%rsi,%rax), %zmm0, %zmm1
        addq    $64, %rax
        cmpq    %rax, %rdx
        jne     .L3

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
@ 2023-02-08  3:15 ` crazylht at gmail dot com
  2023-02-09 13:38 ` rguenth at gcc dot gnu.org
                   ` (6 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-02-08  3:15 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

Hongtao.liu <crazylht at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Target|                            |x86_64-*-* i?86-*-*

--- Comment #1 from Hongtao.liu <crazylht at gmail dot com> ---
It's related to memory_move_cost in the register allocator (RA):

1466  /* If this insn loads a parameter from its stack slot, then it
1467     represents a savings, rather than a cost, if the parameter is
1468     stored in memory.  Record this fact.
1469
1470     Similarly if we're loading other constants from memory (constant
1471     pool, TOC references, small data areas, etc) and this is the only
1472     assignment to the destination pseudo.
1473
1474     Don't do this if SET_SRC (set) isn't a general operand, if it is
1475     a memory requiring special instructions to load it, decreasing
1476     mem_cost might result in it being loaded using the specialized
1477     instruction into a register, then stored into stack and loaded
1478     again from the stack.  See PR52208.
1479
1480     Don't do this if SET_SRC (set) has side effect.  See PR56124.  */
1481  if (set != 0 && REG_P (SET_DEST (set)) && MEM_P (SET_SRC (set))
1482      && (note = find_reg_note (insn, REG_EQUIV, NULL_RTX)) != NULL_RTX
1483      && ((MEM_P (XEXP (note, 0))
1484           && !side_effects_p (SET_SRC (set)))
1485          || (CONSTANT_P (XEXP (note, 0))
1486              && targetm.legitimate_constant_p (GET_MODE (SET_DEST (set)),
1487                                                XEXP (note, 0))
1488              && REG_N_SETS (REGNO (SET_DEST (set))) == 1))
1489      && general_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))
1490      /* LRA does not use equiv with a symbol for PIC code.  */
1491      && (! ira_use_lra_p || ! pic_offset_table_rtx
1492          || ! contains_symbol_ref_p (XEXP (note, 0))))
1493    {
1494      enum reg_class cl = GENERAL_REGS;
1495      rtx reg = SET_DEST (set);
1496      int num = COST_INDEX (REGNO (reg));
1497
1498      COSTS (costs, num)->mem_cost
1499        -= ira_memory_move_cost[GET_MODE (reg)][cl][1] * frequency;
1500      record_address_regs (GET_MODE (SET_SRC (set)),
1501                           MEM_ADDR_SPACE (SET_SRC (set)),
1502                           XEXP (SET_SRC (set), 0), 0, MEM, SCRATCH,
1503                           frequency * 2);
1504      counted_mem = true;

We use GENERAL_REGS for the E_V16SFmode move cost, which is presumably
inaccurate. I think that when the preferred register class is unknown, we'd
better use NO_REGS.

 588            /* Costs for NO_REGS are used in cost calculation on the
 589               1st pass when the preferred register classes are not
 590               known yet.  In this case we take the best scenario.  */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
  2023-02-08  3:15 ` [Bug rtl-optimization/108707] " crazylht at gmail dot com
@ 2023-02-09 13:38 ` rguenth at gcc dot gnu.org
  2023-03-09  5:58 ` crazylht at gmail dot com
                   ` (5 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-02-09 13:38 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|UNCONFIRMED                 |NEW
     Ever confirmed|0                           |1
           Keywords|                            |missed-optimization, ra
   Last reconfirmed|                            |2023-02-09

--- Comment #2 from Richard Biener <rguenth at gcc dot gnu.org> ---
I've filed a duplicate PR about this at some point, but I can't find it right
now.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
  2023-02-08  3:15 ` [Bug rtl-optimization/108707] " crazylht at gmail dot com
  2023-02-09 13:38 ` rguenth at gcc dot gnu.org
@ 2023-03-09  5:58 ` crazylht at gmail dot com
  2023-03-09  5:59 ` crazylht at gmail dot com
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-03-09  5:58 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

--- Comment #3 from Hongtao.liu <crazylht at gmail dot com> ---
Created attachment 54614
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54614&action=edit
0001.patch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
                   ` (2 preceding siblings ...)
  2023-03-09  5:58 ` crazylht at gmail dot com
@ 2023-03-09  5:59 ` crazylht at gmail dot com
  2023-03-09  5:59 ` crazylht at gmail dot com
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-03-09  5:59 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

--- Comment #4 from Hongtao.liu <crazylht at gmail dot com> ---
Created attachment 54615
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54615&action=edit
0002.patch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
                   ` (3 preceding siblings ...)
  2023-03-09  5:59 ` crazylht at gmail dot com
@ 2023-03-09  5:59 ` crazylht at gmail dot com
  2023-03-09  6:00 ` crazylht at gmail dot com
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-03-09  5:59 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

--- Comment #5 from Hongtao.liu <crazylht at gmail dot com> ---
Created attachment 54616
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=54616&action=edit
0003.patch

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
                   ` (4 preceding siblings ...)
  2023-03-09  5:59 ` crazylht at gmail dot com
@ 2023-03-09  6:00 ` crazylht at gmail dot com
  2023-04-23  1:57 ` cvs-commit at gcc dot gnu.org
  2023-11-01  5:29 ` crazylht at gmail dot com
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-03-09  6:00 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

--- Comment #6 from Hongtao.liu <crazylht at gmail dot com> ---
3 patches pending for GCC14.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
                   ` (5 preceding siblings ...)
  2023-03-09  6:00 ` crazylht at gmail dot com
@ 2023-04-23  1:57 ` cvs-commit at gcc dot gnu.org
  2023-11-01  5:29 ` crazylht at gmail dot com
  7 siblings, 0 replies; 9+ messages in thread
From: cvs-commit at gcc dot gnu.org @ 2023-04-23  1:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

--- Comment #7 from CVS Commits <cvs-commit at gcc dot gnu.org> ---
The master branch has been updated by hongtao Liu <liuhongt@gcc.gnu.org>:

https://gcc.gnu.org/g:0368d169492017cfab5622d38b15be94154d458c

commit r14-172-g0368d169492017cfab5622d38b15be94154d458c
Author: liuhongt <hongtao.liu@intel.com>
Date:   Wed Feb 8 12:42:27 2023 +0800

    Use NO_REGS in cost calculation when the preferred register class are not
    known yet.

    gcc/ChangeLog:

            PR rtl-optimization/108707
            * ira-costs.cc (scan_one_insn): Use NO_REGS instead of
            GENERAL_REGS when preferred reg_class is not known.

    gcc/testsuite/ChangeLog:

            * gcc.target/i386/pr108707.c: New test.

^ permalink raw reply	[flat|nested] 9+ messages in thread

* [Bug rtl-optimization/108707] suboptimal allocation with same memory op for many different instructions.
  2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
                   ` (6 preceding siblings ...)
  2023-04-23  1:57 ` cvs-commit at gcc dot gnu.org
@ 2023-11-01  5:29 ` crazylht at gmail dot com
  7 siblings, 0 replies; 9+ messages in thread
From: crazylht at gmail dot com @ 2023-11-01  5:29 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108707

Hongtao.liu <crazylht at gmail dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
         Resolution|---                         |FIXED

--- Comment #8 from Hongtao.liu <crazylht at gmail dot com> ---
Fixed in GCC14.

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2023-11-01  5:29 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2023-02-08  3:11 [Bug rtl-optimization/108707] New: suboptimal allocation with same memory op for many different instructions crazylht at gmail dot com
2023-02-08  3:15 ` [Bug rtl-optimization/108707] " crazylht at gmail dot com
2023-02-09 13:38 ` rguenth at gcc dot gnu.org
2023-03-09  5:58 ` crazylht at gmail dot com
2023-03-09  5:59 ` crazylht at gmail dot com
2023-03-09  5:59 ` crazylht at gmail dot com
2023-03-09  6:00 ` crazylht at gmail dot com
2023-04-23  1:57 ` cvs-commit at gcc dot gnu.org
2023-11-01  5:29 ` crazylht at gmail dot com

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).