[Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3

public inbox for gcc-bugs@sourceware.org
help / color / mirror / Atom feed

* [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3
@ 2020-04-01 12:57 xiezhiheng at huawei dot com
  2020-04-01 21:12 ` [Bug tree-optimization/94442] " pinskia at gcc dot gnu.org
                   ` (14 more replies)
  0 siblings, 15 replies; 16+ messages in thread
From: xiezhiheng at huawei dot com @ 2020-04-01 12:57 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

            Bug ID: 94442
           Summary: [AArch64] Redundant ldp/stp instructions emitted at
                    -O3
           Product: gcc
           Version: 10.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: rtl-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: xiezhiheng at huawei dot com
  Target Milestone: ---
            Target: aarch64

Test case:

#include <arm_neon.h>

struct __m256i
{
  int8x16_t vect_s8[2];
};

__attribute__((inline)) __m256i _mm256_adds_epi8(__m256i a, __m256i b)
{
    __m256i res_m256i;
    res_m256i.vect_s8[0] = vqaddq_s8(a.vect_s8[0], b.vect_s8[0]);
    res_m256i.vect_s8[1] = vqaddq_s8(a.vect_s8[1], b.vect_s8[1]);
    return res_m256i;
}

void PerfTest1(__m256i *output, unsigned caseCount)
{
    unsigned loopCount = caseCount;
    __m256i& a = output[0];
    __m256i& b = output[1];
    __m256i& c = output[2];
    for (unsigned i = 0; i < loopCount; i++) {
        a = _mm256_adds_epi8(b, c);
        b = _mm256_adds_epi8(a, c);
        c = _mm256_adds_epi8(c, b);
        a = _mm256_adds_epi8(b, c);
        b = _mm256_adds_epi8(a, c);
        c = _mm256_adds_epi8(c, b);
        a = _mm256_adds_epi8(b, c);
        b = _mm256_adds_epi8(a, c);
        c = _mm256_adds_epi8(c, b);
        b = _mm256_adds_epi8(a, c);
    }
}

Command line (GCC version 10.0): aarch64-linux-gnu-g++ -S -O3 a.c

.L6:
        ldp     q3, q2, [x2]
        add     w4, w4, 1
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q1, q0, [x0]
        ldp     q3, q2, [x2]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q1, q0, [x0, 32]
        ldp     q3, q2, [x2]
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        stp     q3, q2, [x0, 64]
        ldp     q1, q0, [x3]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q1, q0, [x0]
        ldp     q3, q2, [x2]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q1, q0, [x0, 32]
        ldp     q3, q2, [x2]
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        stp     q3, q2, [x0, 64]
        ldp     q1, q0, [x3]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q1, q0, [x0]
        ldp     q2, q3, [x2]
        sqadd   v4.16b, v1.16b, v2.16b
        sqadd   v5.16b, v0.16b, v3.16b
        stp     q4, q5, [x0, 32]
        ldp     q2, q3, [x2]
        sqadd   v3.16b, v3.16b, v5.16b
        sqadd   v2.16b, v2.16b, v4.16b
        sqadd   v0.16b, v0.16b, v3.16b
        sqadd   v1.16b, v1.16b, v2.16b
        stp     q2, q3, [x0, 64]
        stp     q1, q0, [x0, 32]
        cmp     w1, w4
        bne     .L6

With the command line (GCC version 10.0): aarch64-linux-gnu-g++ -S -O1 a.c
or (GCC version 9.2.0): aarch64-linux-gnu-g++ -S -O3 a.c, the loop is instead:

.L4:
        ldr     q0, [x0, 48]
        ldr     q2, [x0, 80]
        ldr     q1, [x0, 32]
        ldr     q3, [x0, 64]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        str     q1, [x0]
        str     q0, [x0, 16]
        sqadd   v5.16b, v1.16b, v3.16b
        sqadd   v4.16b, v0.16b, v2.16b
        sqadd   v3.16b, v3.16b, v5.16b
        sqadd   v2.16b, v2.16b, v4.16b
        str     q3, [x0, 64]
        str     q2, [x0, 80]
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        str     q1, [x0, 32]
        str     q0, [x0, 48]
        add     w3, w3, 1
        cmp     w1, w3
        bne     .L4

This issue started appearing after commit
https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=3b47da42de621c6c3bf7d2f9245df989aa7eb5a1

This commit changes the gimple from
  a = MEM[(const struct __m256i &)output_5(D) + 32];
  a$vect_s8$0_4 = MEM <int8x16_t> [(const struct __m256i &)output_5(D) + 32];
  a$vect_s8$1_6 = MEM <int8x16_t> [(const struct __m256i &)output_5(D) + 48];
  b = MEM[(const struct __m256i &)output_5(D) + 64];
  b$vect_s8$0_9 = MEM <int8x16_t> [(const struct __m256i &)output_5(D) + 64];
  b$vect_s8$1_11 = MEM <int8x16_t> [(const struct __m256i &)output_5(D) + 80];
  _76 = a$vect_s8$0_4;
  _77 = b$vect_s8$0_9;
To
  a = MEM[(const struct __m256i &)output_5(D) + 32];
  a$vect_s8$0_4 = MEM[(const struct __m256i &)output_5(D) + 32].vect_s8[0];  <========
  a$vect_s8$1_6 = MEM[(const struct __m256i &)output_5(D) + 32].vect_s8[1];  <========
  b = MEM[(const struct __m256i &)output_5(D) + 64];
  b$vect_s8$0_9 = MEM[(const struct __m256i &)output_5(D) + 64].vect_s8[0];  <========
  b$vect_s8$1_11 = MEM[(const struct __m256i &)output_5(D) + 64].vect_s8[1];  <========
  _76 = a$vect_s8$0_4;
  _77 = b$vect_s8$0_9;

When expanded to RTL, the latter form emits two insns:
(insn 23 22 24 6 (set (reg/f:DI 140)
        (plus:DI (reg/v/f:DI 133 [ output ])
            (const_int 64 [0x40]))) -1
     (nil))
(insn 24 23 25 6 (set (reg:V16QI 94 [ b$vect_s8$1 ])
        (mem:V16QI (plus:DI (reg/f:DI 140)
                (const_int 16 [0x10])) [0 MEM[(const struct __m256i
&)output_5(D) + 64]+16 S16 A128])) -1
     (nil))

Later, in the RTL PRE pass, insn 23 is hoisted out of the loop as a
common subexpression.
As a result, the DSE pass cannot determine whether the following two insns
reference the same location:
(insn 33 32 36 5 (set (mem:V16QI (plus:DI (reg/v/f:DI 133 [ output ])
                (const_int 16 [0x10])) [1 MEM <int8x16_t> [(struct __m256i
*)output_5(D) + 16B]+0 S16 A128])
        (reg:V16QI 114 [ _35 ])) "a.c":23:34 1203 {*aarch64_simd_movv16qi}
     (nil))
(insn 36 33 41 5 (set (reg:V16QI 116 [ b$vect_s8$1 ])
        (mem:V16QI (plus:DI (reg/f:DI 194)
                (const_int 16 [0x10])) [0 MEM[(const struct __m256i
&)output_5(D) + 64]+16 S16 A128])) 1203 {*aarch64_simd_movv16qi}
     (nil))

This is because insn
(insn 140 5 130 4 (set (reg/f:DI 194)
        (plus:DI (reg/v/f:DI 133 [ output ])
            (const_int 64 [0x40]))) 121 {*adddi3_aarch64}
     (nil))

has just been moved to another basic block by the RTL PRE pass, and the DSE
pass is unable to recover this information.
Thus the DSE pass cannot eliminate these extra STRs.

I would like to solve this problem by propagating insn 23 into its use in the
fwprop pass.
However, there are some restrictions here. I tried the following modification:
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 705d2885aae..0edbbc65047 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -416,7 +416,7 @@ should_replace_address (rtx old_rtx, rtx new_rtx,
machine_mode mode,
     gain = (set_src_cost (new_rtx, VOIDmode, speed)
            - set_src_cost (old_rtx, VOIDmode, speed));

-  return (gain > 0);
+  return (gain >= 0);
 }


@@ -1573,10 +1573,14 @@ fwprop (bool fwprop_addr_p)
       df_ref use = DF_USES_GET (i);
       if (use)
        {
+         df_ref def = get_def_for_use (use);
          if (DF_REF_TYPE (use) == DF_REF_REG_USE
              || DF_REF_BB (use)->loop_father == NULL
              /* The outer most loop is not really a loop.  */
-             || loop_outer (DF_REF_BB (use)->loop_father) == NULL)
+             || loop_outer (DF_REF_BB (use)->loop_father) == NULL
+             || (def && (DF_REF_BB (def)->loop_father == DF_REF_BB
(use)->loop_father
+                         || flow_loop_nested_p (DF_REF_BB(use)->loop_father,
+                                               
DF_REF_BB(def)->loop_father))))
            forward_propagate_into (use, fwprop_addr_p);

          else if (fwprop_addr_p)

Some related discussion on the mailing list can be found here:
https://gcc.gnu.org/pipermail/gcc/2020-March/231980.html

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [AArch64] Redundant ldp/stp instructions emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
@ 2020-04-01 21:12 ` pinskia at gcc dot gnu.org
  2020-04-06 12:14 ` wdijkstr at arm dot com
                   ` (13 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: pinskia at gcc dot gnu.org @ 2020-04-01 21:12 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
          Component|rtl-optimization            |tree-optimization

--- Comment #1 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Seems like there is a missed optimization on the gimple level also.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [AArch64] Redundant ldp/stp instructions emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
  2020-04-01 21:12 ` [Bug tree-optimization/94442] " pinskia at gcc dot gnu.org
@ 2020-04-06 12:14 ` wdijkstr at arm dot com
  2020-04-30  7:22 ` [Bug tree-optimization/94442] [10 regression] Redundant loads/stores " rguenth at gcc dot gnu.org
                   ` (12 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: wdijkstr at arm dot com @ 2020-04-06 12:14 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Wilco <wdijkstr at arm dot com> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |wdijkstr at arm dot com

--- Comment #2 from Wilco <wdijkstr at arm dot com> ---
This should be marked as [10 regression].

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [10 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
  2020-04-01 21:12 ` [Bug tree-optimization/94442] " pinskia at gcc dot gnu.org
  2020-04-06 12:14 ` wdijkstr at arm dot com
@ 2020-04-30  7:22 ` rguenth at gcc dot gnu.org
  2020-05-06  8:00 ` [Bug tree-optimization/94442] [10/11 " xiezhiheng at huawei dot com
                   ` (11 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-04-30  7:22 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|---                         |10.0

--- Comment #3 from Richard Biener <rguenth at gcc dot gnu.org> ---
So I wonder why

  a$vect_s8$0_4 = MEM[(const struct __m256i &)output_5(D) + 32].vect_s8[0];  

necessarily emits two RTL insns.  It's likely because get_inner_reference
will not see through MEM[output_5(D) + 32] but records an extra offset
from the component-ref which we fail to fold into the MEM generated by
expansion of the MEM base.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (2 preceding siblings ...)
  2020-04-30  7:22 ` [Bug tree-optimization/94442] [10 regression] Redundant loads/stores " rguenth at gcc dot gnu.org
@ 2020-05-06  8:00 ` xiezhiheng at huawei dot com
  2020-05-07 11:56 ` jakub at gcc dot gnu.org
                   ` (10 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: xiezhiheng at huawei dot com @ 2020-05-06  8:00 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

--- Comment #4 from xiezhiheng at huawei dot com ---
(In reply to Richard Biener from comment #3)
> So I wonder why
> 
>   a$vect_s8$0_4 = MEM[(const struct __m256i &)output_5(D) + 32].vect_s8[0];  
> 
> necessarily emits two RTL insns.  It's likely because get_inner_reference
> will not see through MEM[output_5(D) + 32] but records an extra offset
> from the component-ref which we fail to fold into the MEM generated by
> expansion of the MEM base.

Indeed, get_inner_reference only handles the decl for MEM[&decl, off]
  case MEM_REF:
    /* Hand back the decl for MEM[&decl, off].  */
    if (TREE_CODE (TREE_OPERAND (exp, 0)) == ADDR_EXPR)
      {
        tree off = TREE_OPERAND (exp, 1);
        if (!integer_zerop (off))
          {
            poly_offset_int boff = mem_ref_offset (exp);
            boff <<= LOG2_BITS_PER_UNIT;
            bit_offset += boff;
          }
        exp = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
      }
    goto done;

In
  MEM[(const struct __m256i &)output_5(D) + 32].vect_s8[0];
output_5 is a SSA_NAME.
So maybe we could extend it to also handle the case where the base is an
SSA_NAME, i.e. MEM[ptr, off].

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (3 preceding siblings ...)
  2020-05-06  8:00 ` [Bug tree-optimization/94442] [10/11 " xiezhiheng at huawei dot com
@ 2020-05-07 11:56 ` jakub at gcc dot gnu.org
  2020-06-29  2:04 ` xiezhiheng at huawei dot com
                   ` (9 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: jakub at gcc dot gnu.org @ 2020-05-07 11:56 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|10.0                        |10.2

--- Comment #5 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
GCC 10.1 has been released.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (4 preceding siblings ...)
  2020-05-07 11:56 ` jakub at gcc dot gnu.org
@ 2020-06-29  2:04 ` xiezhiheng at huawei dot com
  2020-07-23  6:52 ` rguenth at gcc dot gnu.org
                   ` (8 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: xiezhiheng at huawei dot com @ 2020-06-29  2:04 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

--- Comment #6 from xiezhiheng at huawei dot com ---
I'm trying to modify get_inner_reference to handle the case
for MEM[ptr, off].

I extract the "off" and add it to the recorded offset, then I
build a MEM[ptr, 0] and return it later.

Like this
                }
              exp = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
            }
+         else if (TREE_CODE (TREE_OPERAND (exp, 0)) == SSA_NAME)
+           {
+             tree off = TREE_OPERAND (exp, 1);
+             if (!integer_zerop (off))
+               {
+                 poly_offset_int boff = mem_ref_offset (exp);
+                 boff <<= LOG2_BITS_PER_UNIT;
+                 bit_offset += boff;
+
+                 exp = build2 (MEM_REF, TREE_TYPE (exp),
+                               TREE_OPERAND (exp, 0),
+                               build_int_cst (TREE_TYPE (off), 0));
+               }
+           }
          goto done;

        default:

Assembly with the patch looks like:
.L6:
        ldp     q3, q2, [x0, 64]
        add     w2, w2, 1
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v3.16b, v3.16b, v1.16b
        sqadd   v2.16b, v2.16b, v0.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        sqadd   v5.16b, v1.16b, v3.16b
        sqadd   v4.16b, v0.16b, v2.16b
        stp     q1, q0, [x0]
        sqadd   v3.16b, v3.16b, v5.16b
        sqadd   v2.16b, v2.16b, v4.16b
        sqadd   v1.16b, v1.16b, v3.16b
        sqadd   v0.16b, v0.16b, v2.16b
        stp     q3, q2, [x0, 64]
        stp     q1, q0, [x0, 32]
        cmp     w1, w2
        bne     .L6

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug tree-optimization/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (5 preceding siblings ...)
  2020-06-29  2:04 ` xiezhiheng at huawei dot com
@ 2020-07-23  6:52 ` rguenth at gcc dot gnu.org
  2021-01-14  8:36 ` [Bug middle-end/94442] " rguenth at gcc dot gnu.org
                   ` (7 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2020-07-23  6:52 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|10.2                        |10.3

--- Comment #7 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 10.2 is released, adjusting target milestone.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (6 preceding siblings ...)
  2020-07-23  6:52 ` rguenth at gcc dot gnu.org
@ 2021-01-14  8:36 ` rguenth at gcc dot gnu.org
  2021-01-14  8:36 ` rguenth at gcc dot gnu.org
                   ` (6 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2021-01-14  8:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
          Component|tree-optimization           |middle-end
           Keywords|alias                       |
           Priority|P3                          |P2

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (7 preceding siblings ...)
  2021-01-14  8:36 ` [Bug middle-end/94442] " rguenth at gcc dot gnu.org
@ 2021-01-14  8:36 ` rguenth at gcc dot gnu.org
  2021-02-25 14:36 ` jakub at gcc dot gnu.org
                   ` (5 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2021-01-14  8:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Last reconfirmed|                            |2021-01-14
             Status|UNCONFIRMED                 |NEW
     Ever confirmed|0                           |1

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (8 preceding siblings ...)
  2021-01-14  8:36 ` rguenth at gcc dot gnu.org
@ 2021-02-25 14:36 ` jakub at gcc dot gnu.org
  2021-02-27  8:46 ` xiezhiheng at huawei dot com
                   ` (4 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: jakub at gcc dot gnu.org @ 2021-02-25 14:36 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
                 CC|                            |jakub at gcc dot gnu.org

--- Comment #8 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
So, is this fixed by any of the
r11-2190-gbf592b2ff776aef71c91924cdb5e0d10488496cf
r11-2448-g072a8b8fb6e861d8ac2db847bcc81dbcb1ef1b35
r11-2554-g35ffd4d16d7e3dbba297da788414a673530b7817
r11-2874-ge3684bcbf88b438ca1f0749de8843ddd5b72ad59
r11-2901-gd7738d4fde5b248b6814f5dd20617eecd33601df
r11-2902-g795944c4563b4d9abf6d4bd9963f41fa1249d9d9
r11-3844-gca4938fa8e0e72fd59307f1f058db800c1e4a8f3
r11-4131-g4fb0ee84ad8c9b789e2465c85ea048e3320365b0
r11-4384-g2d5aad691f5bd605cfc27ce16a1f2d023cd21f75
r11-4565-gc517003e719cb045d755dd4b074a1306d5567be4
r11-4665-gc229693ba6f5abb245fc71ebef4b8f7720e8ccf5
r11-4666-g60be12c32cb3a07a64efdab1f0ee6fd74536cc93
r11-4875-g1900707e56ae8c913f1d16426065e128b1abbb14
commits that refer to this PR number but none of them referred to it in
ChangeLog entry, or not?

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (9 preceding siblings ...)
  2021-02-25 14:36 ` jakub at gcc dot gnu.org
@ 2021-02-27  8:46 ` xiezhiheng at huawei dot com
  2021-04-08 12:02 ` rguenth at gcc dot gnu.org
                   ` (3 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: xiezhiheng at huawei dot com @ 2021-02-27  8:46 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

--- Comment #9 from xiezhiheng at huawei dot com ---
(In reply to Jakub Jelinek from comment #8)
> So, is this fixed by any of the
> r11-2190-gbf592b2ff776aef71c91924cdb5e0d10488496cf
> r11-2448-g072a8b8fb6e861d8ac2db847bcc81dbcb1ef1b35
> r11-2554-g35ffd4d16d7e3dbba297da788414a673530b7817
> r11-2874-ge3684bcbf88b438ca1f0749de8843ddd5b72ad59
> r11-2901-gd7738d4fde5b248b6814f5dd20617eecd33601df
> r11-2902-g795944c4563b4d9abf6d4bd9963f41fa1249d9d9
> r11-3844-gca4938fa8e0e72fd59307f1f058db800c1e4a8f3
> r11-4131-g4fb0ee84ad8c9b789e2465c85ea048e3320365b0
> r11-4384-g2d5aad691f5bd605cfc27ce16a1f2d023cd21f75
> r11-4565-gc517003e719cb045d755dd4b074a1306d5567be4
> r11-4665-gc229693ba6f5abb245fc71ebef4b8f7720e8ccf5
> r11-4666-g60be12c32cb3a07a64efdab1f0ee6fd74536cc93
> r11-4875-g1900707e56ae8c913f1d16426065e128b1abbb14
> commits that refer to this PR number but none of them referred to it in
> ChangeLog entry, or not?

Not yet.  We have fixed some of the intrinsics.
However, for saturating intrinsics (used in this PR), GCC does not
model the FPSR register, yet some test cases check it, so simply setting
the FLAG of the saturating intrinsics to none would cause these test cases to fail.
So the saturating intrinsics need further consideration.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (10 preceding siblings ...)
  2021-02-27  8:46 ` xiezhiheng at huawei dot com
@ 2021-04-08 12:02 ` rguenth at gcc dot gnu.org
  2022-06-28 10:40 ` [Bug middle-end/94442] [10/11/12/13 " jakub at gcc dot gnu.org
                   ` (2 subsequent siblings)
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2021-04-08 12:02 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|10.3                        |10.4

--- Comment #10 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 10.3 is being released, retargeting bugs to GCC 10.4.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [10/11/12/13 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (11 preceding siblings ...)
  2021-04-08 12:02 ` rguenth at gcc dot gnu.org
@ 2022-06-28 10:40 ` jakub at gcc dot gnu.org
  2023-07-07 10:37 ` [Bug middle-end/94442] [11/12/13/14 " rguenth at gcc dot gnu.org
  2023-08-04 17:21 ` pinskia at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: jakub at gcc dot gnu.org @ 2022-06-28 10:40 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Jakub Jelinek <jakub at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|10.4                        |10.5

--- Comment #11 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
GCC 10.4 is being released, retargeting bugs to GCC 10.5.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [11/12/13/14 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (12 preceding siblings ...)
  2022-06-28 10:40 ` [Bug middle-end/94442] [10/11/12/13 " jakub at gcc dot gnu.org
@ 2023-07-07 10:37 ` rguenth at gcc dot gnu.org
  2023-08-04 17:21 ` pinskia at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: rguenth at gcc dot gnu.org @ 2023-07-07 10:37 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Richard Biener <rguenth at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
   Target Milestone|10.5                        |11.5

--- Comment #12 from Richard Biener <rguenth at gcc dot gnu.org> ---
GCC 10 branch is being closed.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [Bug middle-end/94442] [11/12/13/14 regression] Redundant loads/stores emitted at -O3
  2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
                   ` (13 preceding siblings ...)
  2023-07-07 10:37 ` [Bug middle-end/94442] [11/12/13/14 " rguenth at gcc dot gnu.org
@ 2023-08-04 17:21 ` pinskia at gcc dot gnu.org
  14 siblings, 0 replies; 16+ messages in thread
From: pinskia at gcc dot gnu.org @ 2023-08-04 17:21 UTC (permalink / raw)
  To: gcc-bugs

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94442

Andrew Pinski <pinskia at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|NEW                         |RESOLVED
   Target Milestone|11.5                        |11.0
         Resolution|---                         |FIXED

--- Comment #13 from Andrew Pinski <pinskia at gcc dot gnu.org> ---
Fixed by r11-6794-g04b472ad0e1dc93abafe .

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2023-08-04 17:22 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-04-01 12:57 [Bug rtl-optimization/94442] New: [AArch64] Redundant ldp/stp instructions emitted at -O3 xiezhiheng at huawei dot com
2020-04-01 21:12 ` [Bug tree-optimization/94442] " pinskia at gcc dot gnu.org
2020-04-06 12:14 ` wdijkstr at arm dot com
2020-04-30  7:22 ` [Bug tree-optimization/94442] [10 regression] Redundant loads/stores " rguenth at gcc dot gnu.org
2020-05-06  8:00 ` [Bug tree-optimization/94442] [10/11 " xiezhiheng at huawei dot com
2020-05-07 11:56 ` jakub at gcc dot gnu.org
2020-06-29  2:04 ` xiezhiheng at huawei dot com
2020-07-23  6:52 ` rguenth at gcc dot gnu.org
2021-01-14  8:36 ` [Bug middle-end/94442] " rguenth at gcc dot gnu.org
2021-01-14  8:36 ` rguenth at gcc dot gnu.org
2021-02-25 14:36 ` jakub at gcc dot gnu.org
2021-02-27  8:46 ` xiezhiheng at huawei dot com
2021-04-08 12:02 ` rguenth at gcc dot gnu.org
2022-06-28 10:40 ` [Bug middle-end/94442] [10/11/12/13 " jakub at gcc dot gnu.org
2023-07-07 10:37 ` [Bug middle-end/94442] [11/12/13/14 " rguenth at gcc dot gnu.org
2023-08-04 17:21 ` pinskia at gcc dot gnu.org

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).