public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
@ 2022-01-05  5:38 liuhongt
  2022-01-05  8:07 ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2022-01-05  5:38 UTC (permalink / raw)
  To: gcc-patches

Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
Ok for trunk?

gcc/ChangeLog:

	PR rtl/103750
	* cfgloop.h (loop_contains_p): New function.
	* fwprop.c (forward_propagate_into): Allow propagations from
	inner loop to outer loop.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr103750-fwprop-1.C: New test.
---
 gcc/cfgloop.h                                 | 12 +++++++++
 gcc/fwprop.c                                  |  7 +++--
 .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
 3 files changed, 43 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C

diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
index d2714e20cb0..e8fe0cedd5f 100644
--- a/gcc/cfgloop.h
+++ b/gcc/cfgloop.h
@@ -908,6 +908,18 @@ loop_outermost (class loop *loop)
   return (*loop->superloops)[1];
 }
 
+/* Returns true if loop OUTER contains loop INNER.  */
+static inline bool
+loop_contains_p (class loop* outer, class loop* inner)
+{
+  unsigned n = vec_safe_length (inner->superloops);
+
+  for (unsigned i = 0; i != n; i++)
+    if ((*inner->superloops)[i] == outer)
+      return true;
+  return false;
+}
+
 extern void record_niter_bound (class loop *, const widest_int &, bool, bool);
 extern HOST_WIDE_INT get_estimated_loop_iterations_int (class loop *);
 extern HOST_WIDE_INT get_max_loop_iterations_int (const class loop *);
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 2eab4fd4614..aed48e7273f 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
   rtx src = SET_SRC (def_set);
 
   /* Allow propagations into a loop only for reg-to-reg copies, since
-     replacing one register by another shouldn't increase the cost.  */
+     replacing one register by another shouldn't increase the cost.
+     Propagations from inner loop to outer loop should be also ok.  */
   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
-  if ((reg_prop_only || def_loop != use_loop)
+  if ((reg_prop_only
+       || (use_loop && def_loop != use_loop
+	   && !loop_contains_p (use_loop, def_loop)))
       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
     return false;
 
diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
new file mode 100644
index 00000000000..26987d307aa
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
@@ -0,0 +1,26 @@
+/* PR target/103750.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
+/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
+
+#include<immintrin.h>
+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+  __m256i mch256 = _mm256_set1_epi16(c);
+  for ( ; n < e; n += 32) {
+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+    if (_kortestz_mask16_u8(mask1, mask2))
+      continue;
+
+    unsigned idx = _tzcnt_u32(mask1);
+    if (mask1 == 0) {
+      idx = __tzcnt_u16(mask2);
+      n += 16;
+    }
+    return n + idx;
+  }
+  return e;
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
  2022-01-05  5:38 [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop liuhongt
@ 2022-01-05  8:07 ` Richard Biener
  2022-01-06  6:51   ` liuhongt
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2022-01-05  8:07 UTC (permalink / raw)
  To: liuhongt; +Cc: GCC Patches

On Wed, Jan 5, 2022 at 6:39 AM liuhongt via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,}.
> Ok for trunk.
>
> gcc/ChangeLog:
>
>         PR rtl/103750
>         * cfgloop.h (loop_contains_p): New function.
>         * fwprop.c (forward_propagate_into): Allow propagations from
>         inner loop to outer loop.
>
> gcc/testsuite/ChangeLog:
>
>         * g++.target/i386/pr103750-fwprop-1.C: New test.
> ---
>  gcc/cfgloop.h                                 | 12 +++++++++
>  gcc/fwprop.c                                  |  7 +++--
>  .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
>  3 files changed, 43 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
>
> diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
> index d2714e20cb0..e8fe0cedd5f 100644
> --- a/gcc/cfgloop.h
> +++ b/gcc/cfgloop.h
> @@ -908,6 +908,18 @@ loop_outermost (class loop *loop)
>    return (*loop->superloops)[1];
>  }
>
> +/* Returns true if loop OUTER contains loop INNER.  */

that's flow_loop_nested_p (loop *outer, loop *inner) which
is implemented in O(1).  Note behavior for outer == inner
might be different (didn't check your implementation too hard)

Otherwise looks OK to me.

Thanks,
Richard.

> +static inline bool
> +loop_contains_p (class loop* outer, class loop* inner)
> +{
> +  unsigned n = vec_safe_length (inner->superloops);
> +
> +  for (unsigned i = 0; i != n; i++)
> +    if ((*inner->superloops)[i] == outer)
> +      return true;
> +  return false;
> +}
> +
>  extern void record_niter_bound (class loop *, const widest_int &, bool, bool);
>  extern HOST_WIDE_INT get_estimated_loop_iterations_int (class loop *);
>  extern HOST_WIDE_INT get_max_loop_iterations_int (const class loop *);
> diff --git a/gcc/fwprop.c b/gcc/fwprop.c
> index 2eab4fd4614..aed48e7273f 100644
> --- a/gcc/fwprop.c
> +++ b/gcc/fwprop.c
> @@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
>    rtx src = SET_SRC (def_set);
>
>    /* Allow propagations into a loop only for reg-to-reg copies, since
> -     replacing one register by another shouldn't increase the cost.  */
> +     replacing one register by another shouldn't increase the cost.
> +     Propagations from inner loop to outer loop should be also ok.  */
>    struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
>    struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
> -  if ((reg_prop_only || def_loop != use_loop)
> +  if ((reg_prop_only
> +       || (use_loop && def_loop != use_loop
> +          && !loop_contains_p (use_loop, def_loop)))
>        && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
>      return false;
>
> diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
> new file mode 100644
> index 00000000000..26987d307aa
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
> @@ -0,0 +1,26 @@
> +/* PR target/103750.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
> +/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
> +
> +#include<immintrin.h>
> +const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
> +{
> +  __m256i mch256 = _mm256_set1_epi16(c);
> +  for ( ; n < e; n += 32) {
> +    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
> +    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
> +    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
> +    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
> +    if (_kortestz_mask16_u8(mask1, mask2))
> +      continue;
> +
> +    unsigned idx = _tzcnt_u32(mask1);
> +    if (mask1 == 0) {
> +      idx = __tzcnt_u16(mask2);
> +      n += 16;
> +    }
> +    return n + idx;
> +  }
> +  return e;
> +}
> --
> 2.18.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
  2022-01-05  8:07 ` Richard Biener
@ 2022-01-06  6:51   ` liuhongt
  2022-01-06 10:55     ` Richard Biener
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2022-01-06  6:51 UTC (permalink / raw)
  To: gcc-patches

> that's flow_loop_nested_p (loop *outer, loop *inner) which
> is implemented in O(1).  Note behavior for outer == inner
> might be different (didn't check your implementation too hard)
>
Thanks, it seems flow_loop_nested_p assumes outer and inner are not
NULL. So I added some conditions to check for NULL, which is considered an
outer loop of any other loop.



gcc/ChangeLog:

	PR rtl/103750
	* fwprop.c (forward_propagate_into): Allow propagations from
	inner loop to outer loop.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr103750-fwprop-1.C: New test.
---
 gcc/fwprop.c                                  |  7 +++--
 .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
 2 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C

diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 2eab4fd4614..af2e9d1c189 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
   rtx src = SET_SRC (def_set);
 
   /* Allow propagations into a loop only for reg-to-reg copies, since
-     replacing one register by another shouldn't increase the cost.  */
+     replacing one register by another shouldn't increase the cost.
+     Propagations from inner loop to outer loop should be also ok.  */
   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
-  if ((reg_prop_only || def_loop != use_loop)
+  if ((reg_prop_only
+       || (use_loop && def_loop != use_loop
+	   &&(!def_loop || !flow_loop_nested_p (use_loop, def_loop))))
       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
     return false;
 
diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
new file mode 100644
index 00000000000..26987d307aa
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
@@ -0,0 +1,26 @@
+/* PR target/103750.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
+/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
+
+#include<immintrin.h>
+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+  __m256i mch256 = _mm256_set1_epi16(c);
+  for ( ; n < e; n += 32) {
+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+    if (_kortestz_mask16_u8(mask1, mask2))
+      continue;
+
+    unsigned idx = _tzcnt_u32(mask1);
+    if (mask1 == 0) {
+      idx = __tzcnt_u16(mask2);
+      n += 16;
+    }
+    return n + idx;
+  }
+  return e;
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
  2022-01-06  6:51   ` liuhongt
@ 2022-01-06 10:55     ` Richard Biener
  2022-01-07  5:16       ` liuhongt
  0 siblings, 1 reply; 6+ messages in thread
From: Richard Biener @ 2022-01-06 10:55 UTC (permalink / raw)
  To: liuhongt, gcc-patches

On January 6, 2022 7:51:48 AM GMT+01:00, liuhongt <hongtao.liu@intel.com> wrote:
>> that's flow_loop_nested_p (loop *outer, loop *inner) which
>> is implemented in O(1).  Note behavior for outer == inner
>> might be different (didn't check your implementation too hard)
>>
>Thanks, it seems flow_loop_nested_p assume outer and inner not to be
>NULL. So I add some conditions to check NULL which is considered as an outer
> loop of any other loop.

Huh, loop_father should never be NULL. Maybe when fwprop is run after RTL loop opts you instead want to add a check for current_loops or alternatively initialize loops in fwprop.

>
>
>gcc/ChangeLog:
>
>	PR rtl/103750
>	* fwprop.c (forward_propagate_into): Allow propagations from
>	inner loop to outer loop.
>
>gcc/testsuite/ChangeLog:
>
>	* g++.target/i386/pr103750-fwprop-1.C: New test.
>---
> gcc/fwprop.c                                  |  7 +++--
> .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
> 2 files changed, 31 insertions(+), 2 deletions(-)
> create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
>
>diff --git a/gcc/fwprop.c b/gcc/fwprop.c
>index 2eab4fd4614..af2e9d1c189 100644
>--- a/gcc/fwprop.c
>+++ b/gcc/fwprop.c
>@@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
>   rtx src = SET_SRC (def_set);
> 
>   /* Allow propagations into a loop only for reg-to-reg copies, since
>-     replacing one register by another shouldn't increase the cost.  */
>+     replacing one register by another shouldn't increase the cost.
>+     Propagations from inner loop to outer loop should be also ok.  */
>   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
>   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
>-  if ((reg_prop_only || def_loop != use_loop)
>+  if ((reg_prop_only
>+       || (use_loop && def_loop != use_loop
>+	   &&(!def_loop || !flow_loop_nested_p (use_loop, def_loop))))
>       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
>     return false;
> 
>diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
>new file mode 100644
>index 00000000000..26987d307aa
>--- /dev/null
>+++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
>@@ -0,0 +1,26 @@
>+/* PR target/103750.  */
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
>+/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
>+
>+#include<immintrin.h>
>+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
>+{
>+  __m256i mch256 = _mm256_set1_epi16(c);
>+  for ( ; n < e; n += 32) {
>+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
>+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
>+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
>+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
>+    if (_kortestz_mask16_u8(mask1, mask2))
>+      continue;
>+
>+    unsigned idx = _tzcnt_u32(mask1);
>+    if (mask1 == 0) {
>+      idx = __tzcnt_u16(mask2);
>+      n += 16;
>+    }
>+    return n + idx;
>+  }
>+  return e;
>+}


^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
  2022-01-06 10:55     ` Richard Biener
@ 2022-01-07  5:16       ` liuhongt
  2022-01-07 12:00         ` Richard Sandiford
  0 siblings, 1 reply; 6+ messages in thread
From: liuhongt @ 2022-01-07  5:16 UTC (permalink / raw)
  To: gcc-patches

>Huh, loop_father should never be NULL. Maybe when fwprop is run after RTL loop opts you instead want to add a check for current_loops or alternelatively initialize loops in fwprop.

Oh, I didn't know that. I once saw an ICE and thought it was related to a
NULL loop, but I can't reproduce the ICE either in the GCC testsuite or when
building SPEC2017. Anyway, here's the updated patch.

gcc/ChangeLog:

	PR rtl/103750
	* fwprop.c (forward_propagate_into): Allow propagations from
	inner loop to outer loop.

gcc/testsuite/ChangeLog:

	* g++.target/i386/pr103750-fwprop-1.C: New test.
---
 build.log                                     |  0
 gcc/fwprop.c                                  |  7 +++--
 .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
 3 files changed, 31 insertions(+), 2 deletions(-)
 create mode 100644 build.log
 create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C

diff --git a/build.log b/build.log
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/gcc/fwprop.c b/gcc/fwprop.c
index 2eab4fd4614..4f5d6a8d4fc 100644
--- a/gcc/fwprop.c
+++ b/gcc/fwprop.c
@@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
   rtx src = SET_SRC (def_set);
 
   /* Allow propagations into a loop only for reg-to-reg copies, since
-     replacing one register by another shouldn't increase the cost.  */
+     replacing one register by another shouldn't increase the cost.
+     Propagations from inner loop to outer loop should be also ok.  */
   struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
   struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
-  if ((reg_prop_only || def_loop != use_loop)
+  if ((reg_prop_only
+       || (def_loop != use_loop
+	   && !flow_loop_nested_p (use_loop, def_loop)))
       && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
     return false;
 
diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
new file mode 100644
index 00000000000..26987d307aa
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
@@ -0,0 +1,26 @@
+/* PR target/103750.  */
+/* { dg-do compile } */
+/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
+/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
+
+#include<immintrin.h>
+const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
+{
+  __m256i mch256 = _mm256_set1_epi16(c);
+  for ( ; n < e; n += 32) {
+    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
+    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
+    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
+    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
+    if (_kortestz_mask16_u8(mask1, mask2))
+      continue;
+
+    unsigned idx = _tzcnt_u32(mask1);
+    if (mask1 == 0) {
+      idx = __tzcnt_u16(mask2);
+      n += 16;
+    }
+    return n + idx;
+  }
+  return e;
+}
-- 
2.18.1


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop.
  2022-01-07  5:16       ` liuhongt
@ 2022-01-07 12:00         ` Richard Sandiford
  0 siblings, 0 replies; 6+ messages in thread
From: Richard Sandiford @ 2022-01-07 12:00 UTC (permalink / raw)
  To: liuhongt via Gcc-patches; +Cc: liuhongt

liuhongt via Gcc-patches <gcc-patches@gcc.gnu.org> writes:
>>Huh, loop_father should never be NULL. Maybe when fwprop is run after RTL loop opts you instead want to add a check for current_loops or alternelatively initialize loops in fwprop.
>
> Oh, I didn't know that, i once saw there's ICE and thought it's related to
> NULL loop. But I can't reproduce the ICE either in GCC testsuite or buiding
> spec2017. Anyway, here's update patch.
>
> gcc/ChangeLog:
>
> 	PR rtl/103750
> 	* fwprop.c (forward_propagate_into): Allow propagations from
> 	inner loop to outer loop.
>
> gcc/testsuite/ChangeLog:
>
> 	* g++.target/i386/pr103750-fwprop-1.C: New test.
> ---
>  build.log                                     |  0
>  gcc/fwprop.c                                  |  7 +++--
>  .../g++.target/i386/pr103750-fwprop-1.C       | 26 +++++++++++++++++++
>  3 files changed, 31 insertions(+), 2 deletions(-)
>  create mode 100644 build.log
>  create mode 100644 gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
>
> diff --git a/build.log b/build.log
> new file mode 100644
> index 00000000000..e69de29bb2d
> diff --git a/gcc/fwprop.c b/gcc/fwprop.c
> index 2eab4fd4614..4f5d6a8d4fc 100644
> --- a/gcc/fwprop.c
> +++ b/gcc/fwprop.c
> @@ -866,10 +866,13 @@ forward_propagate_into (use_info *use, bool reg_prop_only = false)
>    rtx src = SET_SRC (def_set);
>  
>    /* Allow propagations into a loop only for reg-to-reg copies, since
> -     replacing one register by another shouldn't increase the cost.  */
> +     replacing one register by another shouldn't increase the cost.
> +     Propagations from inner loop to outer loop should be also ok.  */

“should also be ok”

OK with that change, thanks.

Richard

>    struct loop *def_loop = def_insn->bb ()->cfg_bb ()->loop_father;
>    struct loop *use_loop = use->bb ()->cfg_bb ()->loop_father;
> -  if ((reg_prop_only || def_loop != use_loop)
> +  if ((reg_prop_only
> +       || (def_loop != use_loop
> +	   && !flow_loop_nested_p (use_loop, def_loop)))
>        && (!reg_single_def_p (dest) || !reg_single_def_p (src)))
>      return false;
>  
> diff --git a/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
> new file mode 100644
> index 00000000000..26987d307aa
> --- /dev/null
> +++ b/gcc/testsuite/g++.target/i386/pr103750-fwprop-1.C
> @@ -0,0 +1,26 @@
> +/* PR target/103750.  */
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -std=c++1y -march=cannonlake -fdump-rtl-fwprop1" } */
> +/* { dg-final { scan-rtl-dump-not "subreg:HI\[ \\\(\]*reg:SI\[^\n]*\n\[^\n]*UNSPEC_TZCNT" "fwprop1" } } */
> +
> +#include<immintrin.h>
> +const char16_t *qustrchr(char16_t *n, char16_t *e, char16_t c) noexcept
> +{
> +  __m256i mch256 = _mm256_set1_epi16(c);
> +  for ( ; n < e; n += 32) {
> +    __m256i data1 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n));
> +    __m256i data2 = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(n) + 1);
> +    __mmask16 mask1 = _mm256_cmpeq_epu16_mask(data1, mch256);
> +    __mmask16 mask2 = _mm256_cmpeq_epu16_mask(data2, mch256);
> +    if (_kortestz_mask16_u8(mask1, mask2))
> +      continue;
> +
> +    unsigned idx = _tzcnt_u32(mask1);
> +    if (mask1 == 0) {
> +      idx = __tzcnt_u16(mask2);
> +      n += 16;
> +    }
> +    return n + idx;
> +  }
> +  return e;
> +}

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2022-01-07 12:00 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2022-01-05  5:38 [PATCH] [RTL/fwprop] Allow propagations from inner loop to outer loop liuhongt
2022-01-05  8:07 ` Richard Biener
2022-01-06  6:51   ` liuhongt
2022-01-06 10:55     ` Richard Biener
2022-01-07  5:16       ` liuhongt
2022-01-07 12:00         ` Richard Sandiford

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).