public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
@ 2021-10-06 10:57 Prathamesh Kulkarni
  2021-10-08 15:49 ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Prathamesh Kulkarni @ 2021-10-06 10:57 UTC (permalink / raw)
  To: gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 1737 bytes --]

Hi,
As mentioned in PR, for the following test-case:

typedef unsigned char uint8_t;

static inline uint8_t
x264_clip_uint8(uint8_t x)
{
  uint8_t t = -x;
  uint8_t t1 = x & ~63;
  return (t1 != 0) ? t : x;
}

void
mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
{
  for (int x = 0; x < n*16; x++)
    dst[x] = x264_clip_uint8(src[x]);
}

-O3 -mcpu=generic+sve generates following code for the inner loop:

.L3:
        ld1b    z0.b, p0/z, [x1, x2]
        movprfx z2, z0
        and     z2.b, z2.b, #0xc0
        movprfx z1, z0
        neg     z1.b, p1/m, z0.b
        cmpeq   p2.b, p1/z, z2.b, #0
        sel     z0.b, p2, z0.b, z1.b
        st1b    z0.b, p0, [x0, x2]
        add     x2, x2, x4
        whilelo p0.b, w2, w3
        b.any   .L3

The sel is redundant since we could conditionally negate z0 based on
the predicate
comparing z2 with 0.

As suggested in the PR, the attached patch, introduces a new
conditional internal function .COND_NEG, and in gimple-isel replaces
the following sequence:
   op2 = -op1
   op0 = A cmp B
   lhs = op0 ? op1 : op2

with:
   op0 = A inverted_cmp B
   lhs = .COND_NEG (op0, op1, op1).

lhs = .COD_NEG (op0, op1, op1)
implies
lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.

With patch, it generates the following code-gen:
.L3:
        ld1b    z0.b, p0/z, [x1, x2]
        movprfx z1, z0
        and     z1.b, z1.b, #0xc0
        cmpne   p1.b, p2/z, z1.b, #0
        neg     z0.b, p1/m, z0.b
        st1b    z0.b, p0, [x0, x2]
        add     x2, x2, x4
        whilelo p0.b, w2, w3
        b.any   .L3

While it seems to work for this test-case, I am not entirely sure if
the patch is correct. Does it look in the right direction ?

Thanks,
Prathamesh

[-- Attachment #2: pr93183-1.diff --]
[-- Type: application/octet-stream, Size: 3300 bytes --]

diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
index 38e90933c3e..5b0dd3c1993 100644
--- a/gcc/gimple-isel.cc
+++ b/gcc/gimple-isel.cc
@@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "optabs.h"
 #include "gimple-fold.h"
 #include "internal-fn.h"
+#include "fold-const.h"
+#include "tree-pretty-print.h"
 
 /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
    internal function based on vector type of selected expansion.
@@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
 	      return new_stmt;
 	    }
 
+	  /* Replace:
+	     op2 = -op1
+	     op0 = A cmp B
+	     lhs = op0 ? op1 : op2
+
+	     with:
+	     op0 = A inverted_cmp B
+	     lhs = .COND_NEG (op0, op1, op1).  */
+
+	  gassign *op1_def = nullptr;
+	  if (TREE_CODE (op1) == SSA_NAME)
+	    op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
+
+	  gassign *op2_def = nullptr;
+	  if (TREE_CODE (op2) == SSA_NAME)
+	    op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
+
+	  if (can_compute_op0 && op1_def && op2_def
+	      && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
+	      && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
+	    {
+	      auto inverted_code
+		= invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
+	      gimple_assign_set_rhs_code (def_stmt, inverted_code);
+	      auto gsi2 = gsi_for_stmt (op2_def);
+	      gsi_remove (&gsi2, true);
+	      return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
+	    }
+
 	  if (can_compute_op0
 	      && used_vec_cond_exprs >= 2
 	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78db25bbac4..b57c7a4ed3e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
   T (BIT_IOR_EXPR, IFN_COND_IOR) \
   T (BIT_XOR_EXPR, IFN_COND_XOR) \
   T (LSHIFT_EXPR, IFN_COND_SHL) \
-  T (RSHIFT_EXPR, IFN_COND_SHR)
+  T (RSHIFT_EXPR, IFN_COND_SHR) \
+  T (NEGATE_EXPR, IFN_COND_NEG)
 
 /* Return a function that only performs CODE when a certain condition is met
    and that uses a given fallback value otherwise.  For example, if CODE is
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 88169ef4656..db40d1bfd18 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 201b8aae1c0..fc65a3a7c23 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
 OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
+OPTAB_D (cond_neg_optab, "cond_neg$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-06 10:57 [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional Prathamesh Kulkarni
@ 2021-10-08 15:49 ` Richard Sandiford
  2021-10-11 10:59   ` Prathamesh Kulkarni
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2021-10-08 15:49 UTC (permalink / raw)
  To: Prathamesh Kulkarni; +Cc: gcc Patches

Thanks for looking at this.

Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> Hi,
> As mentioned in PR, for the following test-case:
>
> typedef unsigned char uint8_t;
>
> static inline uint8_t
> x264_clip_uint8(uint8_t x)
> {
>   uint8_t t = -x;
>   uint8_t t1 = x & ~63;
>   return (t1 != 0) ? t : x;
> }
>
> void
> mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> {
>   for (int x = 0; x < n*16; x++)
>     dst[x] = x264_clip_uint8(src[x]);
> }
>
> -O3 -mcpu=generic+sve generates following code for the inner loop:
>
> .L3:
>         ld1b    z0.b, p0/z, [x1, x2]
>         movprfx z2, z0
>         and     z2.b, z2.b, #0xc0
>         movprfx z1, z0
>         neg     z1.b, p1/m, z0.b
>         cmpeq   p2.b, p1/z, z2.b, #0
>         sel     z0.b, p2, z0.b, z1.b
>         st1b    z0.b, p0, [x0, x2]
>         add     x2, x2, x4
>         whilelo p0.b, w2, w3
>         b.any   .L3
>
> The sel is redundant since we could conditionally negate z0 based on
> the predicate
> comparing z2 with 0.
>
> As suggested in the PR, the attached patch, introduces a new
> conditional internal function .COND_NEG, and in gimple-isel replaces
> the following sequence:
>    op2 = -op1
>    op0 = A cmp B
>    lhs = op0 ? op1 : op2
>
> with:
>    op0 = A inverted_cmp B
>    lhs = .COND_NEG (op0, op1, op1).
>
> lhs = .COD_NEG (op0, op1, op1)
> implies
> lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
>
> With patch, it generates the following code-gen:
> .L3:
>         ld1b    z0.b, p0/z, [x1, x2]
>         movprfx z1, z0
>         and     z1.b, z1.b, #0xc0
>         cmpne   p1.b, p2/z, z1.b, #0
>         neg     z0.b, p1/m, z0.b
>         st1b    z0.b, p0, [x0, x2]
>         add     x2, x2, x4
>         whilelo p0.b, w2, w3
>         b.any   .L3
>
> While it seems to work for this test-case, I am not entirely sure if
> the patch is correct. Does it look in the right direction ?

For binary ops we use match.pd rather than isel:

(for uncond_op (UNCOND_BINARY)
     cond_op (COND_BINARY)
 (simplify
  (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
  (with { tree op_type = TREE_TYPE (@4); }
   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
    (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
 (simplify
  (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
  (with { tree op_type = TREE_TYPE (@4); }
   (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
	&& is_truth_type_for (op_type, TREE_TYPE (@0)))
    (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))

I think it'd be good to do the same here, using new (UN)COND_UNARY
iterators.  (The iterators will only have one value to start with,
but other unary ops could get the same treatment in future.)

Richard


>
> Thanks,
> Prathamesh
>
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index 38e90933c3e..5b0dd3c1993 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>  #include "optabs.h"
>  #include "gimple-fold.h"
>  #include "internal-fn.h"
> +#include "fold-const.h"
> +#include "tree-pretty-print.h"
>  
>  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
>     internal function based on vector type of selected expansion.
> @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>  	      return new_stmt;
>  	    }
>  
> +	  /* Replace:
> +	     op2 = -op1
> +	     op0 = A cmp B
> +	     lhs = op0 ? op1 : op2
> +
> +	     with:
> +	     op0 = A inverted_cmp B
> +	     lhs = .COND_NEG (op0, op1, op1).  */
> +
> +	  gassign *op1_def = nullptr;
> +	  if (TREE_CODE (op1) == SSA_NAME)
> +	    op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> +
> +	  gassign *op2_def = nullptr;
> +	  if (TREE_CODE (op2) == SSA_NAME)
> +	    op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> +
> +	  if (can_compute_op0 && op1_def && op2_def
> +	      && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> +	      && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> +	    {
> +	      auto inverted_code
> +		= invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
> +	      gimple_assign_set_rhs_code (def_stmt, inverted_code);
> +	      auto gsi2 = gsi_for_stmt (op2_def);
> +	      gsi_remove (&gsi2, true);
> +	      return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
> +	    }
> +
>  	  if (can_compute_op0
>  	      && used_vec_cond_exprs >= 2
>  	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 78db25bbac4..b57c7a4ed3e 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>    T (LSHIFT_EXPR, IFN_COND_SHL) \
> -  T (RSHIFT_EXPR, IFN_COND_SHR)
> +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> +  T (NEGATE_EXPR, IFN_COND_NEG)
>  
>  /* Return a function that only performs CODE when a certain condition is met
>     and that uses a given fallback value otherwise.  For example, if CODE is
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 88169ef4656..db40d1bfd18 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>  
> +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> +
>  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>  
>  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 201b8aae1c0..fc65a3a7c23 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>  OPTAB_D (cond_fms_optab, "cond_fms$a")
>  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> +OPTAB_D (cond_neg_optab, "cond_neg$a")
>  OPTAB_D (cmov_optab, "cmov$a6")
>  OPTAB_D (cstore_optab, "cstore$a4")
>  OPTAB_D (ctrap_optab, "ctrap$a4")

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-08 15:49 ` Richard Sandiford
@ 2021-10-11 10:59   ` Prathamesh Kulkarni
  2021-10-11 15:12     ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Prathamesh Kulkarni @ 2021-10-11 10:59 UTC (permalink / raw)
  To: Prathamesh Kulkarni, gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 8073 bytes --]

On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Thanks for looking at this.
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > Hi,
> > As mentioned in PR, for the following test-case:
> >
> > typedef unsigned char uint8_t;
> >
> > static inline uint8_t
> > x264_clip_uint8(uint8_t x)
> > {
> >   uint8_t t = -x;
> >   uint8_t t1 = x & ~63;
> >   return (t1 != 0) ? t : x;
> > }
> >
> > void
> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> > {
> >   for (int x = 0; x < n*16; x++)
> >     dst[x] = x264_clip_uint8(src[x]);
> > }
> >
> > -O3 -mcpu=generic+sve generates following code for the inner loop:
> >
> > .L3:
> >         ld1b    z0.b, p0/z, [x1, x2]
> >         movprfx z2, z0
> >         and     z2.b, z2.b, #0xc0
> >         movprfx z1, z0
> >         neg     z1.b, p1/m, z0.b
> >         cmpeq   p2.b, p1/z, z2.b, #0
> >         sel     z0.b, p2, z0.b, z1.b
> >         st1b    z0.b, p0, [x0, x2]
> >         add     x2, x2, x4
> >         whilelo p0.b, w2, w3
> >         b.any   .L3
> >
> > The sel is redundant since we could conditionally negate z0 based on
> > the predicate
> > comparing z2 with 0.
> >
> > As suggested in the PR, the attached patch, introduces a new
> > conditional internal function .COND_NEG, and in gimple-isel replaces
> > the following sequence:
> >    op2 = -op1
> >    op0 = A cmp B
> >    lhs = op0 ? op1 : op2
> >
> > with:
> >    op0 = A inverted_cmp B
> >    lhs = .COND_NEG (op0, op1, op1).
> >
> > lhs = .COD_NEG (op0, op1, op1)
> > implies
> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
> >
> > With patch, it generates the following code-gen:
> > .L3:
> >         ld1b    z0.b, p0/z, [x1, x2]
> >         movprfx z1, z0
> >         and     z1.b, z1.b, #0xc0
> >         cmpne   p1.b, p2/z, z1.b, #0
> >         neg     z0.b, p1/m, z0.b
> >         st1b    z0.b, p0, [x0, x2]
> >         add     x2, x2, x4
> >         whilelo p0.b, w2, w3
> >         b.any   .L3
> >
> > While it seems to work for this test-case, I am not entirely sure if
> > the patch is correct. Does it look in the right direction ?
>
> For binary ops we use match.pd rather than isel:
>
> (for uncond_op (UNCOND_BINARY)
>      cond_op (COND_BINARY)
>  (simplify
>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>   (with { tree op_type = TREE_TYPE (@4); }
>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>  (simplify
>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>   (with { tree op_type = TREE_TYPE (@4); }
>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>
> I think it'd be good to do the same here, using new (UN)COND_UNARY
> iterators.  (The iterators will only have one value to start with,
> but other unary ops could get the same treatment in future.)
Thanks for the suggestions.
The attached patch adds a pattern to match.pd to replace:
cond = a cmp b
r = cond ? x : -x
with:
cond = a inverted_cmp b
r = cond ? -x : x

Code-gen with patch for inner loop:
.L3:
        ld1b    z0.b, p0/z, [x1, x2]
        movprfx z1, z0
        and     z1.b, z1.b, #0xc0
        cmpne   p1.b, p2/z, z1.b, #0
        neg     z0.b, p1/m, z0.b
        st1b    z0.b, p0, [x0, x2]
        add     x2, x2, x4
        whilelo p0.b, w2, w3
        b.any   .L3

Does it look OK ?
I didn't add it under (UN)COND_UNARY since it inverts the comparison,
which we might not want to do for other unary ops ?

Also, I am not sure, how to test if target supports conditional
internal function ?
I tried to use:
(for cmp (tcc_comparison)
     icmp (inverted_tcc_comparison)
 (simplify
  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
   (with { auto op_type = TREE_TYPE (@2); }
    (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
         && is_truth_type_for (op_type, TREE_TYPE (@0)))
      (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))

but both the conditions seem to fail.

Thanks,
Prathamesh


>
> Richard
>
>
> >
> > Thanks,
> > Prathamesh
> >
> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> > index 38e90933c3e..5b0dd3c1993 100644
> > --- a/gcc/gimple-isel.cc
> > +++ b/gcc/gimple-isel.cc
> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> >  #include "optabs.h"
> >  #include "gimple-fold.h"
> >  #include "internal-fn.h"
> > +#include "fold-const.h"
> > +#include "tree-pretty-print.h"
> >
> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> >     internal function based on vector type of selected expansion.
> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
> >             return new_stmt;
> >           }
> >
> > +       /* Replace:
> > +          op2 = -op1
> > +          op0 = A cmp B
> > +          lhs = op0 ? op1 : op2
> > +
> > +          with:
> > +          op0 = A inverted_cmp B
> > +          lhs = .COND_NEG (op0, op1, op1).  */
> > +
> > +       gassign *op1_def = nullptr;
> > +       if (TREE_CODE (op1) == SSA_NAME)
> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> > +
> > +       gassign *op2_def = nullptr;
> > +       if (TREE_CODE (op2) == SSA_NAME)
> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> > +
> > +       if (can_compute_op0 && op1_def && op2_def
> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> > +         {
> > +           auto inverted_code
> > +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
> > +           auto gsi2 = gsi_for_stmt (op2_def);
> > +           gsi_remove (&gsi2, true);
> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
> > +         }
> > +
> >         if (can_compute_op0
> >             && used_vec_cond_exprs >= 2
> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 78db25bbac4..b57c7a4ed3e 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >
> >  /* Return a function that only performs CODE when a certain condition is met
> >     and that uses a given fallback value otherwise.  For example, if CODE is
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 88169ef4656..db40d1bfd18 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >
> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> > +
> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >
> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 201b8aae1c0..fc65a3a7c23 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")

[-- Attachment #2: pr93183-2.diff --]
[-- Type: application/octet-stream, Size: 3055 bytes --]

diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 7112c116835..90cf131af48 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
       if (!gimple_resimplify3 (seq, &cond_op, valueize))
 	return false;
       break;
+    case 1:
+      if (!gimple_resimplify1 (seq, &cond_op, valueize))
+	return false;
+      break;
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78db25bbac4..b57c7a4ed3e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
   T (BIT_IOR_EXPR, IFN_COND_IOR) \
   T (BIT_XOR_EXPR, IFN_COND_XOR) \
   T (LSHIFT_EXPR, IFN_COND_SHL) \
-  T (RSHIFT_EXPR, IFN_COND_SHR)
+  T (RSHIFT_EXPR, IFN_COND_SHR) \
+  T (NEGATE_EXPR, IFN_COND_NEG)
 
 /* Return a function that only performs CODE when a certain condition is met
    and that uses a given fallback value otherwise.  For example, if CODE is
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 88169ef4656..db40d1bfd18 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/match.pd b/gcc/match.pd
index a9791ceb74a..fde6d32b2c4 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 						  wi::mask (tree_to_uhwi (@1),
 						  false, prec)); })
 		     { build_zero_cst (TREE_TYPE (@0)); }))))))))
+#if GIMPLE
+
+/* Simplify:
+
+     cond = a cmp b
+     r = cond ? x : -x
+
+    to:
+
+     cond = a inverted_cmp b
+     r = cond ? -x : x.  */
+
+(for cmp (tcc_comparison)
+     icmp (inverted_tcc_comparison)
+ (simplify
+  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
+   (with { auto op_type = TREE_TYPE (@2); }
+    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
 
 /* Simplify:
 
@@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    conditional internal functions must support the same comparisons
    inside and outside a VEC_COND_EXPR.  */
 
-#if GIMPLE
 (for uncond_op (UNCOND_BINARY)
      cond_op (COND_BINARY)
  (simplify
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 201b8aae1c0..fc65a3a7c23 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
 OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
+OPTAB_D (cond_neg_optab, "cond_neg$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-11 10:59   ` Prathamesh Kulkarni
@ 2021-10-11 15:12     ` Richard Sandiford
  2021-10-12 12:05       ` Prathamesh Kulkarni
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2021-10-11 15:12 UTC (permalink / raw)
  To: Prathamesh Kulkarni; +Cc: gcc Patches

Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Thanks for looking at this.
>>
>> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
>> > Hi,
>> > As mentioned in PR, for the following test-case:
>> >
>> > typedef unsigned char uint8_t;
>> >
>> > static inline uint8_t
>> > x264_clip_uint8(uint8_t x)
>> > {
>> >   uint8_t t = -x;
>> >   uint8_t t1 = x & ~63;
>> >   return (t1 != 0) ? t : x;
>> > }
>> >
>> > void
>> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
>> > {
>> >   for (int x = 0; x < n*16; x++)
>> >     dst[x] = x264_clip_uint8(src[x]);
>> > }
>> >
>> > -O3 -mcpu=generic+sve generates following code for the inner loop:
>> >
>> > .L3:
>> >         ld1b    z0.b, p0/z, [x1, x2]
>> >         movprfx z2, z0
>> >         and     z2.b, z2.b, #0xc0
>> >         movprfx z1, z0
>> >         neg     z1.b, p1/m, z0.b
>> >         cmpeq   p2.b, p1/z, z2.b, #0
>> >         sel     z0.b, p2, z0.b, z1.b
>> >         st1b    z0.b, p0, [x0, x2]
>> >         add     x2, x2, x4
>> >         whilelo p0.b, w2, w3
>> >         b.any   .L3
>> >
>> > The sel is redundant since we could conditionally negate z0 based on
>> > the predicate
>> > comparing z2 with 0.
>> >
>> > As suggested in the PR, the attached patch, introduces a new
>> > conditional internal function .COND_NEG, and in gimple-isel replaces
>> > the following sequence:
>> >    op2 = -op1
>> >    op0 = A cmp B
>> >    lhs = op0 ? op1 : op2
>> >
>> > with:
>> >    op0 = A inverted_cmp B
>> >    lhs = .COND_NEG (op0, op1, op1).
>> >
>> > lhs = .COD_NEG (op0, op1, op1)
>> > implies
>> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
>> >
>> > With patch, it generates the following code-gen:
>> > .L3:
>> >         ld1b    z0.b, p0/z, [x1, x2]
>> >         movprfx z1, z0
>> >         and     z1.b, z1.b, #0xc0
>> >         cmpne   p1.b, p2/z, z1.b, #0
>> >         neg     z0.b, p1/m, z0.b
>> >         st1b    z0.b, p0, [x0, x2]
>> >         add     x2, x2, x4
>> >         whilelo p0.b, w2, w3
>> >         b.any   .L3
>> >
>> > While it seems to work for this test-case, I am not entirely sure if
>> > the patch is correct. Does it look in the right direction ?
>>
>> For binary ops we use match.pd rather than isel:
>>
>> (for uncond_op (UNCOND_BINARY)
>>      cond_op (COND_BINARY)
>>  (simplify
>>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>>   (with { tree op_type = TREE_TYPE (@4); }
>>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>>  (simplify
>>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>>   (with { tree op_type = TREE_TYPE (@4); }
>>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>>
>> I think it'd be good to do the same here, using new (UN)COND_UNARY
>> iterators.  (The iterators will only have one value to start with,
>> but other unary ops could get the same treatment in future.)
> Thanks for the suggestions.
> The attached patch adds a pattern to match.pd to replace:
> cond = a cmp b
> r = cond ? x : -x
> with:
> cond = a inverted_cmp b
> r = cond ? -x : x
>
> Code-gen with patch for inner loop:
> .L3:
>         ld1b    z0.b, p0/z, [x1, x2]
>         movprfx z1, z0
>         and     z1.b, z1.b, #0xc0
>         cmpne   p1.b, p2/z, z1.b, #0
>         neg     z0.b, p1/m, z0.b
>         st1b    z0.b, p0, [x0, x2]
>         add     x2, x2, x4
>         whilelo p0.b, w2, w3
>         b.any   .L3
>
> Does it look OK ?
> I didn't add it under (UN)COND_UNARY since it inverts the comparison,
> which we might not want to do for other unary ops ?

I think we should follow the structure of the current binary and
ternary patterns: cope with unary operations in either arm of the
vec_cond and use bit_not for the case in which the unary operation
is in the “false” arm of the vec_cond.

The bit_not will be folded away if the comparison can be inverted,
but it will be left in-place if the comparison can't be inverted
(as for some FP comparisons).

Thanks,
Richard

>
> Also, I am not sure, how to test if target supports conditional
> internal function ?
> I tried to use:
> (for cmp (tcc_comparison)
>      icmp (inverted_tcc_comparison)
>  (simplify
>   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
>    (with { auto op_type = TREE_TYPE (@2); }
>     (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
>          && is_truth_type_for (op_type, TREE_TYPE (@0)))
>       (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))
>
> but both the conditions seem to fail.
>
> Thanks,
> Prathamesh
>
>
>>
>> Richard
>>
>>
>> >
>> > Thanks,
>> > Prathamesh
>> >
>> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
>> > index 38e90933c3e..5b0dd3c1993 100644
>> > --- a/gcc/gimple-isel.cc
>> > +++ b/gcc/gimple-isel.cc
>> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>> >  #include "optabs.h"
>> >  #include "gimple-fold.h"
>> >  #include "internal-fn.h"
>> > +#include "fold-const.h"
>> > +#include "tree-pretty-print.h"
>> >
>> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
>> >     internal function based on vector type of selected expansion.
>> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>> >             return new_stmt;
>> >           }
>> >
>> > +       /* Replace:
>> > +          op2 = -op1
>> > +          op0 = A cmp B
>> > +          lhs = op0 ? op1 : op2
>> > +
>> > +          with:
>> > +          op0 = A inverted_cmp B
>> > +          lhs = .COND_NEG (op0, op1, op1).  */
>> > +
>> > +       gassign *op1_def = nullptr;
>> > +       if (TREE_CODE (op1) == SSA_NAME)
>> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
>> > +
>> > +       gassign *op2_def = nullptr;
>> > +       if (TREE_CODE (op2) == SSA_NAME)
>> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
>> > +
>> > +       if (can_compute_op0 && op1_def && op2_def
>> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
>> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
>> > +         {
>> > +           auto inverted_code
>> > +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
>> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
>> > +           auto gsi2 = gsi_for_stmt (op2_def);
>> > +           gsi_remove (&gsi2, true);
>> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
>> > +         }
>> > +
>> >         if (can_compute_op0
>> >             && used_vec_cond_exprs >= 2
>> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
>> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> > index 78db25bbac4..b57c7a4ed3e 100644
>> > --- a/gcc/internal-fn.c
>> > +++ b/gcc/internal-fn.c
>> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
>> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
>> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
>> > +  T (NEGATE_EXPR, IFN_COND_NEG)
>> >
>> >  /* Return a function that only performs CODE when a certain condition is met
>> >     and that uses a given fallback value otherwise.  For example, if CODE is
>> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
>> > index 88169ef4656..db40d1bfd18 100644
>> > --- a/gcc/internal-fn.def
>> > +++ b/gcc/internal-fn.def
>> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>> >
>> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
>> > +
>> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>> >
>> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
>> > diff --git a/gcc/optabs.def b/gcc/optabs.def
>> > index 201b8aae1c0..fc65a3a7c23 100644
>> > --- a/gcc/optabs.def
>> > +++ b/gcc/optabs.def
>> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
>> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
>> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
>> >  OPTAB_D (cmov_optab, "cmov$a6")
>> >  OPTAB_D (cstore_optab, "cstore$a4")
>> >  OPTAB_D (ctrap_optab, "ctrap$a4")
>
> diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> index 7112c116835..90cf131af48 100644
> --- a/gcc/gimple-match-head.c
> +++ b/gcc/gimple-match-head.c
> @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
>        if (!gimple_resimplify3 (seq, &cond_op, valueize))
>  	return false;
>        break;
> +    case 1:
> +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> +	return false;
> +      break;
>      default:
>        gcc_unreachable ();
>      }
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 78db25bbac4..b57c7a4ed3e 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>    T (LSHIFT_EXPR, IFN_COND_SHL) \
> -  T (RSHIFT_EXPR, IFN_COND_SHR)
> +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> +  T (NEGATE_EXPR, IFN_COND_NEG)
>  
>  /* Return a function that only performs CODE when a certain condition is met
>     and that uses a given fallback value otherwise.  For example, if CODE is
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 88169ef4656..db40d1bfd18 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>  
> +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> +
>  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>  
>  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a9791ceb74a..fde6d32b2c4 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  						  wi::mask (tree_to_uhwi (@1),
>  						  false, prec)); })
>  		     { build_zero_cst (TREE_TYPE (@0)); }))))))))
> +#if GIMPLE
> +
> +/* Simplify:
> +
> +     cond = a cmp b
> +     r = cond ? x : -x
> +
> +    to:
> +
> +     cond = a inverted_cmp b
> +     r = cond ? -x : x.  */
> +
> +(for cmp (tcc_comparison)
> +     icmp (inverted_tcc_comparison)
> + (simplify
> +  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> +   (with { auto op_type = TREE_TYPE (@2); }
> +    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
>  
>  /* Simplify:
>  
> @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     conditional internal functions must support the same comparisons
>     inside and outside a VEC_COND_EXPR.  */
>  
> -#if GIMPLE
>  (for uncond_op (UNCOND_BINARY)
>       cond_op (COND_BINARY)
>   (simplify
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 201b8aae1c0..fc65a3a7c23 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>  OPTAB_D (cond_fms_optab, "cond_fms$a")
>  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> +OPTAB_D (cond_neg_optab, "cond_neg$a")
>  OPTAB_D (cmov_optab, "cmov$a6")
>  OPTAB_D (cstore_optab, "cstore$a4")
>  OPTAB_D (ctrap_optab, "ctrap$a4")

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-11 15:12     ` Richard Sandiford
@ 2021-10-12 12:05       ` Prathamesh Kulkarni
  2021-10-13  7:56         ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Prathamesh Kulkarni @ 2021-10-12 12:05 UTC (permalink / raw)
  To: Prathamesh Kulkarni, gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 13334 bytes --]

On Mon, 11 Oct 2021 at 20:42, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Thanks for looking at this.
> >>
> >> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> >> > Hi,
> >> > As mentioned in PR, for the following test-case:
> >> >
> >> > typedef unsigned char uint8_t;
> >> >
> >> > static inline uint8_t
> >> > x264_clip_uint8(uint8_t x)
> >> > {
> >> >   uint8_t t = -x;
> >> >   uint8_t t1 = x & ~63;
> >> >   return (t1 != 0) ? t : x;
> >> > }
> >> >
> >> > void
> >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> >> > {
> >> >   for (int x = 0; x < n*16; x++)
> >> >     dst[x] = x264_clip_uint8(src[x]);
> >> > }
> >> >
> >> > -O3 -mcpu=generic+sve generates following code for the inner loop:
> >> >
> >> > .L3:
> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >         movprfx z2, z0
> >> >         and     z2.b, z2.b, #0xc0
> >> >         movprfx z1, z0
> >> >         neg     z1.b, p1/m, z0.b
> >> >         cmpeq   p2.b, p1/z, z2.b, #0
> >> >         sel     z0.b, p2, z0.b, z1.b
> >> >         st1b    z0.b, p0, [x0, x2]
> >> >         add     x2, x2, x4
> >> >         whilelo p0.b, w2, w3
> >> >         b.any   .L3
> >> >
> >> > The sel is redundant since we could conditionally negate z0 based on
> >> > the predicate
> >> > comparing z2 with 0.
> >> >
> >> > As suggested in the PR, the attached patch, introduces a new
> >> > conditional internal function .COND_NEG, and in gimple-isel replaces
> >> > the following sequence:
> >> >    op2 = -op1
> >> >    op0 = A cmp B
> >> >    lhs = op0 ? op1 : op2
> >> >
> >> > with:
> >> >    op0 = A inverted_cmp B
> >> >    lhs = .COND_NEG (op0, op1, op1).
> >> >
> >> > lhs = .COD_NEG (op0, op1, op1)
> >> > implies
> >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
> >> >
> >> > With patch, it generates the following code-gen:
> >> > .L3:
> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >         movprfx z1, z0
> >> >         and     z1.b, z1.b, #0xc0
> >> >         cmpne   p1.b, p2/z, z1.b, #0
> >> >         neg     z0.b, p1/m, z0.b
> >> >         st1b    z0.b, p0, [x0, x2]
> >> >         add     x2, x2, x4
> >> >         whilelo p0.b, w2, w3
> >> >         b.any   .L3
> >> >
> >> > While it seems to work for this test-case, I am not entirely sure if
> >> > the patch is correct. Does it look in the right direction ?
> >>
> >> For binary ops we use match.pd rather than isel:
> >>
> >> (for uncond_op (UNCOND_BINARY)
> >>      cond_op (COND_BINARY)
> >>  (simplify
> >>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> >>   (with { tree op_type = TREE_TYPE (@4); }
> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >>  (simplify
> >>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> >>   (with { tree op_type = TREE_TYPE (@4); }
> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >>
> >> I think it'd be good to do the same here, using new (UN)COND_UNARY
> >> iterators.  (The iterators will only have one value to start with,
> >> but other unary ops could get the same treatment in future.)
> > Thanks for the suggestions.
> > The attached patch adds a pattern to match.pd to replace:
> > cond = a cmp b
> > r = cond ? x : -x
> > with:
> > cond = a inverted_cmp b
> > r = cond ? -x : x
> >
> > Code-gen with patch for inner loop:
> > .L3:
> >         ld1b    z0.b, p0/z, [x1, x2]
> >         movprfx z1, z0
> >         and     z1.b, z1.b, #0xc0
> >         cmpne   p1.b, p2/z, z1.b, #0
> >         neg     z0.b, p1/m, z0.b
> >         st1b    z0.b, p0, [x0, x2]
> >         add     x2, x2, x4
> >         whilelo p0.b, w2, w3
> >         b.any   .L3
> >
> > Does it look OK ?
> > I didn't add it under (UN)COND_UNARY since it inverts the comparison,
> > which we might not want to do for other unary ops ?
>
> I think we should follow the structure of the current binary and
> ternary patterns: cope with unary operations in either arm of the
> vec_cond and use bit_not for the case in which the unary operation
> is in the “false” arm of the vec_cond.
>
> The bit_not will be folded away if the comparison can be inverted,
> but it will be left in-place if the comparison can't be inverted
> (as for some FP comparisons).
Ah indeed, done in the attached patch.
Does it look OK ?

Thanks,
Prathamesh
>
> Thanks,
> Richard
>
> >
> > Also, I am not sure, how to test if target supports conditional
> > internal function ?
> > I tried to use:
> > (for cmp (tcc_comparison)
> >      icmp (inverted_tcc_comparison)
> >  (simplify
> >   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> >    (with { auto op_type = TREE_TYPE (@2); }
> >     (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
> >          && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >       (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))
> >
> > but both the conditions seem to fail.
> >
> > Thanks,
> > Prathamesh
> >
> >
> >>
> >> Richard
> >>
> >>
> >> >
> >> > Thanks,
> >> > Prathamesh
> >> >
> >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> >> > index 38e90933c3e..5b0dd3c1993 100644
> >> > --- a/gcc/gimple-isel.cc
> >> > +++ b/gcc/gimple-isel.cc
> >> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> >> >  #include "optabs.h"
> >> >  #include "gimple-fold.h"
> >> >  #include "internal-fn.h"
> >> > +#include "fold-const.h"
> >> > +#include "tree-pretty-print.h"
> >> >
> >> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> >> >     internal function based on vector type of selected expansion.
> >> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
> >> >             return new_stmt;
> >> >           }
> >> >
> >> > +       /* Replace:
> >> > +          op2 = -op1
> >> > +          op0 = A cmp B
> >> > +          lhs = op0 ? op1 : op2
> >> > +
> >> > +          with:
> >> > +          op0 = A inverted_cmp B
> >> > +          lhs = .COND_NEG (op0, op1, op1).  */
> >> > +
> >> > +       gassign *op1_def = nullptr;
> >> > +       if (TREE_CODE (op1) == SSA_NAME)
> >> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> >> > +
> >> > +       gassign *op2_def = nullptr;
> >> > +       if (TREE_CODE (op2) == SSA_NAME)
> >> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> >> > +
> >> > +       if (can_compute_op0 && op1_def && op2_def
> >> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> >> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> >> > +         {
> >> > +           auto inverted_code
> >> > +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
> >> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
> >> > +           auto gsi2 = gsi_for_stmt (op2_def);
> >> > +           gsi_remove (&gsi2, true);
> >> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
> >> > +         }
> >> > +
> >> >         if (can_compute_op0
> >> >             && used_vec_cond_exprs >= 2
> >> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > index 78db25bbac4..b57c7a4ed3e 100644
> >> > --- a/gcc/internal-fn.c
> >> > +++ b/gcc/internal-fn.c
> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >> >
> >> >  /* Return a function that only performs CODE when a certain condition is met
> >> >     and that uses a given fallback value otherwise.  For example, if CODE is
> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> > index 88169ef4656..db40d1bfd18 100644
> >> > --- a/gcc/internal-fn.def
> >> > +++ b/gcc/internal-fn.def
> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >> >
> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> >> > +
> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >> >
> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> > index 201b8aae1c0..fc65a3a7c23 100644
> >> > --- a/gcc/optabs.def
> >> > +++ b/gcc/optabs.def
> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >> >  OPTAB_D (cmov_optab, "cmov$a6")
> >> >  OPTAB_D (cstore_optab, "cstore$a4")
> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> >
> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> > index 7112c116835..90cf131af48 100644
> > --- a/gcc/gimple-match-head.c
> > +++ b/gcc/gimple-match-head.c
> > @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
> >        if (!gimple_resimplify3 (seq, &cond_op, valueize))
> >       return false;
> >        break;
> > +    case 1:
> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> > +     return false;
> > +      break;
> >      default:
> >        gcc_unreachable ();
> >      }
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 78db25bbac4..b57c7a4ed3e 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >
> >  /* Return a function that only performs CODE when a certain condition is met
> >     and that uses a given fallback value otherwise.  For example, if CODE is
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 88169ef4656..db40d1bfd18 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >
> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> > +
> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >
> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index a9791ceb74a..fde6d32b2c4 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >                                                 wi::mask (tree_to_uhwi (@1),
> >                                                 false, prec)); })
> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
> > +#if GIMPLE
> > +
> > +/* Simplify:
> > +
> > +     cond = a cmp b
> > +     r = cond ? x : -x
> > +
> > +    to:
> > +
> > +     cond = a inverted_cmp b
> > +     r = cond ? -x : x.  */
> > +
> > +(for cmp (tcc_comparison)
> > +     icmp (inverted_tcc_comparison)
> > + (simplify
> > +  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> > +   (with { auto op_type = TREE_TYPE (@2); }
> > +    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
> >
> >  /* Simplify:
> >
> > @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >     conditional internal functions must support the same comparisons
> >     inside and outside a VEC_COND_EXPR.  */
> >
> > -#if GIMPLE
> >  (for uncond_op (UNCOND_BINARY)
> >       cond_op (COND_BINARY)
> >   (simplify
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 201b8aae1c0..fc65a3a7c23 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")

[-- Attachment #2: pr93183-3.txt --]
[-- Type: text/plain, Size: 4609 bytes --]

diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 7112c116835..9d88b2f8551 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
   memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof *cond_op.ops);
   switch (num_ops - 2)
     {
+    case 1:
+      if (!gimple_resimplify1 (seq, &cond_op, valueize))
+	return false;
+      break;
     case 2:
       if (!gimple_resimplify2 (seq, &cond_op, valueize))
 	return false;
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78db25bbac4..b57c7a4ed3e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
   T (BIT_IOR_EXPR, IFN_COND_IOR) \
   T (BIT_XOR_EXPR, IFN_COND_XOR) \
   T (LSHIFT_EXPR, IFN_COND_SHL) \
-  T (RSHIFT_EXPR, IFN_COND_SHR)
+  T (RSHIFT_EXPR, IFN_COND_SHR) \
+  T (NEGATE_EXPR, IFN_COND_NEG)
 
 /* Return a function that only performs CODE when a certain condition is met
    and that uses a given fallback value otherwise.  For example, if CODE is
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 88169ef4656..798a50ba332 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/match.pd b/gcc/match.pd
index a9791ceb74a..1a4c157bd31 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
 DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
 DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
+/* Unary operations and their associated IFN_COND_* function.  */
+(define_operator_list UNCOND_UNARY
+  negate)
+(define_operator_list COND_UNARY
+  IFN_COND_NEG)
+
 /* Binary operations and their associated IFN_COND_* function.  */
 (define_operator_list UNCOND_BINARY
   plus minus
@@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 						  false, prec)); })
 		     { build_zero_cst (TREE_TYPE (@0)); }))))))))
 
+#if GIMPLE
+
+/* Simplify:
+     a = op a1
+     r = cond ? a : b
+     --> r = .COND_FN (cond, a, b)
+and,
+    a = op a1
+    r = cond ? b : a
+    --> r = .COND_FN (~cond, b, a).  */
+
+(for uncond_op (UNCOND_UNARY)
+     cond_op (COND_UNARY)
+ (simplify
+  (vec_cond @0 (uncond_op@3 @1) @2)
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op @0 @1 @2))))
+ (simplify
+  (vec_cond @0 @1 (uncond_op@3 @2))
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op (bit_not @0) @2 @1)))))
+
 /* Simplify:
 
      a = a1 op a2
@@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    conditional internal functions must support the same comparisons
    inside and outside a VEC_COND_EXPR.  */
 
-#if GIMPLE
 (for uncond_op (UNCOND_BINARY)
      cond_op (COND_BINARY)
  (simplify
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 201b8aae1c0..fc65a3a7c23 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
 OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
+OPTAB_D (cond_neg_optab, "cond_neg$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
new file mode 100644
index 00000000000..2f92224cecb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=generic+sve" } */
+
+typedef unsigned char uint8_t;
+
+static inline uint8_t
+x264_clip_uint8(uint8_t x)
+{
+  uint8_t t = -x;
+  uint8_t t1 = x & ~63; 
+  return (t1 != 0) ? t : x; 
+}
+
+void
+mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
+{
+  for (int x = 0; x < n*16; x++)
+    dst[x] = x264_clip_uint8(src[x]);
+}
+
+/* { dg-final { scan-assembler-not {\tsel} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-12 12:05       ` Prathamesh Kulkarni
@ 2021-10-13  7:56         ` Richard Sandiford
  2021-10-18  6:01           ` Prathamesh Kulkarni
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2021-10-13  7:56 UTC (permalink / raw)
  To: Prathamesh Kulkarni; +Cc: gcc Patches

Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> On Mon, 11 Oct 2021 at 20:42, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
>> > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
>> > <richard.sandiford@arm.com> wrote:
>> >>
>> >> Thanks for looking at this.
>> >>
>> >> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
>> >> > Hi,
>> >> > As mentioned in PR, for the following test-case:
>> >> >
>> >> > typedef unsigned char uint8_t;
>> >> >
>> >> > static inline uint8_t
>> >> > x264_clip_uint8(uint8_t x)
>> >> > {
>> >> >   uint8_t t = -x;
>> >> >   uint8_t t1 = x & ~63;
>> >> >   return (t1 != 0) ? t : x;
>> >> > }
>> >> >
>> >> > void
>> >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
>> >> > {
>> >> >   for (int x = 0; x < n*16; x++)
>> >> >     dst[x] = x264_clip_uint8(src[x]);
>> >> > }
>> >> >
>> >> > -O3 -mcpu=generic+sve generates following code for the inner loop:
>> >> >
>> >> > .L3:
>> >> >         ld1b    z0.b, p0/z, [x1, x2]
>> >> >         movprfx z2, z0
>> >> >         and     z2.b, z2.b, #0xc0
>> >> >         movprfx z1, z0
>> >> >         neg     z1.b, p1/m, z0.b
>> >> >         cmpeq   p2.b, p1/z, z2.b, #0
>> >> >         sel     z0.b, p2, z0.b, z1.b
>> >> >         st1b    z0.b, p0, [x0, x2]
>> >> >         add     x2, x2, x4
>> >> >         whilelo p0.b, w2, w3
>> >> >         b.any   .L3
>> >> >
>> >> > The sel is redundant since we could conditionally negate z0 based on
>> >> > the predicate
>> >> > comparing z2 with 0.
>> >> >
>> >> > As suggested in the PR, the attached patch, introduces a new
>> >> > conditional internal function .COND_NEG, and in gimple-isel replaces
>> >> > the following sequence:
>> >> >    op2 = -op1
>> >> >    op0 = A cmp B
>> >> >    lhs = op0 ? op1 : op2
>> >> >
>> >> > with:
>> >> >    op0 = A inverted_cmp B
>> >> >    lhs = .COND_NEG (op0, op1, op1).
>> >> >
>> >> > lhs = .COD_NEG (op0, op1, op1)
>> >> > implies
>> >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
>> >> >
>> >> > With patch, it generates the following code-gen:
>> >> > .L3:
>> >> >         ld1b    z0.b, p0/z, [x1, x2]
>> >> >         movprfx z1, z0
>> >> >         and     z1.b, z1.b, #0xc0
>> >> >         cmpne   p1.b, p2/z, z1.b, #0
>> >> >         neg     z0.b, p1/m, z0.b
>> >> >         st1b    z0.b, p0, [x0, x2]
>> >> >         add     x2, x2, x4
>> >> >         whilelo p0.b, w2, w3
>> >> >         b.any   .L3
>> >> >
>> >> > While it seems to work for this test-case, I am not entirely sure if
>> >> > the patch is correct. Does it look in the right direction ?
>> >>
>> >> For binary ops we use match.pd rather than isel:
>> >>
>> >> (for uncond_op (UNCOND_BINARY)
>> >>      cond_op (COND_BINARY)
>> >>  (simplify
>> >>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
>> >>   (with { tree op_type = TREE_TYPE (@4); }
>> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>> >>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
>> >>  (simplify
>> >>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
>> >>   (with { tree op_type = TREE_TYPE (@4); }
>> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
>> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
>> >>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
>> >>
>> >> I think it'd be good to do the same here, using new (UN)COND_UNARY
>> >> iterators.  (The iterators will only have one value to start with,
>> >> but other unary ops could get the same treatment in future.)
>> > Thanks for the suggestions.
>> > The attached patch adds a pattern to match.pd to replace:
>> > cond = a cmp b
>> > r = cond ? x : -x
>> > with:
>> > cond = a inverted_cmp b
>> > r = cond ? -x : x
>> >
>> > Code-gen with patch for inner loop:
>> > .L3:
>> >         ld1b    z0.b, p0/z, [x1, x2]
>> >         movprfx z1, z0
>> >         and     z1.b, z1.b, #0xc0
>> >         cmpne   p1.b, p2/z, z1.b, #0
>> >         neg     z0.b, p1/m, z0.b
>> >         st1b    z0.b, p0, [x0, x2]
>> >         add     x2, x2, x4
>> >         whilelo p0.b, w2, w3
>> >         b.any   .L3
>> >
>> > Does it look OK ?
>> > I didn't add it under (UN)COND_UNARY since it inverts the comparison,
>> > which we might not want to do for other unary ops ?
>>
>> I think we should follow the structure of the current binary and
>> ternary patterns: cope with unary operations in either arm of the
>> vec_cond and use bit_not for the case in which the unary operation
>> is in the “false” arm of the vec_cond.
>>
>> The bit_not will be folded away if the comparison can be inverted,
>> but it will be left in-place if the comparison can't be inverted
>> (as for some FP comparisons).
> Ah indeed, done in the attached patch.
> Does it look OK ?

Yeah, this is OK, thanks.

Richard

>
> Thanks,
> Prathamesh
>>
>> Thanks,
>> Richard
>>
>> >
>> > Also, I am not sure, how to test if target supports conditional
>> > internal function ?
>> > I tried to use:
>> > (for cmp (tcc_comparison)
>> >      icmp (inverted_tcc_comparison)
>> >  (simplify
>> >   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
>> >    (with { auto op_type = TREE_TYPE (@2); }
>> >     (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
>> >          && is_truth_type_for (op_type, TREE_TYPE (@0)))
>> >       (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))
>> >
>> > but both the conditions seem to fail.
>> >
>> > Thanks,
>> > Prathamesh
>> >
>> >
>> >>
>> >> Richard
>> >>
>> >>
>> >> >
>> >> > Thanks,
>> >> > Prathamesh
>> >> >
>> >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
>> >> > index 38e90933c3e..5b0dd3c1993 100644
>> >> > --- a/gcc/gimple-isel.cc
>> >> > +++ b/gcc/gimple-isel.cc
>> >> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
>> >> >  #include "optabs.h"
>> >> >  #include "gimple-fold.h"
>> >> >  #include "internal-fn.h"
>> >> > +#include "fold-const.h"
>> >> > +#include "tree-pretty-print.h"
>> >> >
>> >> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
>> >> >     internal function based on vector type of selected expansion.
>> >> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
>> >> >             return new_stmt;
>> >> >           }
>> >> >
>> >> > +       /* Replace:
>> >> > +          op2 = -op1
>> >> > +          op0 = A cmp B
>> >> > +          lhs = op0 ? op1 : op2
>> >> > +
>> >> > +          with:
>> >> > +          op0 = A inverted_cmp B
>> >> > +          lhs = .COND_NEG (op0, op1, op1).  */
>> >> > +
>> >> > +       gassign *op1_def = nullptr;
>> >> > +       if (TREE_CODE (op1) == SSA_NAME)
>> >> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
>> >> > +
>> >> > +       gassign *op2_def = nullptr;
>> >> > +       if (TREE_CODE (op2) == SSA_NAME)
>> >> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
>> >> > +
>> >> > +       if (can_compute_op0 && op1_def && op2_def
>> >> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
>> >> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
>> >> > +         {
>> >> > +           auto inverted_code
>> >> > +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
>> >> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
>> >> > +           auto gsi2 = gsi_for_stmt (op2_def);
>> >> > +           gsi_remove (&gsi2, true);
>> >> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
>> >> > +         }
>> >> > +
>> >> >         if (can_compute_op0
>> >> >             && used_vec_cond_exprs >= 2
>> >> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
>> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> >> > index 78db25bbac4..b57c7a4ed3e 100644
>> >> > --- a/gcc/internal-fn.c
>> >> > +++ b/gcc/internal-fn.c
>> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
>> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
>> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
>> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
>> >> >
>> >> >  /* Return a function that only performs CODE when a certain condition is met
>> >> >     and that uses a given fallback value otherwise.  For example, if CODE is
>> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
>> >> > index 88169ef4656..db40d1bfd18 100644
>> >> > --- a/gcc/internal-fn.def
>> >> > +++ b/gcc/internal-fn.def
>> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>> >> >
>> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
>> >> > +
>> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>> >> >
>> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
>> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
>> >> > index 201b8aae1c0..fc65a3a7c23 100644
>> >> > --- a/gcc/optabs.def
>> >> > +++ b/gcc/optabs.def
>> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
>> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
>> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
>> >> >  OPTAB_D (cmov_optab, "cmov$a6")
>> >> >  OPTAB_D (cstore_optab, "cstore$a4")
>> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
>> >
>> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
>> > index 7112c116835..90cf131af48 100644
>> > --- a/gcc/gimple-match-head.c
>> > +++ b/gcc/gimple-match-head.c
>> > @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
>> >        if (!gimple_resimplify3 (seq, &cond_op, valueize))
>> >       return false;
>> >        break;
>> > +    case 1:
>> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
>> > +     return false;
>> > +      break;
>> >      default:
>> >        gcc_unreachable ();
>> >      }
>> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
>> > index 78db25bbac4..b57c7a4ed3e 100644
>> > --- a/gcc/internal-fn.c
>> > +++ b/gcc/internal-fn.c
>> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
>> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
>> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
>> > +  T (NEGATE_EXPR, IFN_COND_NEG)
>> >
>> >  /* Return a function that only performs CODE when a certain condition is met
>> >     and that uses a given fallback value otherwise.  For example, if CODE is
>> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
>> > index 88169ef4656..db40d1bfd18 100644
>> > --- a/gcc/internal-fn.def
>> > +++ b/gcc/internal-fn.def
>> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>> >
>> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
>> > +
>> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>> >
>> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
>> > diff --git a/gcc/match.pd b/gcc/match.pd
>> > index a9791ceb74a..fde6d32b2c4 100644
>> > --- a/gcc/match.pd
>> > +++ b/gcc/match.pd
>> > @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> >                                                 wi::mask (tree_to_uhwi (@1),
>> >                                                 false, prec)); })
>> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
>> > +#if GIMPLE
>> > +
>> > +/* Simplify:
>> > +
>> > +     cond = a cmp b
>> > +     r = cond ? x : -x
>> > +
>> > +    to:
>> > +
>> > +     cond = a inverted_cmp b
>> > +     r = cond ? -x : x.  */
>> > +
>> > +(for cmp (tcc_comparison)
>> > +     icmp (inverted_tcc_comparison)
>> > + (simplify
>> > +  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
>> > +   (with { auto op_type = TREE_TYPE (@2); }
>> > +    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
>> >
>> >  /* Simplify:
>> >
>> > @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>> >     conditional internal functions must support the same comparisons
>> >     inside and outside a VEC_COND_EXPR.  */
>> >
>> > -#if GIMPLE
>> >  (for uncond_op (UNCOND_BINARY)
>> >       cond_op (COND_BINARY)
>> >   (simplify
>> > diff --git a/gcc/optabs.def b/gcc/optabs.def
>> > index 201b8aae1c0..fc65a3a7c23 100644
>> > --- a/gcc/optabs.def
>> > +++ b/gcc/optabs.def
>> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
>> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
>> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
>> >  OPTAB_D (cmov_optab, "cmov$a6")
>> >  OPTAB_D (cstore_optab, "cstore$a4")
>> >  OPTAB_D (ctrap_optab, "ctrap$a4")
>
> diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> index 7112c116835..9d88b2f8551 100644
> --- a/gcc/gimple-match-head.c
> +++ b/gcc/gimple-match-head.c
> @@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
>    memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof *cond_op.ops);
>    switch (num_ops - 2)
>      {
> +    case 1:
> +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> +	return false;
> +      break;
>      case 2:
>        if (!gimple_resimplify2 (seq, &cond_op, valueize))
>  	return false;
> diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> index 78db25bbac4..b57c7a4ed3e 100644
> --- a/gcc/internal-fn.c
> +++ b/gcc/internal-fn.c
> @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
>    T (BIT_IOR_EXPR, IFN_COND_IOR) \
>    T (BIT_XOR_EXPR, IFN_COND_XOR) \
>    T (LSHIFT_EXPR, IFN_COND_SHL) \
> -  T (RSHIFT_EXPR, IFN_COND_SHR)
> +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> +  T (NEGATE_EXPR, IFN_COND_NEG)
>  
>  /* Return a function that only performs CODE when a certain condition is met
>     and that uses a given fallback value otherwise.  For example, if CODE is
> diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> index 88169ef4656..798a50ba332 100644
> --- a/gcc/internal-fn.def
> +++ b/gcc/internal-fn.def
> @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
>  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
>  
> +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
> +
>  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
>  
>  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> diff --git a/gcc/match.pd b/gcc/match.pd
> index a9791ceb74a..1a4c157bd31 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
>  DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
>  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  
> +/* Unary operations and their associated IFN_COND_* function.  */
> +(define_operator_list UNCOND_UNARY
> +  negate)
> +(define_operator_list COND_UNARY
> +  IFN_COND_NEG)
> +
>  /* Binary operations and their associated IFN_COND_* function.  */
>  (define_operator_list UNCOND_BINARY
>    plus minus
> @@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>  						  false, prec)); })
>  		     { build_zero_cst (TREE_TYPE (@0)); }))))))))
>  
> +#if GIMPLE
> +
> +/* Simplify:
> +     a = op a1
> +     r = cond ? a : b
> +     --> r = .COND_FN (cond, a, b)
> +and,
> +    a = op a1
> +    r = cond ? b : a
> +    --> r = .COND_FN (~cond, b, a).  */
> +
> +(for uncond_op (UNCOND_UNARY)
> +     cond_op (COND_UNARY)
> + (simplify
> +  (vec_cond @0 (uncond_op@3 @1) @2)
> +   (with { tree op_type = TREE_TYPE (@3); }
> +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> +     (cond_op @0 @1 @2))))
> + (simplify
> +  (vec_cond @0 @1 (uncond_op@3 @2))
> +   (with { tree op_type = TREE_TYPE (@3); }
> +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> +     (cond_op (bit_not @0) @2 @1)))))
> +
>  /* Simplify:
>  
>       a = a1 op a2
> @@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>     conditional internal functions must support the same comparisons
>     inside and outside a VEC_COND_EXPR.  */
>  
> -#if GIMPLE
>  (for uncond_op (UNCOND_BINARY)
>       cond_op (COND_BINARY)
>   (simplify
> diff --git a/gcc/optabs.def b/gcc/optabs.def
> index 201b8aae1c0..fc65a3a7c23 100644
> --- a/gcc/optabs.def
> +++ b/gcc/optabs.def
> @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
>  OPTAB_D (cond_fms_optab, "cond_fms$a")
>  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
>  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> +OPTAB_D (cond_neg_optab, "cond_neg$a")
>  OPTAB_D (cmov_optab, "cmov$a6")
>  OPTAB_D (cstore_optab, "cstore$a4")
>  OPTAB_D (ctrap_optab, "ctrap$a4")
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> new file mode 100644
> index 00000000000..2f92224cecb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mcpu=generic+sve" } */
> +
> +typedef unsigned char uint8_t;
> +
> +static inline uint8_t
> +x264_clip_uint8(uint8_t x)
> +{
> +  uint8_t t = -x;
> +  uint8_t t1 = x & ~63; 
> +  return (t1 != 0) ? t : x; 
> +}
> +
> +void
> +mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> +{
> +  for (int x = 0; x < n*16; x++)
> +    dst[x] = x264_clip_uint8(src[x]);
> +}
> +
> +/* { dg-final { scan-assembler-not {\tsel} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-13  7:56         ` Richard Sandiford
@ 2021-10-18  6:01           ` Prathamesh Kulkarni
  2021-10-18  9:04             ` Richard Sandiford
  0 siblings, 1 reply; 9+ messages in thread
From: Prathamesh Kulkarni @ 2021-10-18  6:01 UTC (permalink / raw)
  To: Prathamesh Kulkarni, gcc Patches, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 20829 bytes --]

On Wed, 13 Oct 2021 at 13:26, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > On Mon, 11 Oct 2021 at 20:42, Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> >> > On Fri, 8 Oct 2021 at 21:19, Richard Sandiford
> >> > <richard.sandiford@arm.com> wrote:
> >> >>
> >> >> Thanks for looking at this.
> >> >>
> >> >> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> >> >> > Hi,
> >> >> > As mentioned in PR, for the following test-case:
> >> >> >
> >> >> > typedef unsigned char uint8_t;
> >> >> >
> >> >> > static inline uint8_t
> >> >> > x264_clip_uint8(uint8_t x)
> >> >> > {
> >> >> >   uint8_t t = -x;
> >> >> >   uint8_t t1 = x & ~63;
> >> >> >   return (t1 != 0) ? t : x;
> >> >> > }
> >> >> >
> >> >> > void
> >> >> > mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> >> >> > {
> >> >> >   for (int x = 0; x < n*16; x++)
> >> >> >     dst[x] = x264_clip_uint8(src[x]);
> >> >> > }
> >> >> >
> >> >> > -O3 -mcpu=generic+sve generates following code for the inner loop:
> >> >> >
> >> >> > .L3:
> >> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >> >         movprfx z2, z0
> >> >> >         and     z2.b, z2.b, #0xc0
> >> >> >         movprfx z1, z0
> >> >> >         neg     z1.b, p1/m, z0.b
> >> >> >         cmpeq   p2.b, p1/z, z2.b, #0
> >> >> >         sel     z0.b, p2, z0.b, z1.b
> >> >> >         st1b    z0.b, p0, [x0, x2]
> >> >> >         add     x2, x2, x4
> >> >> >         whilelo p0.b, w2, w3
> >> >> >         b.any   .L3
> >> >> >
> >> >> > The sel is redundant since we could conditionally negate z0 based on
> >> >> > the predicate
> >> >> > comparing z2 with 0.
> >> >> >
> >> >> > As suggested in the PR, the attached patch, introduces a new
> >> >> > conditional internal function .COND_NEG, and in gimple-isel replaces
> >> >> > the following sequence:
> >> >> >    op2 = -op1
> >> >> >    op0 = A cmp B
> >> >> >    lhs = op0 ? op1 : op2
> >> >> >
> >> >> > with:
> >> >> >    op0 = A inverted_cmp B
> >> >> >    lhs = .COND_NEG (op0, op1, op1).
> >> >> >
> >> >> > lhs = .COD_NEG (op0, op1, op1)
> >> >> > implies
> >> >> > lhs = neg (op1) if cond is true OR fall back to op1 if cond is false.
> >> >> >
> >> >> > With patch, it generates the following code-gen:
> >> >> > .L3:
> >> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >> >         movprfx z1, z0
> >> >> >         and     z1.b, z1.b, #0xc0
> >> >> >         cmpne   p1.b, p2/z, z1.b, #0
> >> >> >         neg     z0.b, p1/m, z0.b
> >> >> >         st1b    z0.b, p0, [x0, x2]
> >> >> >         add     x2, x2, x4
> >> >> >         whilelo p0.b, w2, w3
> >> >> >         b.any   .L3
> >> >> >
> >> >> > While it seems to work for this test-case, I am not entirely sure if
> >> >> > the patch is correct. Does it look in the right direction ?
> >> >>
> >> >> For binary ops we use match.pd rather than isel:
> >> >>
> >> >> (for uncond_op (UNCOND_BINARY)
> >> >>      cond_op (COND_BINARY)
> >> >>  (simplify
> >> >>   (vec_cond @0 (view_convert? (uncond_op@4 @1 @2)) @3)
> >> >>   (with { tree op_type = TREE_TYPE (@4); }
> >> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> >> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >>     (view_convert (cond_op @0 @1 @2 (view_convert:op_type @3))))))
> >> >>  (simplify
> >> >>   (vec_cond @0 @1 (view_convert? (uncond_op@4 @2 @3)))
> >> >>   (with { tree op_type = TREE_TYPE (@4); }
> >> >>    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> >> >>         && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >>     (view_convert (cond_op (bit_not @0) @2 @3 (view_convert:op_type @1)))))))
> >> >>
> >> >> I think it'd be good to do the same here, using new (UN)COND_UNARY
> >> >> iterators.  (The iterators will only have one value to start with,
> >> >> but other unary ops could get the same treatment in future.)
> >> > Thanks for the suggestions.
> >> > The attached patch adds a pattern to match.pd to replace:
> >> > cond = a cmp b
> >> > r = cond ? x : -x
> >> > with:
> >> > cond = a inverted_cmp b
> >> > r = cond ? -x : x
> >> >
> >> > Code-gen with patch for inner loop:
> >> > .L3:
> >> >         ld1b    z0.b, p0/z, [x1, x2]
> >> >         movprfx z1, z0
> >> >         and     z1.b, z1.b, #0xc0
> >> >         cmpne   p1.b, p2/z, z1.b, #0
> >> >         neg     z0.b, p1/m, z0.b
> >> >         st1b    z0.b, p0, [x0, x2]
> >> >         add     x2, x2, x4
> >> >         whilelo p0.b, w2, w3
> >> >         b.any   .L3
> >> >
> >> > Does it look OK ?
> >> > I didn't add it under (UN)COND_UNARY since it inverts the comparison,
> >> > which we might not want to do for other unary ops ?
> >>
> >> I think we should follow the structure of the current binary and
> >> ternary patterns: cope with unary operations in either arm of the
> >> vec_cond and use bit_not for the case in which the unary operation
> >> is in the “false” arm of the vec_cond.
> >>
> >> The bit_not will be folded away if the comparison can be inverted,
> >> but it will be left in-place if the comparison can't be inverted
> >> (as for some FP comparisons).
> > Ah indeed, done in the attached patch.
> > Does it look OK ?
>
> Yeah, this is OK, thanks.
Hi,
Sorry for late reply. The patch regressed cond_unary_4.c,
For the following test:

void __attribute__((noipa))
test_int64_t_neg(int64_t *__restrict r, int64_t *__restrict a,
                 int64_t *__restrict pred, int n) {
  for (int i = 0; i < n; ++i)
    r[i] = pred[i] ? (-(a[i])) : 0;
}

code-gen difference before/after patch:
@@ -15,3 +15,2 @@
        whilelo p0.d, wzr, w3
-       mov     z2.b, #0
        .p2align 3,,7
@@ -21,3 +20,3 @@
        ld1d    z1.d, p1/z, [x1, x4, lsl 3]
-       movprfx z0, z2
+       movprfx z0.d, p1/z, z1.d
        neg     z0.d, p1/m, z1.d

and similarly for _neg tests of different type.
I guess using /z is an improvement ?

The attached patch adds view_convert to the pattern (needed for
int16_t) and adjusts cond_unary_4.c accordingly.
Bootstrapped + tested on aarch64-linux-gnu.
OK to commit ?

Thanks,
Prathamesh
>
> Richard
>
> >
> > Thanks,
> > Prathamesh
> >>
> >> Thanks,
> >> Richard
> >>
> >> >
> >> > Also, I am not sure, how to test if target supports conditional
> >> > internal function ?
> >> > I tried to use:
> >> > (for cmp (tcc_comparison)
> >> >      icmp (inverted_tcc_comparison)
> >> >  (simplify
> >> >   (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> >> >    (with { auto op_type = TREE_TYPE (@2); }
> >> >     (if (vectorized_internal_fn_supported_p (IFN_COND_NEG, op_type)
> >> >          && is_truth_type_for (op_type, TREE_TYPE (@0)))
> >> >       (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3)))))
> >> >
> >> > but both the conditions seem to fail.
> >> >
> >> > Thanks,
> >> > Prathamesh
> >> >
> >> >
> >> >>
> >> >> Richard
> >> >>
> >> >>
> >> >> >
> >> >> > Thanks,
> >> >> > Prathamesh
> >> >> >
> >> >> > diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> >> >> > index 38e90933c3e..5b0dd3c1993 100644
> >> >> > --- a/gcc/gimple-isel.cc
> >> >> > +++ b/gcc/gimple-isel.cc
> >> >> > @@ -39,6 +39,8 @@ along with GCC; see the file COPYING3.  If not see
> >> >> >  #include "optabs.h"
> >> >> >  #include "gimple-fold.h"
> >> >> >  #include "internal-fn.h"
> >> >> > +#include "fold-const.h"
> >> >> > +#include "tree-pretty-print.h"
> >> >> >
> >> >> >  /* Expand all ARRAY_REF(VIEW_CONVERT_EXPR) gimple assignments into calls to
> >> >> >     internal function based on vector type of selected expansion.
> >> >> > @@ -203,6 +205,35 @@ gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
> >> >> >             return new_stmt;
> >> >> >           }
> >> >> >
> >> >> > +       /* Replace:
> >> >> > +          op2 = -op1
> >> >> > +          op0 = A cmp B
> >> >> > +          lhs = op0 ? op1 : op2
> >> >> > +
> >> >> > +          with:
> >> >> > +          op0 = A inverted_cmp B
> >> >> > +          lhs = .COND_NEG (op0, op1, op1).  */
> >> >> > +
> >> >> > +       gassign *op1_def = nullptr;
> >> >> > +       if (TREE_CODE (op1) == SSA_NAME)
> >> >> > +         op1_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op1));
> >> >> > +
> >> >> > +       gassign *op2_def = nullptr;
> >> >> > +       if (TREE_CODE (op2) == SSA_NAME)
> >> >> > +         op2_def = static_cast<gassign *> (SSA_NAME_DEF_STMT (op2));
> >> >> > +
> >> >> > +       if (can_compute_op0 && op1_def && op2_def
> >> >> > +           && gimple_assign_rhs_code (op2_def) == NEGATE_EXPR
> >> >> > +           && operand_equal_p (gimple_assign_rhs1 (op2_def), op1, 0))
> >> >> > +         {
> >> >> > +           auto inverted_code
> >> >> > +             = invert_tree_comparison (gimple_assign_rhs_code (def_stmt), true);
> >> >> > +           gimple_assign_set_rhs_code (def_stmt, inverted_code);
> >> >> > +           auto gsi2 = gsi_for_stmt (op2_def);
> >> >> > +           gsi_remove (&gsi2, true);
> >> >> > +           return gimple_build_call_internal (IFN_COND_NEG, 3, op0, op1, op1);
> >> >> > +         }
> >> >> > +
> >> >> >         if (can_compute_op0
> >> >> >             && used_vec_cond_exprs >= 2
> >> >> >             && (get_vcond_mask_icode (mode, TYPE_MODE (op0_type))
> >> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> >> > index 78db25bbac4..b57c7a4ed3e 100644
> >> >> > --- a/gcc/internal-fn.c
> >> >> > +++ b/gcc/internal-fn.c
> >> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> >> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> >> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> >> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >> >> >
> >> >> >  /* Return a function that only performs CODE when a certain condition is met
> >> >> >     and that uses a given fallback value otherwise.  For example, if CODE is
> >> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> >> > index 88169ef4656..db40d1bfd18 100644
> >> >> > --- a/gcc/internal-fn.def
> >> >> > +++ b/gcc/internal-fn.def
> >> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >> >> >
> >> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> >> >> > +
> >> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >> >> >
> >> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> >> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> >> > index 201b8aae1c0..fc65a3a7c23 100644
> >> >> > --- a/gcc/optabs.def
> >> >> > +++ b/gcc/optabs.def
> >> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >> >> >  OPTAB_D (cmov_optab, "cmov$a6")
> >> >> >  OPTAB_D (cstore_optab, "cstore$a4")
> >> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> >> >
> >> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> >> > index 7112c116835..90cf131af48 100644
> >> > --- a/gcc/gimple-match-head.c
> >> > +++ b/gcc/gimple-match-head.c
> >> > @@ -878,6 +878,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
> >> >        if (!gimple_resimplify3 (seq, &cond_op, valueize))
> >> >       return false;
> >> >        break;
> >> > +    case 1:
> >> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> >> > +     return false;
> >> > +      break;
> >> >      default:
> >> >        gcc_unreachable ();
> >> >      }
> >> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> >> > index 78db25bbac4..b57c7a4ed3e 100644
> >> > --- a/gcc/internal-fn.c
> >> > +++ b/gcc/internal-fn.c
> >> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> >> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> >> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> >> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >> >
> >> >  /* Return a function that only performs CODE when a certain condition is met
> >> >     and that uses a given fallback value otherwise.  For example, if CODE is
> >> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> >> > index 88169ef4656..db40d1bfd18 100644
> >> > --- a/gcc/internal-fn.def
> >> > +++ b/gcc/internal-fn.def
> >> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >> >
> >> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST | ECF_NOTHROW, cond_neg, cond_unary)
> >> > +
> >> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >> >
> >> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> >> > diff --git a/gcc/match.pd b/gcc/match.pd
> >> > index a9791ceb74a..fde6d32b2c4 100644
> >> > --- a/gcc/match.pd
> >> > +++ b/gcc/match.pd
> >> > @@ -7037,6 +7037,24 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >> >                                                 wi::mask (tree_to_uhwi (@1),
> >> >                                                 false, prec)); })
> >> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
> >> > +#if GIMPLE
> >> > +
> >> > +/* Simplify:
> >> > +
> >> > +     cond = a cmp b
> >> > +     r = cond ? x : -x
> >> > +
> >> > +    to:
> >> > +
> >> > +     cond = a inverted_cmp b
> >> > +     r = cond ? -x : x.  */
> >> > +
> >> > +(for cmp (tcc_comparison)
> >> > +     icmp (inverted_tcc_comparison)
> >> > + (simplify
> >> > +  (vec_cond (cmp@2 @0 @1) @3 (negate @3))
> >> > +   (with { auto op_type = TREE_TYPE (@2); }
> >> > +    (IFN_COND_NEG (icmp:op_type @0 @1) @3 @3))))
> >> >
> >> >  /* Simplify:
> >> >
> >> > @@ -7056,7 +7074,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >> >     conditional internal functions must support the same comparisons
> >> >     inside and outside a VEC_COND_EXPR.  */
> >> >
> >> > -#if GIMPLE
> >> >  (for uncond_op (UNCOND_BINARY)
> >> >       cond_op (COND_BINARY)
> >> >   (simplify
> >> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> >> > index 201b8aae1c0..fc65a3a7c23 100644
> >> > --- a/gcc/optabs.def
> >> > +++ b/gcc/optabs.def
> >> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> >> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >> >  OPTAB_D (cmov_optab, "cmov$a6")
> >> >  OPTAB_D (cstore_optab, "cstore$a4")
> >> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> >
> > diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
> > index 7112c116835..9d88b2f8551 100644
> > --- a/gcc/gimple-match-head.c
> > +++ b/gcc/gimple-match-head.c
> > @@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
> >    memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof *cond_op.ops);
> >    switch (num_ops - 2)
> >      {
> > +    case 1:
> > +      if (!gimple_resimplify1 (seq, &cond_op, valueize))
> > +     return false;
> > +      break;
> >      case 2:
> >        if (!gimple_resimplify2 (seq, &cond_op, valueize))
> >       return false;
> > diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
> > index 78db25bbac4..b57c7a4ed3e 100644
> > --- a/gcc/internal-fn.c
> > +++ b/gcc/internal-fn.c
> > @@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
> >    T (BIT_IOR_EXPR, IFN_COND_IOR) \
> >    T (BIT_XOR_EXPR, IFN_COND_XOR) \
> >    T (LSHIFT_EXPR, IFN_COND_SHL) \
> > -  T (RSHIFT_EXPR, IFN_COND_SHR)
> > +  T (RSHIFT_EXPR, IFN_COND_SHR) \
> > +  T (NEGATE_EXPR, IFN_COND_NEG)
> >
> >  /* Return a function that only performs CODE when a certain condition is met
> >     and that uses a given fallback value otherwise.  For example, if CODE is
> > diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
> > index 88169ef4656..798a50ba332 100644
> > --- a/gcc/internal-fn.def
> > +++ b/gcc/internal-fn.def
> > @@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
> >  DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
> >
> > +DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
> > +
> >  DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
> >
> >  DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
> > diff --git a/gcc/match.pd b/gcc/match.pd
> > index a9791ceb74a..1a4c157bd31 100644
> > --- a/gcc/match.pd
> > +++ b/gcc/match.pd
> > @@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
> >  DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
> >  DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >
> > +/* Unary operations and their associated IFN_COND_* function.  */
> > +(define_operator_list UNCOND_UNARY
> > +  negate)
> > +(define_operator_list COND_UNARY
> > +  IFN_COND_NEG)
> > +
> >  /* Binary operations and their associated IFN_COND_* function.  */
> >  (define_operator_list UNCOND_BINARY
> >    plus minus
> > @@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >                                                 false, prec)); })
> >                    { build_zero_cst (TREE_TYPE (@0)); }))))))))
> >
> > +#if GIMPLE
> > +
> > +/* Simplify:
> > +     a = op a1
> > +     r = cond ? a : b
> > +     --> r = .COND_FN (cond, a, b)
> > +and,
> > +    a = op a1
> > +    r = cond ? b : a
> > +    --> r = .COND_FN (~cond, b, a).  */
> > +
> > +(for uncond_op (UNCOND_UNARY)
> > +     cond_op (COND_UNARY)
> > + (simplify
> > +  (vec_cond @0 (uncond_op@3 @1) @2)
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op @0 @1 @2))))
> > + (simplify
> > +  (vec_cond @0 @1 (uncond_op@3 @2))
> > +   (with { tree op_type = TREE_TYPE (@3); }
> > +    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
> > +        && is_truth_type_for (op_type, TREE_TYPE (@0)))
> > +     (cond_op (bit_not @0) @2 @1)))))
> > +
> >  /* Simplify:
> >
> >       a = a1 op a2
> > @@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
> >     conditional internal functions must support the same comparisons
> >     inside and outside a VEC_COND_EXPR.  */
> >
> > -#if GIMPLE
> >  (for uncond_op (UNCOND_BINARY)
> >       cond_op (COND_BINARY)
> >   (simplify
> > diff --git a/gcc/optabs.def b/gcc/optabs.def
> > index 201b8aae1c0..fc65a3a7c23 100644
> > --- a/gcc/optabs.def
> > +++ b/gcc/optabs.def
> > @@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
> >  OPTAB_D (cond_fms_optab, "cond_fms$a")
> >  OPTAB_D (cond_fnma_optab, "cond_fnma$a")
> >  OPTAB_D (cond_fnms_optab, "cond_fnms$a")
> > +OPTAB_D (cond_neg_optab, "cond_neg$a")
> >  OPTAB_D (cmov_optab, "cmov$a6")
> >  OPTAB_D (cstore_optab, "cstore$a4")
> >  OPTAB_D (ctrap_optab, "ctrap$a4")
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > new file mode 100644
> > index 00000000000..2f92224cecb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -mcpu=generic+sve" } */
> > +
> > +typedef unsigned char uint8_t;
> > +
> > +static inline uint8_t
> > +x264_clip_uint8(uint8_t x)
> > +{
> > +  uint8_t t = -x;
> > +  uint8_t t1 = x & ~63;
> > +  return (t1 != 0) ? t : x;
> > +}
> > +
> > +void
> > +mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> > +{
> > +  for (int x = 0; x < n*16; x++)
> > +    dst[x] = x264_clip_uint8(src[x]);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not {\tsel} } } */

[-- Attachment #2: pr93183-5.txt --]
[-- Type: text/plain, Size: 6425 bytes --]

gcc/ChangeLog:
	PR target/93183
	* gimple-match-head.c (try_conditional_simplification): Add case for single operand.
	* internal-fn.def: Add entry for COND_NEG internal function.
	* internal-fn.c (FOR_EACH_CODE_MAPPING): Add entry for
	NEGATE_EXPR, COND_NEG mapping.
	* optabs.def: Add entry for cond_neg_optab.
	* match.pd (UNCOND_UNARY, COND_UNARY): New operator lists.
	(vec_cond COND (foo A) B) -> (IFN_COND_FOO COND A B): New pattern.
	(vec_cond COND B (foo A)) -> (IFN_COND_FOO ~COND A B): Likewise.

gcc/testsuite/ChangeLog:
	PR target/93183
	* gcc.target/aarch64/sve/cond_unary_4.c: Adjust.
	* gcc.target/aarch64/sve/pr93183.c: New test.

diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 7112c116835..9d88b2f8551 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -870,6 +870,10 @@ try_conditional_simplification (internal_fn ifn, gimple_match_op *res_op,
   memcpy (cond_op.ops, res_op->ops + 1, (num_ops - 1) * sizeof *cond_op.ops);
   switch (num_ops - 2)
     {
+    case 1:
+      if (!gimple_resimplify1 (seq, &cond_op, valueize))
+	return false;
+      break;
     case 2:
       if (!gimple_resimplify2 (seq, &cond_op, valueize))
 	return false;
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 78db25bbac4..b57c7a4ed3e 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -3877,7 +3877,8 @@ static void (*const internal_fn_expanders[]) (internal_fn, gcall *) = {
   T (BIT_IOR_EXPR, IFN_COND_IOR) \
   T (BIT_XOR_EXPR, IFN_COND_XOR) \
   T (LSHIFT_EXPR, IFN_COND_SHL) \
-  T (RSHIFT_EXPR, IFN_COND_SHR)
+  T (RSHIFT_EXPR, IFN_COND_SHR) \
+  T (NEGATE_EXPR, IFN_COND_NEG)
 
 /* Return a function that only performs CODE when a certain condition is met
    and that uses a given fallback value otherwise.  For example, if CODE is
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 88169ef4656..798a50ba332 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -204,6 +204,8 @@ DEF_INTERNAL_OPTAB_FN (COND_FMS, ECF_CONST, cond_fms, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMA, ECF_CONST, cond_fnma, cond_ternary)
 DEF_INTERNAL_OPTAB_FN (COND_FNMS, ECF_CONST, cond_fnms, cond_ternary)
 
+DEF_INTERNAL_OPTAB_FN (COND_NEG, ECF_CONST, cond_neg, cond_unary)
+
 DEF_INTERNAL_OPTAB_FN (RSQRT, ECF_CONST, rsqrt, unary)
 
 DEF_INTERNAL_OPTAB_FN (REDUC_PLUS, ECF_CONST | ECF_NOTHROW,
diff --git a/gcc/match.pd b/gcc/match.pd
index a9791ceb74a..f5a741705a6 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -78,6 +78,12 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (CEIL)
 DEFINE_INT_AND_FLOAT_ROUND_FN (ROUND)
 DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 
+/* Unary operations and their associated IFN_COND_* function.  */
+(define_operator_list UNCOND_UNARY
+  negate)
+(define_operator_list COND_UNARY
+  IFN_COND_NEG)
+
 /* Binary operations and their associated IFN_COND_* function.  */
 (define_operator_list UNCOND_BINARY
   plus minus
@@ -7038,6 +7044,32 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
 						  false, prec)); })
 		     { build_zero_cst (TREE_TYPE (@0)); }))))))))
 
+#if GIMPLE
+
+/* Simplify:
+     a = op a1
+     r = cond ? a : b
+     --> r = .COND_FN (cond, a, b)
+and,
+    a = op a1
+    r = cond ? b : a
+    --> r = .COND_FN (~cond, b, a).  */
+
+(for uncond_op (UNCOND_UNARY)
+     cond_op (COND_UNARY)
+ (simplify
+  (vec_cond @0 (view_convert? (uncond_op@3 @1)) @2)
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op @0 @1 @2))))
+ (simplify
+  (vec_cond @0 @1 (view_convert? (uncond_op@3 @2)))
+   (with { tree op_type = TREE_TYPE (@3); }
+    (if (vectorized_internal_fn_supported_p (as_internal_fn (cond_op), op_type)
+        && is_truth_type_for (op_type, TREE_TYPE (@0)))
+     (cond_op (bit_not @0) @2 @1)))))
+
 /* Simplify:
 
      a = a1 op a2
@@ -7056,7 +7088,6 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
    conditional internal functions must support the same comparisons
    inside and outside a VEC_COND_EXPR.  */
 
-#if GIMPLE
 (for uncond_op (UNCOND_BINARY)
      cond_op (COND_BINARY)
  (simplify
diff --git a/gcc/optabs.def b/gcc/optabs.def
index 201b8aae1c0..fc65a3a7c23 100644
--- a/gcc/optabs.def
+++ b/gcc/optabs.def
@@ -245,6 +245,7 @@ OPTAB_D (cond_fma_optab, "cond_fma$a")
 OPTAB_D (cond_fms_optab, "cond_fms$a")
 OPTAB_D (cond_fnma_optab, "cond_fnma$a")
 OPTAB_D (cond_fnms_optab, "cond_fnms$a")
+OPTAB_D (cond_neg_optab, "cond_neg$a")
 OPTAB_D (cmov_optab, "cmov$a6")
 OPTAB_D (cstore_optab, "cstore$a4")
 OPTAB_D (ctrap_optab, "ctrap$a4")
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
index 4604365fbef..cedc5b7c549 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
@@ -56,7 +56,11 @@ TEST_ALL (DEF_LOOP)
    we're relying on combine to merge a SEL and an arithmetic operation,
    and the SEL doesn't allow the "false" value to be zero when the "true"
    value is a register.  */
-/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 7 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-9]/z, z[0-9]+\.b} 1 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-9]/z, z[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-9]/z, z[0-9]+\.s} 2 } } */
+/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-9]/z, z[0-9]+\.d} 2 } } */
 
 /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
 /* { dg-final { scan-assembler-not {\tsel\t} } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
new file mode 100644
index 00000000000..2f92224cecb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O3 -mcpu=generic+sve" } */
+
+typedef unsigned char uint8_t;
+
+static inline uint8_t
+x264_clip_uint8(uint8_t x)
+{
+  uint8_t t = -x;
+  uint8_t t1 = x & ~63; 
+  return (t1 != 0) ? t : x; 
+}
+
+void
+mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
+{
+  for (int x = 0; x < n*16; x++)
+    dst[x] = x264_clip_uint8(src[x]);
+}
+
+/* { dg-final { scan-assembler-not {\tsel} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-18  6:01           ` Prathamesh Kulkarni
@ 2021-10-18  9:04             ` Richard Sandiford
  2021-10-18 10:16               ` Prathamesh Kulkarni
  0 siblings, 1 reply; 9+ messages in thread
From: Richard Sandiford @ 2021-10-18  9:04 UTC (permalink / raw)
  To: Prathamesh Kulkarni; +Cc: gcc Patches

Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> index 4604365fbef..cedc5b7c549 100644
> --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> @@ -56,7 +56,11 @@ TEST_ALL (DEF_LOOP)
>     we're relying on combine to merge a SEL and an arithmetic operation,
>     and the SEL doesn't allow the "false" value to be zero when the "true"
>     value is a register.  */
> -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 7 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-9]/z, z[0-9]+\.b} 1 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-9]/z, z[0-9]+\.h} 2 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-9]/z, z[0-9]+\.s} 2 } } */
> +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-9]/z, z[0-9]+\.d} 2 } } */

Very minor, but: p[0-7] is more accurate than p[0-9].

OK with that change, thanks.

Richard

>  
>  /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
>  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> new file mode 100644
> index 00000000000..2f92224cecb
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> @@ -0,0 +1,21 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O3 -mcpu=generic+sve" } */
> +
> +typedef unsigned char uint8_t;
> +
> +static inline uint8_t
> +x264_clip_uint8(uint8_t x)
> +{
> +  uint8_t t = -x;
> +  uint8_t t1 = x & ~63; 
> +  return (t1 != 0) ? t : x; 
> +}
> +
> +void
> +mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> +{
> +  for (int x = 0; x < n*16; x++)
> +    dst[x] = x264_clip_uint8(src[x]);
> +}
> +
> +/* { dg-final { scan-assembler-not {\tsel} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

* Re: [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional
  2021-10-18  9:04             ` Richard Sandiford
@ 2021-10-18 10:16               ` Prathamesh Kulkarni
  0 siblings, 0 replies; 9+ messages in thread
From: Prathamesh Kulkarni @ 2021-10-18 10:16 UTC (permalink / raw)
  To: Prathamesh Kulkarni, gcc Patches, Richard Sandiford

On Mon, 18 Oct 2021 at 14:34, Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> writes:
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> > index 4604365fbef..cedc5b7c549 100644
> > --- a/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/cond_unary_4.c
> > @@ -56,7 +56,11 @@ TEST_ALL (DEF_LOOP)
> >     we're relying on combine to merge a SEL and an arithmetic operation,
> >     and the SEL doesn't allow the "false" value to be zero when the "true"
> >     value is a register.  */
> > -/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 14 } } */
> > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+, z[0-9]+\n} 7 } } */
> > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.b, p[0-9]/z, z[0-9]+\.b} 1 } } */
> > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.h, p[0-9]/z, z[0-9]+\.h} 2 } } */
> > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.s, p[0-9]/z, z[0-9]+\.s} 2 } } */
> > +/* { dg-final { scan-assembler-times {\tmovprfx\tz[0-9]+\.d, p[0-9]/z, z[0-9]+\.d} 2 } } */
>
> Very minor, but: p[0-7] is more accurate than p[0-9].
Oops sorry, typo.
>
> OK with that change, thanks.
Thanks, committed as 20dcda98ed376cb61c74b2c71656f99c671ec9ce.

Thanks,
Prathamesh
>
> Richard
>
> >
> >  /* { dg-final { scan-assembler-not {\tmov\tz[^\n]*z} } } */
> >  /* { dg-final { scan-assembler-not {\tsel\t} } } */
> > diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > new file mode 100644
> > index 00000000000..2f92224cecb
> > --- /dev/null
> > +++ b/gcc/testsuite/gcc.target/aarch64/sve/pr93183.c
> > @@ -0,0 +1,21 @@
> > +/* { dg-do compile } */
> > +/* { dg-options "-O3 -mcpu=generic+sve" } */
> > +
> > +typedef unsigned char uint8_t;
> > +
> > +static inline uint8_t
> > +x264_clip_uint8(uint8_t x)
> > +{
> > +  uint8_t t = -x;
> > +  uint8_t t1 = x & ~63;
> > +  return (t1 != 0) ? t : x;
> > +}
> > +
> > +void
> > +mc_weight(uint8_t *restrict dst, uint8_t *restrict src, int n)
> > +{
> > +  for (int x = 0; x < n*16; x++)
> > +    dst[x] = x264_clip_uint8(src[x]);
> > +}
> > +
> > +/* { dg-final { scan-assembler-not {\tsel} } } */

^ permalink raw reply	[flat|nested] 9+ messages in thread

end of thread, other threads:[~2021-10-18 10:17 UTC | newest]

Thread overview: 9+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-10-06 10:57 [SVE] [gimple-isel] PR93183 - SVE does not use neg as conditional Prathamesh Kulkarni
2021-10-08 15:49 ` Richard Sandiford
2021-10-11 10:59   ` Prathamesh Kulkarni
2021-10-11 15:12     ` Richard Sandiford
2021-10-12 12:05       ` Prathamesh Kulkarni
2021-10-13  7:56         ` Richard Sandiford
2021-10-18  6:01           ` Prathamesh Kulkarni
2021-10-18  9:04             ` Richard Sandiford
2021-10-18 10:16               ` Prathamesh Kulkarni

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).