public inbox for gcc-patches@gcc.gnu.org
* [PATCH, rs6000] gimple folding of vec_msum()
@ 2017-12-01 17:22 Will Schmidt
  2017-12-01 17:46 ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Will Schmidt @ 2017-12-01 17:22 UTC (permalink / raw)
  To: GCC Patches
  Cc: Segher Boessenkool, Richard Biener, Bill Schmidt, David Edelsohn

Hi,
Add support for folding of vec_msum in GIMPLE.
    
This uses the DOT_PROD_EXPR gimple op, which is sensitive to type mismatches:
	error: type mismatch in dot product reduction
	__vector signed int
	__vector signed char
	__vector unsigned char
	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
So for those cases with a signed/unsigned mismatch in the arguments, this
converts those arguments to their signed type.
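
For reference, a minimal (illustrative, not part of the patch) use of the
mixed-sign vec_msum overload that takes this conversion path, assuming the
usual <altivec.h> prototypes:

/* vmsummbm case: signed char x unsigned char, accumulated into signed int.
   The fold converts both multiply operands to vector signed char before
   building the DOT_PROD_EXPR.  */
#include <altivec.h>

vector signed int
msum_mixed (vector signed char vsc, vector unsigned char vuc,
            vector signed int vsi)
{
  return vec_msum (vsc, vuc, vsi);
}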
    
This also adds a define_expand for sdot_prodv16qi. This is based on a similar
existing entry.
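
For context, the kind of reduction loop the autovectorizer can map onto an
sdot_prod pattern looks roughly like the sketch below; whether it actually
picks the new sdot_prodv16qi expand depends on the usual vectorizer and
cost-model checks:

/* Signed-char dot product reduced into a wider scalar accumulator; this is
   the shape DOT_PROD_EXPR / sdot_prod<mode> is meant to cover.  */
int
dot_prod (signed char *a, signed char *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += a[i] * b[i];
  return sum;
}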
    
Testing coverage is handled by the existing gcc.target/powerpc/fold-vec-msum*.c tests.
    
Sniff-tests have passed on P8.  Full regtests are currently running on other
assorted Power systems.
OK for trunk, pending successful results?
    
Thanks
-Will
    
[gcc]

2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>

	* config/rs6000/altivec.md (sdot_prodv16qi): New.
	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
	gimple-folding of vec_msum.
	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 7122f99..fa9e121 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -3349,11 +3349,26 @@
                                  (match_operand:V8HI 2 "register_operand" "v")]
                                 UNSPEC_VMSUMSHM)))]
   "TARGET_ALTIVEC"
   "
 {
-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1], operands[2], operands[3]));
+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
+				   operands[2], operands[3]));
+  DONE;
+}")
+
+(define_expand "sdot_prodv16qi"
+  [(set (match_operand:V4SI 0 "register_operand" "=v")
+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
+                   (unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
+                                 (match_operand:V16QI 2 "register_operand" "v")]
+                                UNSPEC_VMSUMM)))]
+  "TARGET_ALTIVEC"
+  "
+{
+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
+				   operands[2], operands[3]));
   DONE;
 }")
 
 (define_expand "widen_usum<mode>3"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 551d9c4..552fcdd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin (gimple_stmt_iterator *gsi)
     case VSX_BUILTIN_CMPLE_2DI:
     case VSX_BUILTIN_CMPLE_U2DI:
       fold_compare_helper (gsi, LE_EXPR, stmt);
       return true;
 
+    /* vec_msum.  */
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMSHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      {
+	arg0 = gimple_call_arg (stmt, 0);
+	arg1 = gimple_call_arg (stmt, 1);
+	tree arg2 = gimple_call_arg (stmt, 2);
+	lhs = gimple_call_lhs (stmt);
+	if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
+	else
+	  {
+	    // For the case where we have a mix of signed/unsigned
+	    // arguments, convert both multiply args to their signed type.
+	    gimple_seq stmts = NULL;
+	    location_t loc = gimple_location (stmt);
+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type, arg0);
+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type, arg1);
+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
+				     signed_arg0, signed_arg1, arg2);
+	  }
+	gimple_set_location (g, gimple_location (stmt));
+	gsi_replace (gsi, g, true);
+	return true;
+      }
+
     default:
       if (TARGET_DEBUG_BUILTIN)
 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
 		 fn_code, fn_name1, fn_name2);
       break;
@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
     case CRYPTO_BUILTIN_VSHASIGMAW:
     case CRYPTO_BUILTIN_VSHASIGMAD:
     case CRYPTO_BUILTIN_VSHASIGMA:
+    case ALTIVEC_BUILTIN_VMSUMUHM:
+    case ALTIVEC_BUILTIN_VMSUMUBM:
       h.uns_p[0] = 1;
       h.uns_p[1] = 1;
       h.uns_p[2] = 1;
       h.uns_p[3] = 1;
       break;
 
+    /* The second parm to this vec_msum variant is unsigned.  */
+    case ALTIVEC_BUILTIN_VMSUMMBM:
+      h.uns_p[2] = 1;
+      break;
+
     /* signed permute functions with unsigned char mask.  */
     case ALTIVEC_BUILTIN_VPERM_16QI:
     case ALTIVEC_BUILTIN_VPERM_8HI:
     case ALTIVEC_BUILTIN_VPERM_4SI:
     case ALTIVEC_BUILTIN_VPERM_4SF:



* Re: [PATCH, rs6000] gimple folding of vec_msum()
  2017-12-01 17:22 [PATCH, rs6000] gimple folding of vec_msum() Will Schmidt
@ 2017-12-01 17:46 ` Richard Biener
  2017-12-01 21:43   ` Will Schmidt
  0 siblings, 1 reply; 5+ messages in thread
From: Richard Biener @ 2017-12-01 17:46 UTC (permalink / raw)
  To: will_schmidt, Will Schmidt, GCC Patches
  Cc: Segher Boessenkool, Bill Schmidt, David Edelsohn

On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>Hi,
>Add support for folding of vec_msum in GIMPLE.
>    
>This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>mismatches:
>	error: type mismatch in dot product reduction
>	__vector signed int
>	__vector signed char
>	__vector unsigned char
>	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>So for those cases with a signed/unsigned mismatch in the arguments,
>this
>converts those arguments to their signed type.
>    
>This also adds a define_expand for sdot_prodv16qi. This is based on a
>similar
>existing entry.
>    
>Testing coverage is handled by the existing
>gcc.target/powerpc/fold-vec-msum*.c tests.
>    
>Sniff-tests have passed on P8.  full regtests currently running on
>other assorted
>power systems.
>OK for trunk with successful results?

Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant. 

This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that? 

That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE. 
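
As a minimal sketch of that ambiguity (illustrative only), both reference
implementations below are valid lanewise readings of a V16QI-to-V4SI
DOT_PROD_EXPR <a, b, c>; they are only required to agree once d[] is fully
reduced to a scalar:

/* Grouping 1: adjacent groups of four elements (what vmsummbm does).  */
void
dotprod_adjacent (int d[4], const signed char a[16],
                  const signed char b[16], const int c[4])
{
  for (int i = 0; i < 4; i++)
    d[i] = c[i] + a[4*i + 0] * b[4*i + 0] + a[4*i + 1] * b[4*i + 1]
                + a[4*i + 2] * b[4*i + 2] + a[4*i + 3] * b[4*i + 3];
}

/* Grouping 2: strided lanes i, i+4, i+8, i+12.  */
void
dotprod_strided (int d[4], const signed char a[16],
                 const signed char b[16], const int c[4])
{
  for (int i = 0; i < 4; i++)
    d[i] = c[i] + a[i] * b[i] + a[i + 4] * b[i + 4]
                + a[i + 8] * b[i + 8] + a[i + 12] * b[i + 12];
}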

Richard. 

>Thanks
>-Will
>    
>[gcc]
>
>2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
>
>	* config/rs6000/altivec.md (sdot_prodv16qi): New.
>	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>	gimple-folding of vec_msum.
>	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>
>diff --git a/gcc/config/rs6000/altivec.md
>b/gcc/config/rs6000/altivec.md
>index 7122f99..fa9e121 100644
>--- a/gcc/config/rs6000/altivec.md
>+++ b/gcc/config/rs6000/altivec.md
>@@ -3349,11 +3349,26 @@
>                         (match_operand:V8HI 2 "register_operand" "v")]
>                                 UNSPEC_VMSUMSHM)))]
>   "TARGET_ALTIVEC"
>   "
> {
>-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>operands[2], operands[3]));
>+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>+				   operands[2], operands[3]));
>+  DONE;
>+}")
>+
>+(define_expand "sdot_prodv16qi"
>+  [(set (match_operand:V4SI 0 "register_operand" "=v")
>+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>+                   (unspec:V4SI [(match_operand:V16QI 1
>"register_operand" "v")
>+                                 (match_operand:V16QI 2
>"register_operand" "v")]
>+                                UNSPEC_VMSUMM)))]
>+  "TARGET_ALTIVEC"
>+  "
>+{
>+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>+				   operands[2], operands[3]));
>   DONE;
> }")
> 
> (define_expand "widen_usum<mode>3"
>   [(set (match_operand:V4SI 0 "register_operand" "=v")
>diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>index 551d9c4..552fcdd 100644
>--- a/gcc/config/rs6000/rs6000.c
>+++ b/gcc/config/rs6000/rs6000.c
>@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
>(gimple_stmt_iterator *gsi)
>     case VSX_BUILTIN_CMPLE_2DI:
>     case VSX_BUILTIN_CMPLE_U2DI:
>       fold_compare_helper (gsi, LE_EXPR, stmt);
>       return true;
> 
>+    /* vec_msum.  */
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMSHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      {
>+	arg0 = gimple_call_arg (stmt, 0);
>+	arg1 = gimple_call_arg (stmt, 1);
>+	tree arg2 = gimple_call_arg (stmt, 2);
>+	lhs = gimple_call_lhs (stmt);
>+	if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
>+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>+	else
>+	  {
>+	    // For the case where we have a mix of signed/unsigned
>+	    // arguments, convert both multiply args to their signed type.
>+	    gimple_seq stmts = NULL;
>+	    location_t loc = gimple_location (stmt);
>+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
>arg0);
>+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
>arg1);
>+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>+				     signed_arg0, signed_arg1, arg2);
>+	  }
>+	gimple_set_location (g, gimple_location (stmt));
>+	gsi_replace (gsi, g, true);
>+	return true;
>+      }
>+
>     default:
>       if (TARGET_DEBUG_BUILTIN)
> 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> 		 fn_code, fn_name1, fn_name2);
>       break;
>@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
>machine_mode mode_arg0,
>     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
>     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
>     case CRYPTO_BUILTIN_VSHASIGMAW:
>     case CRYPTO_BUILTIN_VSHASIGMAD:
>     case CRYPTO_BUILTIN_VSHASIGMA:
>+    case ALTIVEC_BUILTIN_VMSUMUHM:
>+    case ALTIVEC_BUILTIN_VMSUMUBM:
>       h.uns_p[0] = 1;
>       h.uns_p[1] = 1;
>       h.uns_p[2] = 1;
>       h.uns_p[3] = 1;
>       break;
> 
>+    /* The second parm to this vec_msum variant is unsigned.  */
>+    case ALTIVEC_BUILTIN_VMSUMMBM:
>+      h.uns_p[2] = 1;
>+      break;
>+
>     /* signed permute functions with unsigned char mask.  */
>     case ALTIVEC_BUILTIN_VPERM_16QI:
>     case ALTIVEC_BUILTIN_VPERM_8HI:
>     case ALTIVEC_BUILTIN_VPERM_4SI:
>     case ALTIVEC_BUILTIN_VPERM_4SF:


* Re: [PATCH, rs6000] gimple folding of vec_msum()
  2017-12-01 17:46 ` Richard Biener
@ 2017-12-01 21:43   ` Will Schmidt
  2017-12-01 23:08     ` Bill Schmidt
  0 siblings, 1 reply; 5+ messages in thread
From: Will Schmidt @ 2017-12-01 21:43 UTC (permalink / raw)
  To: Richard Biener
  Cc: GCC Patches, Segher Boessenkool, Bill Schmidt, David Edelsohn

On Fri, 2017-12-01 at 18:46 +0100, Richard Biener wrote:
> On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> >Hi,
> >Add support for folding of vec_msum in GIMPLE.
> >    
> >This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
> >mismatches:
> >	error: type mismatch in dot product reduction
> >	__vector signed int
> >	__vector signed char
> >	__vector unsigned char
> >	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
> >So for those cases with a signed/unsigned mismatch in the arguments,
> >this
> >converts those arguments to their signed type.
> >    
> >This also adds a define_expand for sdot_prodv16qi. This is based on a
> >similar
> >existing entry.
> >    
> >Testing coverage is handled by the existing
> >gcc.target/powerpc/fold-vec-msum*.c tests.
> >    
> >Sniff-tests have passed on P8.  full regtests currently running on
> >other assorted
> >power systems.
> >OK for trunk with successful results?
> 
> Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant. 
> 
> This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that? 

Not that I see, but there may be an implied intent here that just isn't
obvious to me.   I'll defer to ... someone. :-)

> That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE.

OK.  With that in mind, should I just try to split this out into
separate multiply and add steps?

Thanks,
-Will




>  
> 
> Richard. 
> 
> >Thanks
> >-Will
> >    
> >[gcc]
> >
> >2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
> >
> >	* config/rs6000/altivec.md (sdot_prodv16qi): New.
> >	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
> >	gimple-folding of vec_msum.
> >	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
> >
> >diff --git a/gcc/config/rs6000/altivec.md
> >b/gcc/config/rs6000/altivec.md
> >index 7122f99..fa9e121 100644
> >--- a/gcc/config/rs6000/altivec.md
> >+++ b/gcc/config/rs6000/altivec.md
> >@@ -3349,11 +3349,26 @@
> >                         (match_operand:V8HI 2 "register_operand" "v")]
> >                                 UNSPEC_VMSUMSHM)))]
> >   "TARGET_ALTIVEC"
> >   "
> > {
> >-  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
> >operands[2], operands[3]));
> >+  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
> >+				   operands[2], operands[3]));
> >+  DONE;
> >+}")
> >+
> >+(define_expand "sdot_prodv16qi"
> >+  [(set (match_operand:V4SI 0 "register_operand" "=v")
> >+        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
> >+                   (unspec:V4SI [(match_operand:V16QI 1
> >"register_operand" "v")
> >+                                 (match_operand:V16QI 2
> >"register_operand" "v")]
> >+                                UNSPEC_VMSUMM)))]
> >+  "TARGET_ALTIVEC"
> >+  "
> >+{
> >+  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
> >+				   operands[2], operands[3]));
> >   DONE;
> > }")
> > 
> > (define_expand "widen_usum<mode>3"
> >   [(set (match_operand:V4SI 0 "register_operand" "=v")
> >diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> >index 551d9c4..552fcdd 100644
> >--- a/gcc/config/rs6000/rs6000.c
> >+++ b/gcc/config/rs6000/rs6000.c
> >@@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
> >(gimple_stmt_iterator *gsi)
> >     case VSX_BUILTIN_CMPLE_2DI:
> >     case VSX_BUILTIN_CMPLE_U2DI:
> >       fold_compare_helper (gsi, LE_EXPR, stmt);
> >       return true;
> > 
> >+    /* vec_msum.  */
> >+    case ALTIVEC_BUILTIN_VMSUMUHM:
> >+    case ALTIVEC_BUILTIN_VMSUMSHM:
> >+    case ALTIVEC_BUILTIN_VMSUMUBM:
> >+    case ALTIVEC_BUILTIN_VMSUMMBM:
> >+      {
> >+	arg0 = gimple_call_arg (stmt, 0);
> >+	arg1 = gimple_call_arg (stmt, 1);
> >+	tree arg2 = gimple_call_arg (stmt, 2);
> >+	lhs = gimple_call_lhs (stmt);
> >+	if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
> >+	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
> >+	else
> >+	  {
> >+	    // For the case where we have a mix of signed/unsigned
> >+	    // arguments, convert both multiply args to their signed type.
> >+	    gimple_seq stmts = NULL;
> >+	    location_t loc = gimple_location (stmt);
> >+	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
> >+	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
> >arg0);
> >+	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
> >arg1);
> >+	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
> >+	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
> >+				     signed_arg0, signed_arg1, arg2);
> >+	  }
> >+	gimple_set_location (g, gimple_location (stmt));
> >+	gsi_replace (gsi, g, true);
> >+	return true;
> >+      }
> >+
> >     default:
> >       if (TARGET_DEBUG_BUILTIN)
> > 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
> > 		 fn_code, fn_name1, fn_name2);
> >       break;
> >@@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
> >machine_mode mode_arg0,
> >     case CRYPTO_BUILTIN_VPERMXOR_V8HI:
> >     case CRYPTO_BUILTIN_VPERMXOR_V16QI:
> >     case CRYPTO_BUILTIN_VSHASIGMAW:
> >     case CRYPTO_BUILTIN_VSHASIGMAD:
> >     case CRYPTO_BUILTIN_VSHASIGMA:
> >+    case ALTIVEC_BUILTIN_VMSUMUHM:
> >+    case ALTIVEC_BUILTIN_VMSUMUBM:
> >       h.uns_p[0] = 1;
> >       h.uns_p[1] = 1;
> >       h.uns_p[2] = 1;
> >       h.uns_p[3] = 1;
> >       break;
> > 
> >+    /* The second parm to this vec_msum variant is unsigned.  */
> >+    case ALTIVEC_BUILTIN_VMSUMMBM:
> >+      h.uns_p[2] = 1;
> >+      break;
> >+
> >     /* signed permute functions with unsigned char mask.  */
> >     case ALTIVEC_BUILTIN_VPERM_16QI:
> >     case ALTIVEC_BUILTIN_VPERM_8HI:
> >     case ALTIVEC_BUILTIN_VPERM_4SI:
> >     case ALTIVEC_BUILTIN_VPERM_4SF:
> 



* Re: [PATCH, rs6000] gimple folding of vec_msum()
  2017-12-01 21:43   ` Will Schmidt
@ 2017-12-01 23:08     ` Bill Schmidt
  2017-12-04 10:22       ` Richard Biener
  0 siblings, 1 reply; 5+ messages in thread
From: Bill Schmidt @ 2017-12-01 23:08 UTC (permalink / raw)
  To: Will Schmidt
  Cc: Richard Biener, GCC Patches, Segher Boessenkool, David Edelsohn

Hi Will,

> On Dec 1, 2017, at 3:43 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
> 
> On Fri, 2017-12-01 at 18:46 +0100, Richard Biener wrote:
>> On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>>> Hi,
>>> Add support for folding of vec_msum in GIMPLE.
>>> 
>>> This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>>> mismatches:
>>> 	error: type mismatch in dot product reduction
>>> 	__vector signed int
>>> 	__vector signed char
>>> 	__vector unsigned char
>>> 	D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>>> So for those cases with a signed/unsigned mismatch in the arguments,
>>> this
>>> converts those arguments to their signed type.
>>> 
>>> This also adds a define_expand for sdot_prodv16qi. This is based on a
>>> similar
>>> existing entry.
>>> 
>>> Testing coverage is handled by the existing
>>> gcc.target/powerpc/fold-vec-msum*.c tests.
>>> 
>>> Sniff-tests have passed on P8.  full regtests currently running on
>>> other assorted
>>> power systems.
>>> OK for trunk with successful results?
>> 
>> Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant. 
>> 
>> This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that? 
> 
> Not that I see, but there may be an implied intent here that just isn't
> obvious to me.   I'll defer to ... someone. :-)
> 
>> That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE.
> 
> OK.  With that in mind, should I just try to split this out into
> separate multiply and add steps?

No.  The semantics of vec_msum are very specific and can't be accurately represented in GIMPLE.  This one should be left as a call until expand.
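
For reference, a rough scalar model of the vmsummbm form of vec_msum (types
and modulo arithmetic simplified); the fixed adjacent-group-of-four pairing
is exactly the detail DOT_PROD_EXPR leaves unspecified:

void
vmsummbm_ref (int d[4], const signed char a[16],
              const unsigned char b[16], const int c[4])
{
  /* Each 32-bit lane accumulates the four byte products from its own
     group of adjacent elements, plus the corresponding accumulator lane.  */
  for (int i = 0; i < 4; i++)
    {
      int acc = c[i];
      for (int j = 0; j < 4; j++)
        acc += a[4 * i + j] * b[4 * i + j];
      d[i] = acc;
    }
}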

Thanks!
Bill

> 
> Thanks,
> -Will
> 
> 
> 
> 
>> 
>> 
>> Richard. 
>> 
>>> Thanks
>>> -Will
>>> 
>>> [gcc]
>>> 
>>> 2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
>>> 
>>> 	* config/rs6000/altivec.md (sdot_prodv16qi): New.
>>> 	* config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>>> 	gimple-folding of vec_msum.
>>> 	(builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>>> 
>>> diff --git a/gcc/config/rs6000/altivec.md
>>> b/gcc/config/rs6000/altivec.md
>>> index 7122f99..fa9e121 100644
>>> --- a/gcc/config/rs6000/altivec.md
>>> +++ b/gcc/config/rs6000/altivec.md
>>> @@ -3349,11 +3349,26 @@
>>>                        (match_operand:V8HI 2 "register_operand" "v")]
>>>                                UNSPEC_VMSUMSHM)))]
>>>  "TARGET_ALTIVEC"
>>>  "
>>> {
>>> -  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>>> operands[2], operands[3]));
>>> +  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>>> +				   operands[2], operands[3]));
>>> +  DONE;
>>> +}")
>>> +
>>> +(define_expand "sdot_prodv16qi"
>>> +  [(set (match_operand:V4SI 0 "register_operand" "=v")
>>> +        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>>> +                   (unspec:V4SI [(match_operand:V16QI 1
>>> "register_operand" "v")
>>> +                                 (match_operand:V16QI 2
>>> "register_operand" "v")]
>>> +                                UNSPEC_VMSUMM)))]
>>> +  "TARGET_ALTIVEC"
>>> +  "
>>> +{
>>> +  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>>> +				   operands[2], operands[3]));
>>>  DONE;
>>> }")
>>> 
>>> (define_expand "widen_usum<mode>3"
>>>  [(set (match_operand:V4SI 0 "register_operand" "=v")
>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>> index 551d9c4..552fcdd 100644
>>> --- a/gcc/config/rs6000/rs6000.c
>>> +++ b/gcc/config/rs6000/rs6000.c
>>> @@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
>>> (gimple_stmt_iterator *gsi)
>>>    case VSX_BUILTIN_CMPLE_2DI:
>>>    case VSX_BUILTIN_CMPLE_U2DI:
>>>      fold_compare_helper (gsi, LE_EXPR, stmt);
>>>      return true;
>>> 
>>> +    /* vec_msum.  */
>>> +    case ALTIVEC_BUILTIN_VMSUMUHM:
>>> +    case ALTIVEC_BUILTIN_VMSUMSHM:
>>> +    case ALTIVEC_BUILTIN_VMSUMUBM:
>>> +    case ALTIVEC_BUILTIN_VMSUMMBM:
>>> +      {
>>> +	arg0 = gimple_call_arg (stmt, 0);
>>> +	arg1 = gimple_call_arg (stmt, 1);
>>> +	tree arg2 = gimple_call_arg (stmt, 2);
>>> +	lhs = gimple_call_lhs (stmt);
>>> +	if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
>>> +	  g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>>> +	else
>>> +	  {
>>> +	    // For the case where we have a mix of signed/unsigned
>>> +	    // arguments, convert both multiply args to their signed type.
>>> +	    gimple_seq stmts = NULL;
>>> +	    location_t loc = gimple_location (stmt);
>>> +	    tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>>> +	    tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
>>> arg0);
>>> +	    tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
>>> arg1);
>>> +	    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>>> +	    g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>>> +				     signed_arg0, signed_arg1, arg2);
>>> +	  }
>>> +	gimple_set_location (g, gimple_location (stmt));
>>> +	gsi_replace (gsi, g, true);
>>> +	return true;
>>> +      }
>>> +
>>>    default:
>>>      if (TARGET_DEBUG_BUILTIN)
>>> 	fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>>> 		 fn_code, fn_name1, fn_name2);
>>>      break;
>>> @@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
>>> machine_mode mode_arg0,
>>>    case CRYPTO_BUILTIN_VPERMXOR_V8HI:
>>>    case CRYPTO_BUILTIN_VPERMXOR_V16QI:
>>>    case CRYPTO_BUILTIN_VSHASIGMAW:
>>>    case CRYPTO_BUILTIN_VSHASIGMAD:
>>>    case CRYPTO_BUILTIN_VSHASIGMA:
>>> +    case ALTIVEC_BUILTIN_VMSUMUHM:
>>> +    case ALTIVEC_BUILTIN_VMSUMUBM:
>>>      h.uns_p[0] = 1;
>>>      h.uns_p[1] = 1;
>>>      h.uns_p[2] = 1;
>>>      h.uns_p[3] = 1;
>>>      break;
>>> 
>>> +    /* The second parm to this vec_msum variant is unsigned.  */
>>> +    case ALTIVEC_BUILTIN_VMSUMMBM:
>>> +      h.uns_p[2] = 1;
>>> +      break;
>>> +
>>>    /* signed permute functions with unsigned char mask.  */
>>>    case ALTIVEC_BUILTIN_VPERM_16QI:
>>>    case ALTIVEC_BUILTIN_VPERM_8HI:
>>>    case ALTIVEC_BUILTIN_VPERM_4SI:
>>>    case ALTIVEC_BUILTIN_VPERM_4SF:
>> 
> 
> 


* Re: [PATCH, rs6000] gimple folding of vec_msum()
  2017-12-01 23:08     ` Bill Schmidt
@ 2017-12-04 10:22       ` Richard Biener
  0 siblings, 0 replies; 5+ messages in thread
From: Richard Biener @ 2017-12-04 10:22 UTC (permalink / raw)
  To: Bill Schmidt
  Cc: Will Schmidt, GCC Patches, Segher Boessenkool, David Edelsohn

On Sat, Dec 2, 2017 at 12:08 AM, Bill Schmidt
<wschmidt@linux.vnet.ibm.com> wrote:
> Hi Will,
>
>> On Dec 1, 2017, at 3:43 PM, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>>
>> On Fri, 2017-12-01 at 18:46 +0100, Richard Biener wrote:
>>> On December 1, 2017 6:22:21 PM GMT+01:00, Will Schmidt <will_schmidt@vnet.ibm.com> wrote:
>>>> Hi,
>>>> Add support for folding of vec_msum in GIMPLE.
>>>>
>>>> This uses the DOT_PROD_EXPR gimple op, which is sensitive to type
>>>> mismatches:
>>>>     error: type mismatch in dot product reduction
>>>>     __vector signed int
>>>>     __vector signed char
>>>>     __vector unsigned char
>>>>     D.2798 = DOT_PROD_EXPR <vsc2, vuc3, vsi2>;
>>>> So for those cases with a signed/unsigned mismatch in the arguments,
>>>> this
>>>> converts those arguments to their signed type.
>>>>
>>>> This also adds a define_expand for sdot_prodv16qi. This is based on a
>>>> similar
>>>> existing entry.
>>>>
>>>> Testing coverage is handled by the existing
>>>> gcc.target/powerpc/fold-vec-msum*.c tests.
>>>>
>>>> Sniff-tests have passed on P8.  full regtests currently running on
>>>> other assorted
>>>> power systems.
>>>> OK for trunk with successful results?
>>>
>>> Note DOT_PROD_EXPR is only useful when the result is reduced to a scalar later and the reduction order is irrelevant.
>>>
>>> This is because GIMPLE doesn't specify whether the reduction reduces odd/even or high/low lanes of the argument vectors.  Does vec_msum specify that?
>>
>> Not that I see, but there may be an implied intent here that just isn't
>> obvious to me.   I'll defer to ... someone. :-)
>>
>>> That said, it exists as a 'hack' for the vectorizer and isn't otherwise useful for GIMPLE.
>>
>> OK.  With that in mind, should I just try to split this out into
>> separate multiply and add steps?
>
> No.  The semantics of vec_msum are very specific and can't be accurately represented in GIMPLE.  This one should be left as a call until expand.

I think some of Richard's patches also remove those tree codes in favor
of IFNs.  Specifying them more thoroughly would be nice,
though - I do expect most targets do odd/even reduction for them.
One consequence of the underspecification is that, for example, we can't
even constant-fold those on GIMPLE!
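
As a concrete illustration (arbitrary example values): with a = {1,1,...,1},
b = {0,1,...,15} and c = {0,0,0,0}, an adjacent-group-of-four pairing would
fold DOT_PROD_EXPR <a, b, c> to {6, 22, 38, 54}, while an i/i+4/i+8/i+12
pairing would fold it to {24, 28, 32, 36}.  Both reduce to 120, but there is
no single vector value a constant folder could legitimately pick.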

Richard.

> Thanks!
> Bill
>
>>
>> Thanks,
>> -Will
>>
>>
>>
>>
>>>
>>>
>>> Richard.
>>>
>>>> Thanks
>>>> -Will
>>>>
>>>> [gcc]
>>>>
>>>> 2017-12-01  Will Schmidt  <will_schmidt@vnet.ibm.com>
>>>>
>>>>     * config/rs6000/altivec.md (sdot_prodv16qi): New.
>>>>     * config/rs6000/rs6000.c (rs6000_gimple_fold_builtin): Add support for
>>>>     gimple-folding of vec_msum.
>>>>     (builtin_function_type): Add entries for VMSUMU[BH]M and VMSUMMBM.
>>>>
>>>> diff --git a/gcc/config/rs6000/altivec.md
>>>> b/gcc/config/rs6000/altivec.md
>>>> index 7122f99..fa9e121 100644
>>>> --- a/gcc/config/rs6000/altivec.md
>>>> +++ b/gcc/config/rs6000/altivec.md
>>>> @@ -3349,11 +3349,26 @@
>>>>                        (match_operand:V8HI 2 "register_operand" "v")]
>>>>                                UNSPEC_VMSUMSHM)))]
>>>>  "TARGET_ALTIVEC"
>>>>  "
>>>> {
>>>> -  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>>>> operands[2], operands[3]));
>>>> +  emit_insn (gen_altivec_vmsumshm (operands[0], operands[1],
>>>> +                              operands[2], operands[3]));
>>>> +  DONE;
>>>> +}")
>>>> +
>>>> +(define_expand "sdot_prodv16qi"
>>>> +  [(set (match_operand:V4SI 0 "register_operand" "=v")
>>>> +        (plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
>>>> +                   (unspec:V4SI [(match_operand:V16QI 1
>>>> "register_operand" "v")
>>>> +                                 (match_operand:V16QI 2
>>>> "register_operand" "v")]
>>>> +                                UNSPEC_VMSUMM)))]
>>>> +  "TARGET_ALTIVEC"
>>>> +  "
>>>> +{
>>>> +  emit_insn (gen_altivec_vmsummbm (operands[0], operands[1],
>>>> +                              operands[2], operands[3]));
>>>>  DONE;
>>>> }")
>>>>
>>>> (define_expand "widen_usum<mode>3"
>>>>  [(set (match_operand:V4SI 0 "register_operand" "=v")
>>>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>>>> index 551d9c4..552fcdd 100644
>>>> --- a/gcc/config/rs6000/rs6000.c
>>>> +++ b/gcc/config/rs6000/rs6000.c
>>>> @@ -16614,10 +16614,40 @@ rs6000_gimple_fold_builtin
>>>> (gimple_stmt_iterator *gsi)
>>>>    case VSX_BUILTIN_CMPLE_2DI:
>>>>    case VSX_BUILTIN_CMPLE_U2DI:
>>>>      fold_compare_helper (gsi, LE_EXPR, stmt);
>>>>      return true;
>>>>
>>>> +    /* vec_msum.  */
>>>> +    case ALTIVEC_BUILTIN_VMSUMUHM:
>>>> +    case ALTIVEC_BUILTIN_VMSUMSHM:
>>>> +    case ALTIVEC_BUILTIN_VMSUMUBM:
>>>> +    case ALTIVEC_BUILTIN_VMSUMMBM:
>>>> +      {
>>>> +   arg0 = gimple_call_arg (stmt, 0);
>>>> +   arg1 = gimple_call_arg (stmt, 1);
>>>> +   tree arg2 = gimple_call_arg (stmt, 2);
>>>> +   lhs = gimple_call_lhs (stmt);
>>>> +   if ( TREE_TYPE (arg0) == TREE_TYPE (arg1))
>>>> +     g = gimple_build_assign (lhs, DOT_PROD_EXPR, arg0, arg1, arg2);
>>>> +   else
>>>> +     {
>>>> +       // For the case where we have a mix of signed/unsigned
>>>> +       // arguments, convert both multiply args to their signed type.
>>>> +       gimple_seq stmts = NULL;
>>>> +       location_t loc = gimple_location (stmt);
>>>> +       tree new_arg_type = signed_type_for (TREE_TYPE (arg0));
>>>> +       tree signed_arg0 = gimple_convert (&stmts, loc, new_arg_type,
>>>> arg0);
>>>> +       tree signed_arg1 = gimple_convert (&stmts, loc, new_arg_type,
>>>> arg1);
>>>> +       gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
>>>> +       g = gimple_build_assign (lhs, DOT_PROD_EXPR,
>>>> +                                signed_arg0, signed_arg1, arg2);
>>>> +     }
>>>> +   gimple_set_location (g, gimple_location (stmt));
>>>> +   gsi_replace (gsi, g, true);
>>>> +   return true;
>>>> +      }
>>>> +
>>>>    default:
>>>>      if (TARGET_DEBUG_BUILTIN)
>>>>     fprintf (stderr, "gimple builtin intrinsic not matched:%d %s %s\n",
>>>>              fn_code, fn_name1, fn_name2);
>>>>      break;
>>>> @@ -18080,16 +18110,23 @@ builtin_function_type (machine_mode mode_ret,
>>>> machine_mode mode_arg0,
>>>>    case CRYPTO_BUILTIN_VPERMXOR_V8HI:
>>>>    case CRYPTO_BUILTIN_VPERMXOR_V16QI:
>>>>    case CRYPTO_BUILTIN_VSHASIGMAW:
>>>>    case CRYPTO_BUILTIN_VSHASIGMAD:
>>>>    case CRYPTO_BUILTIN_VSHASIGMA:
>>>> +    case ALTIVEC_BUILTIN_VMSUMUHM:
>>>> +    case ALTIVEC_BUILTIN_VMSUMUBM:
>>>>      h.uns_p[0] = 1;
>>>>      h.uns_p[1] = 1;
>>>>      h.uns_p[2] = 1;
>>>>      h.uns_p[3] = 1;
>>>>      break;
>>>>
>>>> +    /* The second parm to this vec_msum variant is unsigned.  */
>>>> +    case ALTIVEC_BUILTIN_VMSUMMBM:
>>>> +      h.uns_p[2] = 1;
>>>> +      break;
>>>> +
>>>>    /* signed permute functions with unsigned char mask.  */
>>>>    case ALTIVEC_BUILTIN_VPERM_16QI:
>>>>    case ALTIVEC_BUILTIN_VPERM_8HI:
>>>>    case ALTIVEC_BUILTIN_VPERM_4SI:
>>>>    case ALTIVEC_BUILTIN_VPERM_4SF:
>>>
>>
>>
>

