public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [RFC] [Patch]:  Try and vectorize with shift for mult expr with power 2 integer constant.
@ 2015-07-28 15:30 Kumar, Venkataramanan
  2015-07-28 20:24 ` Jakub Jelinek
  0 siblings, 1 reply; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-07-28 15:30 UTC (permalink / raw)
  To: Richard Beiner (richard.guenther@gmail.com), gcc-patches

[-- Attachment #1: Type: text/plain, Size: 1019 bytes --]

Hi Richard,

For Aarch64 target, I was trying to  vectorize  the expression  "arr[i]=arr[i]*4;"   via vector shifts instructions since they don't have vector mults.

unsigned  long int __attribute__ ((aligned (64)))arr[100];
int i;
#if 1
void test_vector_shifts()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]<<2;
}
#endif

void test_vectorshift_via_mul()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]*4;

}

I found a similar PR and your comments https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c6. 
Based on that and IRC discussion I had with you,  I added vector recog pattern that transforms mults to shifts.  The vectorizer is now able to generate vector shifts for the above test case.
PR case also gets vectorized https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65952#c10.

This is just an initial patch and tries to optimize integer type power 2 constants.  I wanted to get feedback on this .  I bootstrapped and reg tested on aarch64-none-linux-gnu .

Regards,
Venkat.

[-- Attachment #2: vectorize_mults_shift.diff.txt --]
[-- Type: text/plain, Size: 4189 bytes --]

diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..948203d 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
 					 tree *, tree *);
+
+static gimple vect_recog_multconst_pattern (vec<gimple> *,
+                                         tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
 						  tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+        vect_recog_multconst_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,87 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+static gimple
+vect_recog_multconst_pattern (vec<gimple> *stmts,
+                           tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt;
+  enum tree_code rhs_code;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  switch (rhs_code)
+    {
+    case MULT_EXPR:
+      break;
+    default:
+      return NULL;
+    }
+
+  if (STMT_VINFO_IN_PATTERN_P (stmt_vinfo))
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (rhs_code, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+        return NULL;
+    }
+
+  /* If target cannot handle vector left shift then we cannot 
+     optimize and bail out.  */ 
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+        return NULL;
+
+  if (integer_pow2p (oprnd1))
+    {
+      /* Pattern detected.  */
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_recog_multconst_pattern: detected:\n");
+
+      tree shift;
+      shift = build_int_cst (itype, tree_log2 (oprnd1));
+      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+					  LSHIFT_EXPR, oprnd0, shift);
+      if (dump_enabled_p ())
+	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
+                              0);
+      stmts->safe_push (last_stmt);
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
+    } 
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 48c1f8d..833fe4b 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1131,7 +1131,7 @@ extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]:  Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-07-28 15:30 [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant Kumar, Venkataramanan
@ 2015-07-28 20:24 ` Jakub Jelinek
  2015-07-28 20:34   ` Jakub Jelinek
  2015-08-02 11:03   ` Kumar, Venkataramanan
  0 siblings, 2 replies; 16+ messages in thread
From: Jakub Jelinek @ 2015-07-28 20:24 UTC (permalink / raw)
  To: Kumar, Venkataramanan
  Cc: Richard Beiner (richard.guenther@gmail.com), gcc-patches

Hi!

> This is just an initial patch and tries to optimize integer type power 2
> constants.  I wanted to get feedback on this .  I bootstrapped and reg
> tested on aarch64-none-linux-gnu .

Thanks for working on it.
ChangeLog entry for the patch is missing, probably also some testcases.

> @@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
>  	vect_recog_rotate_pattern,
>  	vect_recog_vector_vector_shift_pattern,
>  	vect_recog_divmod_pattern,
> +        vect_recog_multconst_pattern,
>  	vect_recog_mixed_size_cond_pattern,
>  	vect_recog_bool_pattern};

Please watch formatting, the other lines are tab indented, so please use a
tab rather than 8 spaces.

> @@ -2147,6 +2152,87 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
>    return pattern_stmt;
>  }
>  

Function comment is missing here.

> +static gimple
> +vect_recog_multconst_pattern (vec<gimple> *stmts,
> +                           tree *type_in, tree *type_out)

About the function name, wonder if just vect_recog_mult_pattern wouldn't be
enough.

> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  switch (rhs_code)
> +    {
> +    case MULT_EXPR:
> +      break;
> +    default:
> +      return NULL;
> +    }

This looks too weird, I'd just do
  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;
(you handle just one pattern).

> +  /* If the target can handle vectorized multiplication natively,
> +     don't attempt to optimize this.  */
> +  optab = optab_for_tree_code (rhs_code, vectype, optab_default);

Supposedly you can use MULT_EXPR directly here.

> +  /* If target cannot handle vector left shift then we cannot 
> +     optimize and bail out.  */ 
> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> +  if (!optab
> +      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
> +        return NULL;
> +
> +  if (integer_pow2p (oprnd1))
> +    {
> +      /* Pattern detected.  */
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "vect_recog_multconst_pattern: detected:\n");
> +
> +      tree shift;
> +      shift = build_int_cst (itype, tree_log2 (oprnd1));
> +      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> +					  LSHIFT_EXPR, oprnd0, shift);
> +      if (dump_enabled_p ())
> +	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
> +                              0);
> +      stmts->safe_push (last_stmt);
> +      *type_in = vectype;
> +      *type_out = vectype;
> +      return pattern_stmt;
> +    } 

Trailing whitespace.
The integer_pow2p case (have you checked signed multiply by INT_MIN?)
is only one of the cases you can actually handle, you can look at
expand_mult for many other cases - e.g. multiplication by negated powers of
2, or call choose_mult_variant and handle whatever it returns.

	Jakub

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]:  Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-07-28 20:24 ` Jakub Jelinek
@ 2015-07-28 20:34   ` Jakub Jelinek
  2015-08-02 11:03   ` Kumar, Venkataramanan
  1 sibling, 0 replies; 16+ messages in thread
From: Jakub Jelinek @ 2015-07-28 20:34 UTC (permalink / raw)
  To: Kumar, Venkataramanan
  Cc: Richard Beiner (richard.guenther@gmail.com), gcc-patches

Hi!

> This is just an initial patch and tries to optimize integer type power 2
> constants.  I wanted to get feedback on this .  I bootstrapped and reg
> tested on aarch64-none-linux-gnu .

Thanks for working on it.
ChangeLog entry for the patch is missing, probably also some testcases.

> @@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
>  	vect_recog_rotate_pattern,
>  	vect_recog_vector_vector_shift_pattern,
>  	vect_recog_divmod_pattern,
> +        vect_recog_multconst_pattern,
>  	vect_recog_mixed_size_cond_pattern,
>  	vect_recog_bool_pattern};

Please watch formatting, the other lines are tab indented, so please use a
tab rather than 8 spaces.

> @@ -2147,6 +2152,87 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
>    return pattern_stmt;
>  }
>  

Function comment is missing here.

> +static gimple
> +vect_recog_multconst_pattern (vec<gimple> *stmts,
> +                           tree *type_in, tree *type_out)

About the function name, wonder if just vect_recog_mult_pattern wouldn't be
enough.

> +  rhs_code = gimple_assign_rhs_code (last_stmt);
> +  switch (rhs_code)
> +    {
> +    case MULT_EXPR:
> +      break;
> +    default:
> +      return NULL;
> +    }

This looks too weird, I'd just do
  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
    return NULL;
(you handle just one pattern).

> +  /* If the target can handle vectorized multiplication natively,
> +     don't attempt to optimize this.  */
> +  optab = optab_for_tree_code (rhs_code, vectype, optab_default);

Supposedly you can use MULT_EXPR directly here.

> +  /* If target cannot handle vector left shift then we cannot 
> +     optimize and bail out.  */ 
> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> +  if (!optab
> +      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
> +        return NULL;
> +
> +  if (integer_pow2p (oprnd1))
> +    {
> +      /* Pattern detected.  */
> +      if (dump_enabled_p ())
> +	dump_printf_loc (MSG_NOTE, vect_location,
> +			 "vect_recog_multconst_pattern: detected:\n");
> +
> +      tree shift;
> +      shift = build_int_cst (itype, tree_log2 (oprnd1));
> +      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
> +					  LSHIFT_EXPR, oprnd0, shift);
> +      if (dump_enabled_p ())
> +	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM, pattern_stmt,
> +                              0);
> +      stmts->safe_push (last_stmt);
> +      *type_in = vectype;
> +      *type_out = vectype;
> +      return pattern_stmt;
> +    } 

Trailing whitespace.
The integer_pow2p case (have you checked signed multiply by INT_MIN?)
is only one of the cases you can actually handle, you can look at
expand_mult for many other cases - e.g. multiplication by negated powers of
2, or call choose_mult_variant and handle whatever it returns.

	Jakub

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [RFC] [Patch]:  Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-07-28 20:24 ` Jakub Jelinek
  2015-07-28 20:34   ` Jakub Jelinek
@ 2015-08-02 11:03   ` Kumar, Venkataramanan
  2015-08-03 18:11     ` Jeff Law
  1 sibling, 1 reply; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-08-02 11:03 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Beiner (richard.guenther@gmail.com), gcc-patches

[-- Attachment #1: Type: text/plain, Size: 5033 bytes --]

Hi Jakub, 

Thank you for reviewing the patch.

I have incorporated your comments in the attached patch.


> -----Original Message-----
> From: Jakub Jelinek [mailto:jakub@redhat.com]
> Sent: Wednesday, July 29, 2015 1:24 AM
> To: Kumar, Venkataramanan
> Cc: Richard Beiner (richard.guenther@gmail.com); gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
> 
> Hi!
> 
> > This is just an initial patch and tries to optimize integer type power
> > 2 constants.  I wanted to get feedback on this .  I bootstrapped and
> > reg tested on aarch64-none-linux-gnu .
> 
> Thanks for working on it.
> ChangeLog entry for the patch is missing, probably also some testcases.
> 
> > @@ -90,6 +94,7 @@ static vect_recog_func_ptr
> vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
> >  	vect_recog_rotate_pattern,
> >  	vect_recog_vector_vector_shift_pattern,
> >  	vect_recog_divmod_pattern,
> > +        vect_recog_multconst_pattern,
> >  	vect_recog_mixed_size_cond_pattern,
> >  	vect_recog_bool_pattern};
> 
> Please watch formatting, the other lines are tab indented, so please use a
> tab rather than 8 spaces.
> 
> > @@ -2147,6 +2152,87 @@ vect_recog_vector_vector_shift_pattern
> (vec<gimple> *stmts,
> >    return pattern_stmt;
> >  }
> >
> 
> Function comment is missing here.
> 
> > +static gimple
> > +vect_recog_multconst_pattern (vec<gimple> *stmts,
> > +                           tree *type_in, tree *type_out)
> 
> About the function name, wonder if just vect_recog_mult_pattern wouldn't
> be enough.
> 
> > +  rhs_code = gimple_assign_rhs_code (last_stmt);  switch (rhs_code)
> > +    {
> > +    case MULT_EXPR:
> > +      break;
> > +    default:
> > +      return NULL;
> > +    }
> 
> This looks too weird, I'd just do
>   if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
>     return NULL;
> (you handle just one pattern).
> 
> > +  /* If the target can handle vectorized multiplication natively,
> > +     don't attempt to optimize this.  */  optab = optab_for_tree_code
> > + (rhs_code, vectype, optab_default);
> 
> Supposedly you can use MULT_EXPR directly here.
> 
> > +  /* If target cannot handle vector left shift then we cannot
> > +     optimize and bail out.  */
> > +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> > + if (!optab
> > +      || optab_handler (optab, TYPE_MODE (vectype)) ==
> CODE_FOR_nothing)
> > +        return NULL;
> > +
> > +  if (integer_pow2p (oprnd1))
> > +    {
> > +      /* Pattern detected.  */
> > +      if (dump_enabled_p ())
> > +	dump_printf_loc (MSG_NOTE, vect_location,
> > +			 "vect_recog_multconst_pattern: detected:\n");
> > +
> > +      tree shift;
> > +      shift = build_int_cst (itype, tree_log2 (oprnd1));
> > +      pattern_stmt = gimple_build_assign (vect_recog_temp_ssa_var
> (itype, NULL),
> > +					  LSHIFT_EXPR, oprnd0, shift);
> > +      if (dump_enabled_p ())
> > +	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
> pattern_stmt,
> > +                              0);
> > +      stmts->safe_push (last_stmt);
> > +      *type_in = vectype;
> > +      *type_out = vectype;
> > +      return pattern_stmt;
> > +    }
> 
> Trailing whitespace.
> The integer_pow2p case (have you checked signed multiply by INT_MIN?) is
> only one of the cases you can actually handle, you can look at expand_mult
> for many other cases - e.g. multiplication by negated powers of 2, or call
> choose_mult_variant and handle whatever it returns.

I have added patterns that detect mults with power of 2 constants and convert them to shifts in this patch.
I will do a follow up patch for other variants as done in "expand_mult" and "choose_mult_variant".

INT_MIN case, I have not yet checked. 

 I thought of adding like this. 
if (wi::eq_p (oprnd1, HOST_WIDE_INT_MIN))
    return NULL;

But wanted to confirm with you since I don't understand  this clearly.  Is this because Var * INT_MIN !=  Var << 31  ?

 I tried this case for "int" or long int type arr  

void test_vector_shifts()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]*INT_MIN;
}

Int type-  Looks like  without my patch aarch64 vectorizes it using shifts.
        ldr     q0, [x0]
        shl     v0.4s, v0.4s, 31
        str     q0, [x0], 16

For long int  type -  without my patch it converts to shifts but vectorization did not happen.
        ldr     x1, [x0]
        neg     x1, x1, lsl 31
        str     x1, [x0], 8

> 
> 	Jakub

gcc/ChangeLog
2015-08-02  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
     * tree-vect-patterns.c (vect_recog_mult_pattern): New function for vectorizing
        multiplication patterns.
     * tree-vectorizer.h: Adjust the number of patterns.

gcc/testsuite/ChangeLog
2015-08-02  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
     * gcc.dg/vect/vect-mult-patterns.c: New
 

Regards,
Venkat.
 

[-- Attachment #2: vectorize_mults_via_shift.diff.txt --]
[-- Type: text/plain, Size: 6706 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
new file mode 100644
index 0000000..ad1760c
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned  long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void test_vector_shifts ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * 4;
+}
+
+void test_vectorshift_via_mul ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * (-4);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect"  {target  { ! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" {target  { ! { vect_int_mult } } } } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..5cbb49e 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
 					 tree *, tree *);
+
+static gimple vect_recog_mult_pattern (vec<gimple> *,
+				       tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
 						  tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+	vect_recog_mult_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+/* Detect multiplication by constant which are postive or negatives of power 2,
+   and convert them to shift patterns.
+
+   Mult with constants that are postive power of two.
+   type a_t;
+   type b_t
+   S1: b_t = a_t * n
+
+   or
+
+   Mult with constants that are negative power of two.
+   S2: b_t = a_t * -n
+
+   Input/Output:
+
+   STMTS: Contains a stmt from which the pattern search begins,
+   i.e. the mult stmt.  Convert the mult operation to LSHIFT if
+   constant operand is a power of 2.
+   type a_t, b_t
+   S1': b_t = a_t << log2 (n)
+
+   Convert the mult operation to LSHIFT and followed by a NEGATE
+   if constant operand is a negative power of 2.
+   type a_t, b_t, res_T;
+   S2': b_t = a_t << log2 (n)
+   S3': res_T  = - (b_t)
+
+ Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the multiplication
+    S1 or S2 stmt.  */
+
+static gimple
+vect_recog_mult_pattern (vec<gimple> *stmts,
+			 tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt, def_stmt;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If target cannot handle vector left shift then we cannot
+     optimize and bail out.  */
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	return NULL;
+
+  /* Handle constant operands that are postive or negative powers of 2.  */
+  if ( wi::exact_log2 (oprnd1) != -1  ||
+       wi::exact_log2 (wi::neg (oprnd1)) != -1)
+    {
+      tree shift;
+
+      if (wi::exact_log2 (oprnd1) != -1)
+	{
+	  shift = build_int_cst (itype, wi::exact_log2 (oprnd1));
+	  pattern_stmt
+	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				   LSHIFT_EXPR, oprnd0, shift);
+	}
+      else
+	{
+	  /* If the target cannot handle vector NEGATE then we cannot
+	     do the optimization.  */
+	  optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
+	  if (!optab
+	      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	    return NULL;
+
+	  shift = build_int_cst (itype, wi::exact_log2 (wi::neg (oprnd1)));
+	  def_stmt
+	     = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				    LSHIFT_EXPR, oprnd0, shift);
+	  new_pattern_def_seq (stmt_vinfo, def_stmt);
+	  pattern_stmt
+	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				   NEGATE_EXPR, gimple_assign_lhs (def_stmt));
+	}
+
+      /* Pattern detected.  */
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_recog_mult_pattern: detected:\n");
+
+      if (dump_enabled_p ())
+	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
+			      pattern_stmt,0);
+
+      stmts->safe_push (last_stmt);
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dfa8795..b490af4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1132,7 +1132,7 @@ extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-02 11:03   ` Kumar, Venkataramanan
@ 2015-08-03 18:11     ` Jeff Law
  2015-08-04  8:52       ` Kumar, Venkataramanan
  0 siblings, 1 reply; 16+ messages in thread
From: Jeff Law @ 2015-08-03 18:11 UTC (permalink / raw)
  To: Kumar, Venkataramanan, Jakub Jelinek
  Cc: Richard Beiner (richard.guenther@gmail.com), gcc-patches

On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> Hi Jakub,
>
> Thank you for reviewing the patch.
>
> I have incorporated your comments in the attached patch.
Note Jakub is on PTO for the next 3 weeks.


>
>
>
> vectorize_mults_via_shift.diff.txt
>
>
> diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
Jakub would probably like more testcases :-)

The most obvious thing to test would be other shift factors.

A negative test to verify we don't try to turn a multiply by 
non-constant or multiply by a constant that is not a power of 2 into shifts.

[ Would it make sense, for example, to turn a multiply by 3 into a 
shift-add sequence?  As Jakub said, choose_mult_variant can be your 
friend. ]



> @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
>     return pattern_stmt;
>   }
>
> +/* Detect multiplication by constant which are postive or negatives of power 2,
s/postive/positive/


Jeff

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-03 18:11     ` Jeff Law
@ 2015-08-04  8:52       ` Kumar, Venkataramanan
  2015-08-04 10:37         ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-08-04  8:52 UTC (permalink / raw)
  To: Jeff Law, Jakub Jelinek
  Cc: Richard Beiner (richard.guenther@gmail.com), gcc-patches

[-- Attachment #1: Type: text/plain, Size: 2263 bytes --]

Hi Jeff, 

> -----Original Message-----
> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> owner@gcc.gnu.org] On Behalf Of Jeff Law
> Sent: Monday, August 03, 2015 11:42 PM
> To: Kumar, Venkataramanan; Jakub Jelinek
> Cc: Richard Beiner (richard.guenther@gmail.com); gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
> 
> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> > Hi Jakub,
> >
> > Thank you for reviewing the patch.
> >
> > I have incorporated your comments in the attached patch.
> Note Jakub is on PTO for the next 3 weeks.

 Thank you for this information.

> 
> 
> >
> >
> >
> > vectorize_mults_via_shift.diff.txt
> >
> >
> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> Jakub would probably like more testcases :-)
> 
> The most obvious thing to test would be other shift factors.
> 
> A negative test to verify we don't try to turn a multiply by non-constant or
> multiply by a constant that is not a power of 2 into shifts.

I have added negative test in the attached patch.


> 
> [ Would it make sense, for example, to turn a multiply by 3 into a shift-add
> sequence?  As Jakub said, choose_mult_variant can be your friend. ]

Yes I will do that in a follow up patch.   

The new change log becomes 

gcc/ChangeLog
2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
     * tree-vect-patterns.c (vect_recog_mult_pattern): New function for vectorizing
        multiplication patterns.
     * tree-vectorizer.h: Adjust the number of patterns.

gcc/testsuite/ChangeLog
2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
     * gcc.dg/vect/vect-mult-pattern-1.c: New
    * gcc.dg/vect/vect-mult-pattern-2.c: New

Bootstrapped and reg tested on aarch64-unknown-linux-gnu.

Ok for trunk ?

> 
> 
> 
> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
> (vec<gimple> *stmts,
> >     return pattern_stmt;
> >   }
> >
> > +/* Detect multiplication by constant which are postive or negatives
> > +of power 2,
> s/postive/positive/
> 
> 
> Jeff

Regards,
Venkat.


[-- Attachment #2: vectorize_mults_via_shift.diff.txt --]
[-- Type: text/plain, Size: 7850 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
new file mode 100644
index 0000000..764d0e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned  long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void test_for_vectorshifts_via_mul_with_power2_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * 4;
+}
+
+void test_for_vectorshifts_via_mul_with_negative_power2_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * (-4);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  {target  { ! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" {target  { ! { vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
new file mode 100644
index 0000000..77e8cff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned  long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void negative_test_for_vectorshifts_via_mul_with_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * 123;
+}
+
+void negative_test_for_vectorshifts_via_mul_with_negative_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * (-123);
+}
+
+void negative_test_for_vectorshifts_via_mul_with_varable (int x)
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * x;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"  {target  { ! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-not "vect_recog_mult_pattern: detected" "vect" {target  { ! { vect_int_mult } } } } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..5cbb49e 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
 					 tree *, tree *);
+
+static gimple vect_recog_mult_pattern (vec<gimple> *,
+				       tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
 						  tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+	vect_recog_mult_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+/* Detect multiplication by constant which are postive or negatives of power 2,
+   and convert them to shift patterns.
+
+   Mult with constants that are postive power of two.
+   type a_t;
+   type b_t
+   S1: b_t = a_t * n
+
+   or
+
+   Mult with constants that are negative power of two.
+   S2: b_t = a_t * -n
+
+   Input/Output:
+
+   STMTS: Contains a stmt from which the pattern search begins,
+   i.e. the mult stmt.  Convert the mult operation to LSHIFT if
+   constant operand is a power of 2.
+   type a_t, b_t
+   S1': b_t = a_t << log2 (n)
+
+   Convert the mult operation to LSHIFT and followed by a NEGATE
+   if constant operand is a negative power of 2.
+   type a_t, b_t, res_T;
+   S2': b_t = a_t << log2 (n)
+   S3': res_T  = - (b_t)
+
+ Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the multiplication
+    S1 or S2 stmt.  */
+
+static gimple
+vect_recog_mult_pattern (vec<gimple> *stmts,
+			 tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt, def_stmt;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If target cannot handle vector left shift then we cannot
+     optimize and bail out.  */
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	return NULL;
+
+  /* Handle constant operands that are postive or negative powers of 2.  */
+  if ( wi::exact_log2 (oprnd1) != -1  ||
+       wi::exact_log2 (wi::neg (oprnd1)) != -1)
+    {
+      tree shift;
+
+      if (wi::exact_log2 (oprnd1) != -1)
+	{
+	  shift = build_int_cst (itype, wi::exact_log2 (oprnd1));
+	  pattern_stmt
+	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				   LSHIFT_EXPR, oprnd0, shift);
+	}
+      else
+	{
+	  /* If the target cannot handle vector NEGATE then we cannot
+	     do the optimization.  */
+	  optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
+	  if (!optab
+	      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	    return NULL;
+
+	  shift = build_int_cst (itype, wi::exact_log2 (wi::neg (oprnd1)));
+	  def_stmt
+	     = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				    LSHIFT_EXPR, oprnd0, shift);
+	  new_pattern_def_seq (stmt_vinfo, def_stmt);
+	  pattern_stmt
+	    = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				   NEGATE_EXPR, gimple_assign_lhs (def_stmt));
+	}
+
+      /* Pattern detected.  */
+      if (dump_enabled_p ())
+	dump_printf_loc (MSG_NOTE, vect_location,
+			 "vect_recog_mult_pattern: detected:\n");
+
+      if (dump_enabled_p ())
+	dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
+			      pattern_stmt,0);
+
+      stmts->safe_push (last_stmt);
+      *type_in = vectype;
+      *type_out = vectype;
+      return pattern_stmt;
+    }
+
+  return NULL;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dfa8795..b490af4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1132,7 +1132,7 @@ extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04  8:52       ` Kumar, Venkataramanan
@ 2015-08-04 10:37         ` Richard Biener
       [not found]           ` <87vbcvjg1y.fsf@e105548-lin.cambridge.arm.com>
  2015-08-04 16:49           ` Kumar, Venkataramanan
  0 siblings, 2 replies; 16+ messages in thread
From: Richard Biener @ 2015-08-04 10:37 UTC (permalink / raw)
  To: Kumar, Venkataramanan; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
<Venkataramanan.Kumar@amd.com> wrote:
> Hi Jeff,
>
>> -----Original Message-----
>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> owner@gcc.gnu.org] On Behalf Of Jeff Law
>> Sent: Monday, August 03, 2015 11:42 PM
>> To: Kumar, Venkataramanan; Jakub Jelinek
>> Cc: Richard Beiner (richard.guenther@gmail.com); gcc-patches@gcc.gnu.org
>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
>> power 2 integer constant.
>>
>> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>> > Hi Jakub,
>> >
>> > Thank you for reviewing the patch.
>> >
>> > I have incorporated your comments in the attached patch.
>> Note Jakub is on PTO for the next 3 weeks.
>
>  Thank you for this information.
>
>>
>>
>> >
>> >
>> >
>> > vectorize_mults_via_shift.diff.txt
>> >
>> >
>> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> Jakub would probably like more testcases :-)
>>
>> The most obvious thing to test would be other shift factors.
>>
>> A negative test to verify we don't try to turn a multiply by non-constant or
>> multiply by a constant that is not a power of 2 into shifts.
>
> I have added negative test in the attached patch.
>
>
>>
>> [ Would it make sense, for example, to turn a multiply by 3 into a shift-add
>> sequence?  As Jakub said, choose_mult_variant can be your friend. ]
>
> Yes I will do that in a follow up patch.
>
> The new change log becomes
>
> gcc/ChangeLog
> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for vectorizing
>         multiplication patterns.
>      * tree-vectorizer.h: Adjust the number of patterns.
>
> gcc/testsuite/ChangeLog
> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>      * gcc.dg/vect/vect-mult-pattern-1.c: New
>     * gcc.dg/vect/vect-mult-pattern-2.c: New
>
> Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>
> Ok for trunk ?

+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || TREE_CODE (itype) != INTEGER_TYPE

INTEGRAL_TYPE_P (itype)

+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+       return NULL;
+

indent of the return stmt looks wrong

+  /* Handle constant operands that are postive or negative powers of 2.  */
+  if ( wi::exact_log2 (oprnd1) != -1  ||
+       wi::exact_log2 (wi::neg (oprnd1)) != -1)

no space after (, || goes to the next line.

+    {
+      tree shift;
+
+      if (wi::exact_log2 (oprnd1) != -1)

please cache wi::exact_log2

in fact the first if () looks redundant if you simply put an else return NULL
after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)

Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
again, but it seems that wi::exact_log2 returns -1 in that case so you
are fine (and in fact not handling this case).

Thanks,
Richard.

>>
>>
>>
>> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
>> (vec<gimple> *stmts,
>> >     return pattern_stmt;
>> >   }
>> >
>> > +/* Detect multiplication by constant which are postive or negatives
>> > +of power 2,
>> s/postive/positive/
>>
>>
>> Jeff
>
> Regards,
> Venkat.
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
       [not found]             ` <CAFiYyc3VLXyjmfCuizFVsQ2nLDjyd_Dr=Po1_qyJ9wcrgCmiWA@mail.gmail.com>
@ 2015-08-04 14:22               ` Richard Biener
  2015-08-04 14:28                 ` Richard Sandiford
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Biener @ 2015-08-04 14:22 UTC (permalink / raw)
  To: Richard Biener, Kumar, Venkataramanan, Jeff Law, Jakub Jelinek,
	gcc-patches, richard.sandiford

On Tue, Aug 4, 2015 at 4:21 PM, Richard Biener
<richard.guenther@gmail.com> wrote:
> On Tue, Aug 4, 2015 at 4:15 PM, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>> Richard Biener <richard.guenther@gmail.com> writes:
>>> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
>>> <Venkataramanan.Kumar@amd.com> wrote:
>>>> Hi Jeff,
>>>>
>>>>> -----Original Message-----
>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>> owner@gcc.gnu.org] On Behalf Of Jeff Law
>>>>> Sent: Monday, August 03, 2015 11:42 PM
>>>>> To: Kumar, Venkataramanan; Jakub Jelinek
>>>>> Cc: Richard Beiner (richard.guenther@gmail.com); gcc-patches@gcc.gnu.org
>>>>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
>>>>> power 2 integer constant.
>>>>>
>>>>> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>>>>> > Hi Jakub,
>>>>> >
>>>>> > Thank you for reviewing the patch.
>>>>> >
>>>>> > I have incorporated your comments in the attached patch.
>>>>> Note Jakub is on PTO for the next 3 weeks.
>>>>
>>>>  Thank you for this information.
>>>>
>>>>>
>>>>>
>>>>> >
>>>>> >
>>>>> >
>>>>> > vectorize_mults_via_shift.diff.txt
>>>>> >
>>>>> >
>>>>> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>> Jakub would probably like more testcases :-)
>>>>>
>>>>> The most obvious thing to test would be other shift factors.
>>>>>
>>>>> A negative test to verify we don't try to turn a multiply by non-constant or
>>>>> multiply by a constant that is not a power of 2 into shifts.
>>>>
>>>> I have added negative test in the attached patch.
>>>>
>>>>
>>>>>
>>>>> [ Would it make sense, for example, to turn a multiply by 3 into a shift-add
>>>>> sequence?  As Jakub said, choose_mult_variant can be your friend. ]
>>>>
>>>> Yes I will do that in a follow up patch.
>>>>
>>>> The new change log becomes
>>>>
>>>> gcc/ChangeLog
>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for vectorizing
>>>>         multiplication patterns.
>>>>      * tree-vectorizer.h: Adjust the number of patterns.
>>>>
>>>> gcc/testsuite/ChangeLog
>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>      * gcc.dg/vect/vect-mult-pattern-1.c: New
>>>>     * gcc.dg/vect/vect-mult-pattern-2.c: New
>>>>
>>>> Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>>>>
>>>> Ok for trunk ?
>>>
>>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>>> +      || TREE_CODE (oprnd1) != INTEGER_CST
>>> +      || TREE_CODE (itype) != INTEGER_TYPE
>>>
>>> INTEGRAL_TYPE_P (itype)
>>>
>>> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
>>> +  if (!optab
>>> +      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
>>> +       return NULL;
>>> +
>>>
>>> indent of the return stmt looks wrong
>>>
>>> +  /* Handle constant operands that are postive or negative powers of 2.  */
>>> +  if ( wi::exact_log2 (oprnd1) != -1  ||
>>> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>
>>> no space after (, || goes to the next line.
>>>
>>> +    {
>>> +      tree shift;
>>> +
>>> +      if (wi::exact_log2 (oprnd1) != -1)
>>>
>>> please cache wi::exact_log2
>>>
>>> in fact the first if () looks redundant if you simply put an else return NULL
>>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>
>>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
>>> again, but it seems that wi::exact_log2 returns -1 in that case so you
>>> are fine (and in fact not handling this case).
>>
>> Are you sure it returns -1 for INT_MIN?  It isn't supposed to, assuming
>> INT_MIN is shorthand for "minimum value for a signed type".  wide_ints
>> aren't signed, so INT_MIN is indistinguishable from an unsigned
>> 1<<(prec-1).
>
> No, not sure.  I spotted
>
>   /* Reject cases where there are implicit -1 blocks above HIGH.  */
>   if (x.len * HOST_BITS_PER_WIDE_INT < x.precision && x.sign_mask () < 0)
>     return -1;
>
> and thought that would catch it.  I mean the tree value is negative so
> exact_log2 must see it is a negative value.

Now re-sent with Richards company disclaimer stripped...

> Richard.
>
>> wi::exact_log2 (wi::to_widest (INT_MIN)) would return -1, but that's
>> the difference between "infinite" precision and exact precision.
>>
>> Thanks,
>> Richard

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 14:22               ` Richard Biener
@ 2015-08-04 14:28                 ` Richard Sandiford
  2015-08-04 16:07                   ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Sandiford @ 2015-08-04 14:28 UTC (permalink / raw)
  To: Richard Biener
  Cc: Kumar, Venkataramanan, Jeff Law, Jakub Jelinek, gcc-patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Aug 4, 2015 at 4:21 PM, Richard Biener
> <richard.guenther@gmail.com> wrote:
>> On Tue, Aug 4, 2015 at 4:15 PM, Richard Sandiford
>> <richard.sandiford@arm.com> wrote:
>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
>>>> <Venkataramanan.Kumar@amd.com> wrote:
>>>>> Hi Jeff,
>>>>>
>>>>>> -----Original Message-----
>>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>>> owner@gcc.gnu.org] On Behalf Of Jeff Law
>>>>>> Sent: Monday, August 03, 2015 11:42 PM
>>>>>> To: Kumar, Venkataramanan; Jakub Jelinek
>>>>>> Cc: Richard Beiner (richard.guenther@gmail.com); gcc-patches@gcc.gnu.org
>>>>>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
>>>>>> expr with
>>>>>> power 2 integer constant.
>>>>>>
>>>>>> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>>>>>> > Hi Jakub,
>>>>>> >
>>>>>> > Thank you for reviewing the patch.
>>>>>> >
>>>>>> > I have incorporated your comments in the attached patch.
>>>>>> Note Jakub is on PTO for the next 3 weeks.
>>>>>
>>>>>  Thank you for this information.
>>>>>
>>>>>>
>>>>>>
>>>>>> >
>>>>>> >
>>>>>> >
>>>>>> > vectorize_mults_via_shift.diff.txt
>>>>>> >
>>>>>> >
>>>>>> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>>> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>>> Jakub would probably like more testcases :-)
>>>>>>
>>>>>> The most obvious thing to test would be other shift factors.
>>>>>>
>>>>>> A negative test to verify we don't try to turn a multiply by
>>>>>> non-constant or
>>>>>> multiply by a constant that is not a power of 2 into shifts.
>>>>>
>>>>> I have added negative test in the attached patch.
>>>>>
>>>>>
>>>>>>
>>>>>> [ Would it make sense, for example, to turn a multiply by 3 into a
>>>>>> shift-add
>>>>>> sequence?  As Jakub said, choose_mult_variant can be your friend. ]
>>>>>
>>>>> Yes I will do that in a follow up patch.
>>>>>
>>>>> The new change log becomes
>>>>>
>>>>> gcc/ChangeLog
>>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>>      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for vectorizing
>>>>>         multiplication patterns.
>>>>>      * tree-vectorizer.h: Adjust the number of patterns.
>>>>>
>>>>> gcc/testsuite/ChangeLog
>>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>>      * gcc.dg/vect/vect-mult-pattern-1.c: New
>>>>>     * gcc.dg/vect/vect-mult-pattern-2.c: New
>>>>>
>>>>> Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>>>>>
>>>>> Ok for trunk ?
>>>>
>>>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>>>> +      || TREE_CODE (oprnd1) != INTEGER_CST
>>>> +      || TREE_CODE (itype) != INTEGER_TYPE
>>>>
>>>> INTEGRAL_TYPE_P (itype)
>>>>
>>>> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
>>>> +  if (!optab
>>>> +      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
>>>> +       return NULL;
>>>> +
>>>>
>>>> indent of the return stmt looks wrong
>>>>
>>>> +  /* Handle constant operands that are postive or negative powers of 2.  */
>>>> +  if ( wi::exact_log2 (oprnd1) != -1  ||
>>>> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>>
>>>> no space after (, || goes to the next line.
>>>>
>>>> +    {
>>>> +      tree shift;
>>>> +
>>>> +      if (wi::exact_log2 (oprnd1) != -1)
>>>>
>>>> please cache wi::exact_log2
>>>>
>>>> in fact the first if () looks redundant if you simply put an else
>>>> return NULL
>>>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>>
>>>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
>>>> again, but it seems that wi::exact_log2 returns -1 in that case so you
>>>> are fine (and in fact not handling this case).
>>>
>>> Are you sure it returns -1 for INT_MIN?  It isn't supposed to, assuming
>>> INT_MIN is shorthand for "minimum value for a signed type".  wide_ints
>>> aren't signed, so INT_MIN is indistinguishable from an unsigned
>>> 1<<(prec-1).
>>
>> No, not sure.  I spotted
>>
>>   /* Reject cases where there are implicit -1 blocks above HIGH.  */
>>   if (x.len * HOST_BITS_PER_WIDE_INT < x.precision && x.sign_mask () < 0)
>>     return -1;
>>
>> and thought that would catch it.  I mean the tree value is negative so
>> exact_log2 must see it is a negative value.

That's handling the "compressed" format, e.g.:

  {1 << 63}

as a 64-bit short-hand for a 256-bit:

  {1 << 63,-1,-1,-1}

In this case more than one of the low x.precision bits are known to be set.

> Now re-sent with Richards company disclaimer stripped...

Doh.  Sent via the right channels this time...

Thanks,
Richard

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 14:28                 ` Richard Sandiford
@ 2015-08-04 16:07                   ` Richard Biener
  2015-08-04 16:27                     ` Richard Sandiford
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Biener @ 2015-08-04 16:07 UTC (permalink / raw)
  To: Richard Sandiford
  Cc: Kumar, Venkataramanan, Jeff Law, Jakub Jelinek, gcc-patches

On August 4, 2015 4:28:26 PM GMT+02:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>Richard Biener <richard.guenther@gmail.com> writes:
>> On Tue, Aug 4, 2015 at 4:21 PM, Richard Biener
>> <richard.guenther@gmail.com> wrote:
>>> On Tue, Aug 4, 2015 at 4:15 PM, Richard Sandiford
>>> <richard.sandiford@arm.com> wrote:
>>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>>> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
>>>>> <Venkataramanan.Kumar@amd.com> wrote:
>>>>>> Hi Jeff,
>>>>>>
>>>>>>> -----Original Message-----
>>>>>>> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>>>>>>> owner@gcc.gnu.org] On Behalf Of Jeff Law
>>>>>>> Sent: Monday, August 03, 2015 11:42 PM
>>>>>>> To: Kumar, Venkataramanan; Jakub Jelinek
>>>>>>> Cc: Richard Beiner (richard.guenther@gmail.com);
>gcc-patches@gcc.gnu.org
>>>>>>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for
>mult
>>>>>>> expr with
>>>>>>> power 2 integer constant.
>>>>>>>
>>>>>>> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>>>>>>> > Hi Jakub,
>>>>>>> >
>>>>>>> > Thank you for reviewing the patch.
>>>>>>> >
>>>>>>> > I have incorporated your comments in the attached patch.
>>>>>>> Note Jakub is on PTO for the next 3 weeks.
>>>>>>
>>>>>>  Thank you for this information.
>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> >
>>>>>>> >
>>>>>>> >
>>>>>>> > vectorize_mults_via_shift.diff.txt
>>>>>>> >
>>>>>>> >
>>>>>>> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>>>> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>>>>>>> Jakub would probably like more testcases :-)
>>>>>>>
>>>>>>> The most obvious thing to test would be other shift factors.
>>>>>>>
>>>>>>> A negative test to verify we don't try to turn a multiply by
>>>>>>> non-constant or
>>>>>>> multiply by a constant that is not a power of 2 into shifts.
>>>>>>
>>>>>> I have added negative test in the attached patch.
>>>>>>
>>>>>>
>>>>>>>
>>>>>>> [ Would it make sense, for example, to turn a multiply by 3 into
>a
>>>>>>> shift-add
>>>>>>> sequence?  As Jakub said, choose_mult_variant can be your
>friend. ]
>>>>>>
>>>>>> Yes I will do that in a follow up patch.
>>>>>>
>>>>>> The new change log becomes
>>>>>>
>>>>>> gcc/ChangeLog
>>>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>>>      * tree-vect-patterns.c (vect_recog_mult_pattern): New
>function for vectorizing
>>>>>>         multiplication patterns.
>>>>>>      * tree-vectorizer.h: Adjust the number of patterns.
>>>>>>
>>>>>> gcc/testsuite/ChangeLog
>>>>>> 2015-08-04  Venkataramanan Kumar  <Venkataramanan.kumar@amd.com>
>>>>>>      * gcc.dg/vect/vect-mult-pattern-1.c: New
>>>>>>     * gcc.dg/vect/vect-mult-pattern-2.c: New
>>>>>>
>>>>>> Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>>>>>>
>>>>>> Ok for trunk ?
>>>>>
>>>>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>>>>> +      || TREE_CODE (oprnd1) != INTEGER_CST
>>>>> +      || TREE_CODE (itype) != INTEGER_TYPE
>>>>>
>>>>> INTEGRAL_TYPE_P (itype)
>>>>>
>>>>> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype,
>optab_vector);
>>>>> +  if (!optab
>>>>> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
>CODE_FOR_nothing)
>>>>> +       return NULL;
>>>>> +
>>>>>
>>>>> indent of the return stmt looks wrong
>>>>>
>>>>> +  /* Handle constant operands that are postive or negative powers
>of 2.  */
>>>>> +  if ( wi::exact_log2 (oprnd1) != -1  ||
>>>>> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>>>
>>>>> no space after (, || goes to the next line.
>>>>>
>>>>> +    {
>>>>> +      tree shift;
>>>>> +
>>>>> +      if (wi::exact_log2 (oprnd1) != -1)
>>>>>
>>>>> please cache wi::exact_log2
>>>>>
>>>>> in fact the first if () looks redundant if you simply put an else
>>>>> return NULL
>>>>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>>>
>>>>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is
>INT_MIN
>>>>> again, but it seems that wi::exact_log2 returns -1 in that case so
>you
>>>>> are fine (and in fact not handling this case).
>>>>
>>>> Are you sure it returns -1 for INT_MIN?  It isn't supposed to,
>assuming
>>>> INT_MIN is shorthand for "minimum value for a signed type". 
>wide_ints
>>>> aren't signed, so INT_MIN is indistinguishable from an unsigned
>>>> 1<<(prec-1).
>>>
>>> No, not sure.  I spotted
>>>
>>>   /* Reject cases where there are implicit -1 blocks above HIGH.  */
>>>   if (x.len * HOST_BITS_PER_WIDE_INT < x.precision && x.sign_mask ()
>< 0)
>>>     return -1;
>>>
>>> and thought that would catch it.  I mean the tree value is negative
>so
>>> exact_log2 must see it is a negative value.
>
>That's handling the "compressed" format, e.g.:
>
>  {1 << 63}
>
>as a 64-bit short-hand for a 256-bit:
>
>  {1 << 63,-1,-1,-1}
>
>In this case more than one of the low x.precision bits are known to be
>set.

So you are saying exact_log2 is really exact_log2u?

>> Now re-sent with Richards company disclaimer stripped...
>
>Doh.  Sent via the right channels this time...
>
>Thanks,
>Richard


^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 16:07                   ` Richard Biener
@ 2015-08-04 16:27                     ` Richard Sandiford
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Sandiford @ 2015-08-04 16:27 UTC (permalink / raw)
  To: Richard Biener
  Cc: Kumar, Venkataramanan, Jeff Law, Jakub Jelinek, gcc-patches

Richard Biener <richard.guenther@gmail.com> writes:
> On August 4, 2015 4:28:26 PM GMT+02:00, Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>Richard Biener <richard.guenther@gmail.com> writes:
>>> On Tue, Aug 4, 2015 at 4:21 PM, Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>> On Tue, Aug 4, 2015 at 4:15 PM, Richard Sandiford
>>>> <richard.sandiford@arm.com> wrote:
>>>>> Richard Biener <richard.guenther@gmail.com> writes:
>>>>>> in fact the first if () looks redundant if you simply put an else
>>>>>> return NULL
>>>>>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>>>>>
>>>>>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is
>>INT_MIN
>>>>>> again, but it seems that wi::exact_log2 returns -1 in that case so
>>you
>>>>>> are fine (and in fact not handling this case).
>>>>>
>>>>> Are you sure it returns -1 for INT_MIN?  It isn't supposed to,
>>assuming
>>>>> INT_MIN is shorthand for "minimum value for a signed type". 
>>wide_ints
>>>>> aren't signed, so INT_MIN is indistinguishable from an unsigned
>>>>> 1<<(prec-1).
>>>>
>>>> No, not sure.  I spotted
>>>>
>>>>   /* Reject cases where there are implicit -1 blocks above HIGH.  */
>>>>   if (x.len * HOST_BITS_PER_WIDE_INT < x.precision && x.sign_mask ()
>>< 0)
>>>>     return -1;
>>>>
>>>> and thought that would catch it.  I mean the tree value is negative
>>so
>>>> exact_log2 must see it is a negative value.
>>
>>That's handling the "compressed" format, e.g.:
>>
>>  {1 << 63}
>>
>>as a 64-bit short-hand for a 256-bit:
>>
>>  {1 << 63,-1,-1,-1}
>>
>>In this case more than one of the low x.precision bits are known to be
>>set.
>
> So you are saying exact_log2 is really exact_log2u?

Not sure what you mean, sorry.  All I meant was that a number like:

  0xffffffff_ffffffff_ffffffff_80000000

(uing 32-bit rather than 64-bit elements for brevity) is stored as
a single {0x80000000}, with the upper 3 elements being implicit.  And:

  exact_log2 (0xffffffff_ffffffff_ffffffff_80000000)

is obviously -1.  That's the case that the code above is handling.
If there are implicit all-1 blocks, the number is not a power of 2.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 10:37         ` Richard Biener
       [not found]           ` <87vbcvjg1y.fsf@e105548-lin.cambridge.arm.com>
@ 2015-08-04 16:49           ` Kumar, Venkataramanan
  2015-08-05  8:51             ` Richard Biener
  2015-08-05 11:41             ` Richard Biener
  1 sibling, 2 replies; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-08-04 16:49 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

[-- Attachment #1: Type: text/plain, Size: 4953 bytes --]

Hi Richard,


> -----Original Message-----
> From: Richard Biener [mailto:richard.guenther@gmail.com]
> Sent: Tuesday, August 04, 2015 4:07 PM
> To: Kumar, Venkataramanan
> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
> 
> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
> <Venkataramanan.Kumar@amd.com> wrote:
> > Hi Jeff,
> >
> >> -----Original Message-----
> >> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> >> owner@gcc.gnu.org] On Behalf Of Jeff Law
> >> Sent: Monday, August 03, 2015 11:42 PM
> >> To: Kumar, Venkataramanan; Jakub Jelinek
> >> Cc: Richard Beiner (richard.guenther@gmail.com);
> >> gcc-patches@gcc.gnu.org
> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> expr with power 2 integer constant.
> >>
> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> >> > Hi Jakub,
> >> >
> >> > Thank you for reviewing the patch.
> >> >
> >> > I have incorporated your comments in the attached patch.
> >> Note Jakub is on PTO for the next 3 weeks.
> >
> >  Thank you for this information.
> >
> >>
> >>
> >> >
> >> >
> >> >
> >> > vectorize_mults_via_shift.diff.txt
> >> >
> >> >
> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> Jakub would probably like more testcases :-)
> >>
> >> The most obvious thing to test would be other shift factors.
> >>
> >> A negative test to verify we don't try to turn a multiply by
> >> non-constant or multiply by a constant that is not a power of 2 into shifts.
> >
> > I have added negative test in the attached patch.
> >
> >
> >>
> >> [ Would it make sense, for example, to turn a multiply by 3 into a
> >> shift-add sequence?  As Jakub said, choose_mult_variant can be your
> >> friend. ]
> >
> > Yes I will do that in a follow up patch.
> >
> > The new change log becomes
> >
> > gcc/ChangeLog
> > 2015-08-04  Venkataramanan Kumar
> <Venkataramanan.kumar@amd.com>
> >      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for
> vectorizing
> >         multiplication patterns.
> >      * tree-vectorizer.h: Adjust the number of patterns.
> >
> > gcc/testsuite/ChangeLog
> > 2015-08-04  Venkataramanan Kumar
> <Venkataramanan.kumar@amd.com>
> >      * gcc.dg/vect/vect-mult-pattern-1.c: New
> >     * gcc.dg/vect/vect-mult-pattern-2.c: New
> >
> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
> >
> > Ok for trunk ?
> 
> +  if (TREE_CODE (oprnd0) != SSA_NAME
> +      || TREE_CODE (oprnd1) != INTEGER_CST
> +      || TREE_CODE (itype) != INTEGER_TYPE
> 
> INTEGRAL_TYPE_P (itype)
> 
> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);  if
> + (!optab
> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
> CODE_FOR_nothing)
> +       return NULL;
> +
> 
> indent of the return stmt looks wrong
> 
> +  /* Handle constant operands that are postive or negative powers of 2.
> + */  if ( wi::exact_log2 (oprnd1) != -1  ||
> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
> 
> no space after (, || goes to the next line.
> 
> +    {
> +      tree shift;
> +
> +      if (wi::exact_log2 (oprnd1) != -1)
> 
> please cache wi::exact_log2
> 
> in fact the first if () looks redundant if you simply put an else return NULL
> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
> 
> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
> again, but it seems that wi::exact_log2 returns -1 in that case so you are fine
> (and in fact not handling this case).
> 

I have updated your review comments in the attached patch. 

For the INT_MIN case, I am getting  vectorized output with the patch.   I believe x86_64 also vectorizes but does not negates the results. 

#include <limits.h>
unsigned long int  __attribute__ ((aligned (64)))arr[100];

int i;
#if 1
void test_vector_shifts()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i] * INT_MIN;
}
#endif

void test_vectorshift_via_mul()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]*(-INT_MIN);

}

Before 
---------
        ldr     x1, [x0]
        neg     x1, x1, lsl 31
        str     x1, [x0], 8
        cmp     x0, x2

After 
-------
        ldr     q0, [x0]
        shl     v0.2d, v0.2d, 31
        neg     v0.2d, v0.2d
        str     q0, [x0], 16
        cmp     x1, x0

is this fine ?  

 > Thanks,
> Richard.
> 
> >>
> >>
> >>
> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
> >> (vec<gimple> *stmts,
> >> >     return pattern_stmt;
> >> >   }
> >> >
> >> > +/* Detect multiplication by constant which are postive or
> >> > +negatives of power 2,
> >> s/postive/positive/
> >>
> >>
> >> Jeff
> >
> > Regards,
> > Venkat.
> >

[-- Attachment #2: vectorize_mults_via_shift_latest.diff.txt --]
[-- Type: text/plain, Size: 7843 bytes --]

diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
new file mode 100644
index 0000000..764d0e3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-1.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned  long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void test_for_vectorshifts_via_mul_with_power2_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * 4;
+}
+
+void test_for_vectorshifts_via_mul_with_negative_power2_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * (-4);
+}
+
+/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect"  {target  { ! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-times "vect_recog_mult_pattern: detected" 2 "vect" {target  { ! { vect_int_mult } } } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
new file mode 100644
index 0000000..77e8cff
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/vect-mult-pattern-2.c
@@ -0,0 +1,28 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target vect_int } */
+/* { dg-require-effective-target vect_shift } */
+
+unsigned  long int __attribute__ ((aligned (64)))arr[100];
+int i;
+
+void negative_test_for_vectorshifts_via_mul_with_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * 123;
+}
+
+void negative_test_for_vectorshifts_via_mul_with_negative_const ()
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * (-123);
+}
+
+void negative_test_for_vectorshifts_via_mul_with_varable (int x)
+{
+  for (i=0; i<=99; i++)
+    arr[i] = arr[i] * x;
+}
+
+
+/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 3 "vect"  {target  { ! { vect_int_mult } } } } } */
+/* { dg-final { scan-tree-dump-not "vect_recog_mult_pattern: detected" "vect" {target  { ! { vect_int_mult } } } } } */
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index f034635..bc3117d 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -76,6 +76,10 @@ static gimple vect_recog_vector_vector_shift_pattern (vec<gimple> *,
 						      tree *, tree *);
 static gimple vect_recog_divmod_pattern (vec<gimple> *,
 					 tree *, tree *);
+
+static gimple vect_recog_mult_pattern (vec<gimple> *,
+				       tree *, tree *);
+
 static gimple vect_recog_mixed_size_cond_pattern (vec<gimple> *,
 						  tree *, tree *);
 static gimple vect_recog_bool_pattern (vec<gimple> *, tree *, tree *);
@@ -90,6 +94,7 @@ static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_rotate_pattern,
 	vect_recog_vector_vector_shift_pattern,
 	vect_recog_divmod_pattern,
+	vect_recog_mult_pattern,
 	vect_recog_mixed_size_cond_pattern,
 	vect_recog_bool_pattern};
 
@@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern (vec<gimple> *stmts,
   return pattern_stmt;
 }
 
+/* Detect multiplication by constant which are postive or negatives of power 2,
+   and convert them to shift patterns.
+
+   Mult with constants that are postive power of two.
+   type a_t;
+   type b_t
+   S1: b_t = a_t * n
+
+   or
+
+   Mult with constants that are negative power of two.
+   S2: b_t = a_t * -n
+
+   Input/Output:
+
+   STMTS: Contains a stmt from which the pattern search begins,
+   i.e. the mult stmt.  Convert the mult operation to LSHIFT if
+   constant operand is a power of 2.
+   type a_t, b_t
+   S1': b_t = a_t << log2 (n)
+
+   Convert the mult operation to LSHIFT and followed by a NEGATE
+   if constant operand is a negative power of 2.
+   type a_t, b_t, res_T;
+   S2': b_t = a_t << log2 (n)
+   S3': res_T  = - (b_t)
+
+ Output:
+
+  * TYPE_IN: The type of the input arguments to the pattern.
+
+  * TYPE_OUT: The type of the output of this pattern.
+
+  * Return value: A new stmt that will be used to replace the multiplication
+    S1 or S2 stmt.  */
+
+static gimple
+vect_recog_mult_pattern (vec<gimple> *stmts,
+			 tree *type_in, tree *type_out)
+{
+  gimple last_stmt = stmts->pop ();
+  tree oprnd0, oprnd1, vectype, itype;
+  gimple pattern_stmt, def_stmt;
+  optab optab;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  int power2_val, power2_neg_val;
+  tree shift;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  if (gimple_assign_rhs_code (last_stmt) != MULT_EXPR)
+    return NULL;
+
+  oprnd0 = gimple_assign_rhs1 (last_stmt);
+  oprnd1 = gimple_assign_rhs2 (last_stmt);
+  itype = TREE_TYPE (oprnd0);
+
+  if (TREE_CODE (oprnd0) != SSA_NAME
+      || TREE_CODE (oprnd1) != INTEGER_CST
+      || !INTEGRAL_TYPE_P (itype)
+      || TYPE_PRECISION (itype) != GET_MODE_PRECISION (TYPE_MODE (itype)))
+    return NULL;
+
+  vectype = get_vectype_for_scalar_type (itype);
+  if (vectype == NULL_TREE)
+    return NULL;
+
+  /* If the target can handle vectorized multiplication natively,
+     don't attempt to optimize this.  */
+  optab = optab_for_tree_code (MULT_EXPR, vectype, optab_default);
+  if (optab != unknown_optab)
+    {
+      machine_mode vec_mode = TYPE_MODE (vectype);
+      int icode = (int) optab_handler (optab, vec_mode);
+      if (icode != CODE_FOR_nothing)
+	return NULL;
+    }
+
+  /* If target cannot handle vector left shift then we cannot
+     optimize and bail out.  */
+  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
+  if (!optab
+      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+    return NULL;
+
+  power2_val = wi::exact_log2 (oprnd1);
+  power2_neg_val = wi::exact_log2 (wi::neg (oprnd1));
+
+  /* Handle constant operands that are postive or negative powers of 2.  */
+  if (power2_val != -1)
+    {
+      shift = build_int_cst (itype, power2_val);
+      pattern_stmt
+	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+			       LSHIFT_EXPR, oprnd0, shift);
+    }
+  else if (power2_neg_val != -1)
+    {
+      /* If the target cannot handle vector NEGATE then we cannot
+	 do the optimization.  */
+      optab = optab_for_tree_code (NEGATE_EXPR, vectype, optab_vector);
+      if (!optab
+	  || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
+	return NULL;
+
+      shift = build_int_cst (itype, power2_neg_val);
+      def_stmt
+	= gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+			       LSHIFT_EXPR, oprnd0, shift);
+      new_pattern_def_seq (stmt_vinfo, def_stmt);
+      pattern_stmt
+	 = gimple_build_assign (vect_recog_temp_ssa_var (itype, NULL),
+				NEGATE_EXPR, gimple_assign_lhs (def_stmt));
+    }
+  else
+    return NULL;
+
+  /* Pattern detected.  */
+  if (dump_enabled_p ())
+    dump_printf_loc (MSG_NOTE, vect_location,
+		     "vect_recog_mult_pattern: detected:\n");
+
+  if (dump_enabled_p ())
+    dump_gimple_stmt_loc (MSG_NOTE, vect_location, TDF_SLIM,
+			  pattern_stmt,0);
+
+  stmts->safe_push (last_stmt);
+  *type_in = vectype;
+  *type_out = vectype;
+
+  return pattern_stmt;
+}
+
 /* Detect a signed division by a constant that wouldn't be
    otherwise vectorized:
 
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index dfa8795..b490af4 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -1132,7 +1132,7 @@ extern void vect_slp_transform_bb (basic_block);
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (vec<gimple> *, tree *, tree *);
-#define NUM_PATTERNS 12
+#define NUM_PATTERNS 13
 void vect_pattern_recog (loop_vec_info, bb_vec_info);
 
 /* In tree-vectorizer.c.  */

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 16:49           ` Kumar, Venkataramanan
@ 2015-08-05  8:51             ` Richard Biener
  2015-08-05  9:34               ` Kumar, Venkataramanan
  2015-08-05 11:41             ` Richard Biener
  1 sibling, 1 reply; 16+ messages in thread
From: Richard Biener @ 2015-08-05  8:51 UTC (permalink / raw)
  To: Kumar, Venkataramanan; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

On Tue, Aug 4, 2015 at 6:49 PM, Kumar, Venkataramanan
<Venkataramanan.Kumar@amd.com> wrote:
> Hi Richard,
>
>
>> -----Original Message-----
>> From: Richard Biener [mailto:richard.guenther@gmail.com]
>> Sent: Tuesday, August 04, 2015 4:07 PM
>> To: Kumar, Venkataramanan
>> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
>> power 2 integer constant.
>>
>> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
>> <Venkataramanan.Kumar@amd.com> wrote:
>> > Hi Jeff,
>> >
>> >> -----Original Message-----
>> >> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> >> owner@gcc.gnu.org] On Behalf Of Jeff Law
>> >> Sent: Monday, August 03, 2015 11:42 PM
>> >> To: Kumar, Venkataramanan; Jakub Jelinek
>> >> Cc: Richard Beiner (richard.guenther@gmail.com);
>> >> gcc-patches@gcc.gnu.org
>> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
>> >> expr with power 2 integer constant.
>> >>
>> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>> >> > Hi Jakub,
>> >> >
>> >> > Thank you for reviewing the patch.
>> >> >
>> >> > I have incorporated your comments in the attached patch.
>> >> Note Jakub is on PTO for the next 3 weeks.
>> >
>> >  Thank you for this information.
>> >
>> >>
>> >>
>> >> >
>> >> >
>> >> >
>> >> > vectorize_mults_via_shift.diff.txt
>> >> >
>> >> >
>> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> >> Jakub would probably like more testcases :-)
>> >>
>> >> The most obvious thing to test would be other shift factors.
>> >>
>> >> A negative test to verify we don't try to turn a multiply by
>> >> non-constant or multiply by a constant that is not a power of 2 into shifts.
>> >
>> > I have added negative test in the attached patch.
>> >
>> >
>> >>
>> >> [ Would it make sense, for example, to turn a multiply by 3 into a
>> >> shift-add sequence?  As Jakub said, choose_mult_variant can be your
>> >> friend. ]
>> >
>> > Yes I will do that in a follow up patch.
>> >
>> > The new change log becomes
>> >
>> > gcc/ChangeLog
>> > 2015-08-04  Venkataramanan Kumar
>> <Venkataramanan.kumar@amd.com>
>> >      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for
>> vectorizing
>> >         multiplication patterns.
>> >      * tree-vectorizer.h: Adjust the number of patterns.
>> >
>> > gcc/testsuite/ChangeLog
>> > 2015-08-04  Venkataramanan Kumar
>> <Venkataramanan.kumar@amd.com>
>> >      * gcc.dg/vect/vect-mult-pattern-1.c: New
>> >     * gcc.dg/vect/vect-mult-pattern-2.c: New
>> >
>> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>> >
>> > Ok for trunk ?
>>
>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>> +      || TREE_CODE (oprnd1) != INTEGER_CST
>> +      || TREE_CODE (itype) != INTEGER_TYPE
>>
>> INTEGRAL_TYPE_P (itype)
>>
>> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);  if
>> + (!optab
>> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
>> CODE_FOR_nothing)
>> +       return NULL;
>> +
>>
>> indent of the return stmt looks wrong
>>
>> +  /* Handle constant operands that are postive or negative powers of 2.
>> + */  if ( wi::exact_log2 (oprnd1) != -1  ||
>> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>
>> no space after (, || goes to the next line.
>>
>> +    {
>> +      tree shift;
>> +
>> +      if (wi::exact_log2 (oprnd1) != -1)
>>
>> please cache wi::exact_log2
>>
>> in fact the first if () looks redundant if you simply put an else return NULL
>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>
>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
>> again, but it seems that wi::exact_log2 returns -1 in that case so you are fine
>> (and in fact not handling this case).
>>
>
> I have updated your review comments in the attached patch.
>
> For the INT_MIN case, I am getting  vectorized output with the patch.   I believe x86_64 also vectorizes but does not negates the results.
>
> #include <limits.h>
> unsigned long int  __attribute__ ((aligned (64)))arr[100];
>
> int i;
> #if 1
> void test_vector_shifts()
> {
>         for(i=0; i<=99;i++)
>         arr[i]=arr[i] * INT_MIN;
> }
> #endif
>
> void test_vectorshift_via_mul()
> {
>         for(i=0; i<=99;i++)
>         arr[i]=arr[i]*(-INT_MIN);
>
> }
>
> Before
> ---------
>         ldr     x1, [x0]
>         neg     x1, x1, lsl 31
>         str     x1, [x0], 8
>         cmp     x0, x2
>
> After
> -------
>         ldr     q0, [x0]
>         shl     v0.2d, v0.2d, 31
>         neg     v0.2d, v0.2d
>         str     q0, [x0], 16
>         cmp     x1, x0
>
> is this fine ?

The interesting case is of course LONG_MIN if you are vectorizing a
long multiplication.
Also check with arr[] being 'long', not 'unsigned long'.  Note that
with 'long' doing
arr[i]*(-LONG_MIN) invokes undefined behavior.

Richard.

>  > Thanks,
>> Richard.
>>
>> >>
>> >>
>> >>
>> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
>> >> (vec<gimple> *stmts,
>> >> >     return pattern_stmt;
>> >> >   }
>> >> >
>> >> > +/* Detect multiplication by constant which are postive or
>> >> > +negatives of power 2,
>> >> s/postive/positive/
>> >>
>> >>
>> >> Jeff
>> >
>> > Regards,
>> > Venkat.
>> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-05  8:51             ` Richard Biener
@ 2015-08-05  9:34               ` Kumar, Venkataramanan
  0 siblings, 0 replies; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-08-05  9:34 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

Hi Richard,

> -----Original Message-----
> From: Richard Biener [mailto:richard.guenther@gmail.com]
> Sent: Wednesday, August 05, 2015 2:21 PM
> To: Kumar, Venkataramanan
> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
> 
> On Tue, Aug 4, 2015 at 6:49 PM, Kumar, Venkataramanan
> <Venkataramanan.Kumar@amd.com> wrote:
> > Hi Richard,
> >
> >
> >> -----Original Message-----
> >> From: Richard Biener [mailto:richard.guenther@gmail.com]
> >> Sent: Tuesday, August 04, 2015 4:07 PM
> >> To: Kumar, Venkataramanan
> >> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> expr with power 2 integer constant.
> >>
> >> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
> >> <Venkataramanan.Kumar@amd.com> wrote:
> >> > Hi Jeff,
> >> >
> >> >> -----Original Message-----
> >> >> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> >> >> owner@gcc.gnu.org] On Behalf Of Jeff Law
> >> >> Sent: Monday, August 03, 2015 11:42 PM
> >> >> To: Kumar, Venkataramanan; Jakub Jelinek
> >> >> Cc: Richard Beiner (richard.guenther@gmail.com);
> >> >> gcc-patches@gcc.gnu.org
> >> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> >> expr with power 2 integer constant.
> >> >>
> >> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> >> >> > Hi Jakub,
> >> >> >
> >> >> > Thank you for reviewing the patch.
> >> >> >
> >> >> > I have incorporated your comments in the attached patch.
> >> >> Note Jakub is on PTO for the next 3 weeks.
> >> >
> >> >  Thank you for this information.
> >> >
> >> >>
> >> >>
> >> >> >
> >> >> >
> >> >> >
> >> >> > vectorize_mults_via_shift.diff.txt
> >> >> >
> >> >> >
> >> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> >> Jakub would probably like more testcases :-)
> >> >>
> >> >> The most obvious thing to test would be other shift factors.
> >> >>
> >> >> A negative test to verify we don't try to turn a multiply by
> >> >> non-constant or multiply by a constant that is not a power of 2 into
> shifts.
> >> >
> >> > I have added negative test in the attached patch.
> >> >
> >> >
> >> >>
> >> >> [ Would it make sense, for example, to turn a multiply by 3 into a
> >> >> shift-add sequence?  As Jakub said, choose_mult_variant can be
> >> >> your friend. ]
> >> >
> >> > Yes I will do that in a follow up patch.
> >> >
> >> > The new change log becomes
> >> >
> >> > gcc/ChangeLog
> >> > 2015-08-04  Venkataramanan Kumar
> >> <Venkataramanan.kumar@amd.com>
> >> >      * tree-vect-patterns.c (vect_recog_mult_pattern): New function
> >> > for
> >> vectorizing
> >> >         multiplication patterns.
> >> >      * tree-vectorizer.h: Adjust the number of patterns.
> >> >
> >> > gcc/testsuite/ChangeLog
> >> > 2015-08-04  Venkataramanan Kumar
> >> <Venkataramanan.kumar@amd.com>
> >> >      * gcc.dg/vect/vect-mult-pattern-1.c: New
> >> >     * gcc.dg/vect/vect-mult-pattern-2.c: New
> >> >
> >> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
> >> >
> >> > Ok for trunk ?
> >>
> >> +  if (TREE_CODE (oprnd0) != SSA_NAME
> >> +      || TREE_CODE (oprnd1) != INTEGER_CST
> >> +      || TREE_CODE (itype) != INTEGER_TYPE
> >>
> >> INTEGRAL_TYPE_P (itype)
> >>
> >> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> >> + if (!optab
> >> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
> >> CODE_FOR_nothing)
> >> +       return NULL;
> >> +
> >>
> >> indent of the return stmt looks wrong
> >>
> >> +  /* Handle constant operands that are postive or negative powers of 2.
> >> + */  if ( wi::exact_log2 (oprnd1) != -1  ||
> >> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
> >>
> >> no space after (, || goes to the next line.
> >>
> >> +    {
> >> +      tree shift;
> >> +
> >> +      if (wi::exact_log2 (oprnd1) != -1)
> >>
> >> please cache wi::exact_log2
> >>
> >> in fact the first if () looks redundant if you simply put an else
> >> return NULL after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
> >>
> >> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
> >> again, but it seems that wi::exact_log2 returns -1 in that case so
> >> you are fine (and in fact not handling this case).
> >>
> >
> > I have updated your review comments in the attached patch.
> >
> > For the INT_MIN case, I am getting  vectorized output with the patch.   I
> believe x86_64 also vectorizes but does not negates the results.
> >
> > #include <limits.h>
> > unsigned long int  __attribute__ ((aligned (64)))arr[100];
> >
> > int i;
> > #if 1
> > void test_vector_shifts()
> > {
> >         for(i=0; i<=99;i++)
> >         arr[i]=arr[i] * INT_MIN;
> > }
> > #endif
> >
> > void test_vectorshift_via_mul()
> > {
> >         for(i=0; i<=99;i++)
> >         arr[i]=arr[i]*(-INT_MIN);
> >
> > }
> >
> > Before
> > ---------
> >         ldr     x1, [x0]
> >         neg     x1, x1, lsl 31
> >         str     x1, [x0], 8
> >         cmp     x0, x2
> >
> > After
> > -------
> >         ldr     q0, [x0]
> >         shl     v0.2d, v0.2d, 31
> >         neg     v0.2d, v0.2d
> >         str     q0, [x0], 16
> >         cmp     x1, x0
> >
> > is this fine ?
> 
> The interesting case is of course LONG_MIN if you are vectorizing a long
> multiplication.
> Also check with arr[] being 'long', not 'unsigned long'.  Note that with 'long'
> doing
> arr[i]*(-LONG_MIN) invokes undefined behavior.
> 
> Richard.
 
I checked this case 

#include<stdlib.h>
#include <limits.h>
long int __attribute__ ((aligned (64))) arr[100]={1};
long int i;
void test_vector_shifts();
void main()
{
        test_vector_shifts();
        if(!(arr[0] == (long int)(1*-LONG_MIN))) abort();
        printf("%x, %x", arr[0], (long int)(1*-LONG_MIN));
}

void test_vector_shifts()
{
        for(i=0; i<=99;i++)
        arr[i]=arr[i]*-LONG_MIN;
}

X86_64 vectorizes this and later converts to shifts 

.L2:
        vmovdqa (%rax), %xmm0
        addq    $16, %rax
        vpsllq  $63, %xmm0, %xmm0
        vmovaps %xmm0, -16(%rax)
        cmpq    $arr+800, %rax
        jne     .L2

On Aarch64 without my patch 
.L2:
        ldr     x1, [x0]
        lsl     x1, x1, 63
        str     x1, [x0], 8
        cmp     x0, x2
        bne     .L2

With my patch 

.L2:
        ldr     q0, [x0]
        shl     v0.2d, v0.2d, 63
        str     q0, [x0], 16
        cmp     x1, x0
        bne     .L2

All these cases outputs 0 for the above test case.


> 
> >  > Thanks,
> >> Richard.
> >>
> >> >>
> >> >>
> >> >>
> >> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
> >> >> (vec<gimple> *stmts,
> >> >> >     return pattern_stmt;
> >> >> >   }
> >> >> >
> >> >> > +/* Detect multiplication by constant which are postive or
> >> >> > +negatives of power 2,
> >> >> s/postive/positive/
> >> >>
> >> >>
> >> >> Jeff
> >> >
> >> > Regards,
> >> > Venkat.
> >> >

Regards,
Venkat.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-04 16:49           ` Kumar, Venkataramanan
  2015-08-05  8:51             ` Richard Biener
@ 2015-08-05 11:41             ` Richard Biener
  2015-08-06 12:04               ` Kumar, Venkataramanan
  1 sibling, 1 reply; 16+ messages in thread
From: Richard Biener @ 2015-08-05 11:41 UTC (permalink / raw)
  To: Kumar, Venkataramanan; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

On Tue, Aug 4, 2015 at 6:49 PM, Kumar, Venkataramanan
<Venkataramanan.Kumar@amd.com> wrote:
> Hi Richard,
>
>
>> -----Original Message-----
>> From: Richard Biener [mailto:richard.guenther@gmail.com]
>> Sent: Tuesday, August 04, 2015 4:07 PM
>> To: Kumar, Venkataramanan
>> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
>> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
>> power 2 integer constant.
>>
>> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
>> <Venkataramanan.Kumar@amd.com> wrote:
>> > Hi Jeff,
>> >
>> >> -----Original Message-----
>> >> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
>> >> owner@gcc.gnu.org] On Behalf Of Jeff Law
>> >> Sent: Monday, August 03, 2015 11:42 PM
>> >> To: Kumar, Venkataramanan; Jakub Jelinek
>> >> Cc: Richard Beiner (richard.guenther@gmail.com);
>> >> gcc-patches@gcc.gnu.org
>> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
>> >> expr with power 2 integer constant.
>> >>
>> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
>> >> > Hi Jakub,
>> >> >
>> >> > Thank you for reviewing the patch.
>> >> >
>> >> > I have incorporated your comments in the attached patch.
>> >> Note Jakub is on PTO for the next 3 weeks.
>> >
>> >  Thank you for this information.
>> >
>> >>
>> >>
>> >> >
>> >> >
>> >> >
>> >> > vectorize_mults_via_shift.diff.txt
>> >> >
>> >> >
>> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
>> >> Jakub would probably like more testcases :-)
>> >>
>> >> The most obvious thing to test would be other shift factors.
>> >>
>> >> A negative test to verify we don't try to turn a multiply by
>> >> non-constant or multiply by a constant that is not a power of 2 into shifts.
>> >
>> > I have added negative test in the attached patch.
>> >
>> >
>> >>
>> >> [ Would it make sense, for example, to turn a multiply by 3 into a
>> >> shift-add sequence?  As Jakub said, choose_mult_variant can be your
>> >> friend. ]
>> >
>> > Yes I will do that in a follow up patch.
>> >
>> > The new change log becomes
>> >
>> > gcc/ChangeLog
>> > 2015-08-04  Venkataramanan Kumar
>> <Venkataramanan.kumar@amd.com>
>> >      * tree-vect-patterns.c (vect_recog_mult_pattern): New function for
>> vectorizing
>> >         multiplication patterns.
>> >      * tree-vectorizer.h: Adjust the number of patterns.
>> >
>> > gcc/testsuite/ChangeLog
>> > 2015-08-04  Venkataramanan Kumar
>> <Venkataramanan.kumar@amd.com>
>> >      * gcc.dg/vect/vect-mult-pattern-1.c: New
>> >     * gcc.dg/vect/vect-mult-pattern-2.c: New
>> >
>> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
>> >
>> > Ok for trunk ?
>>
>> +  if (TREE_CODE (oprnd0) != SSA_NAME
>> +      || TREE_CODE (oprnd1) != INTEGER_CST
>> +      || TREE_CODE (itype) != INTEGER_TYPE
>>
>> INTEGRAL_TYPE_P (itype)
>>
>> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);  if
>> + (!optab
>> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
>> CODE_FOR_nothing)
>> +       return NULL;
>> +
>>
>> indent of the return stmt looks wrong
>>
>> +  /* Handle constant operands that are postive or negative powers of 2.
>> + */  if ( wi::exact_log2 (oprnd1) != -1  ||
>> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>
>> no space after (, || goes to the next line.
>>
>> +    {
>> +      tree shift;
>> +
>> +      if (wi::exact_log2 (oprnd1) != -1)
>>
>> please cache wi::exact_log2
>>
>> in fact the first if () looks redundant if you simply put an else return NULL
>> after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
>>
>> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
>> again, but it seems that wi::exact_log2 returns -1 in that case so you are fine
>> (and in fact not handling this case).
>>
>
> I have updated your review comments in the attached patch.
>
> For the INT_MIN case, I am getting  vectorized output with the patch.   I believe x86_64 also vectorizes but does not negates the results.
>
> #include <limits.h>
> unsigned long int  __attribute__ ((aligned (64)))arr[100];
>
> int i;
> #if 1
> void test_vector_shifts()
> {
>         for(i=0; i<=99;i++)
>         arr[i]=arr[i] * INT_MIN;
> }
> #endif
>
> void test_vectorshift_via_mul()
> {
>         for(i=0; i<=99;i++)
>         arr[i]=arr[i]*(-INT_MIN);
>
> }
>
> Before
> ---------
>         ldr     x1, [x0]
>         neg     x1, x1, lsl 31
>         str     x1, [x0], 8
>         cmp     x0, x2
>
> After
> -------
>         ldr     q0, [x0]
>         shl     v0.2d, v0.2d, 31
>         neg     v0.2d, v0.2d
>         str     q0, [x0], 16
>         cmp     x1, x0
>
> is this fine ?

Btw, the patch is ok for trunk.  It looks like it does the correct
thing for LONG_MIN.

Thanks,
Richard.

>  > Thanks,
>> Richard.
>>
>> >>
>> >>
>> >>
>> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
>> >> (vec<gimple> *stmts,
>> >> >     return pattern_stmt;
>> >> >   }
>> >> >
>> >> > +/* Detect multiplication by constant which are postive or
>> >> > +negatives of power 2,
>> >> s/postive/positive/
>> >>
>> >>
>> >> Jeff
>> >
>> > Regards,
>> > Venkat.
>> >

^ permalink raw reply	[flat|nested] 16+ messages in thread

* RE: [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant.
  2015-08-05 11:41             ` Richard Biener
@ 2015-08-06 12:04               ` Kumar, Venkataramanan
  0 siblings, 0 replies; 16+ messages in thread
From: Kumar, Venkataramanan @ 2015-08-06 12:04 UTC (permalink / raw)
  To: Richard Biener; +Cc: Jeff Law, Jakub Jelinek, gcc-patches

Hi Richard, 

> -----Original Message-----
> From: Richard Biener [mailto:richard.guenther@gmail.com]
> Sent: Wednesday, August 05, 2015 5:11 PM
> To: Kumar, Venkataramanan
> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult expr with
> power 2 integer constant.
> 
> On Tue, Aug 4, 2015 at 6:49 PM, Kumar, Venkataramanan
> <Venkataramanan.Kumar@amd.com> wrote:
> > Hi Richard,
> >
> >
> >> -----Original Message-----
> >> From: Richard Biener [mailto:richard.guenther@gmail.com]
> >> Sent: Tuesday, August 04, 2015 4:07 PM
> >> To: Kumar, Venkataramanan
> >> Cc: Jeff Law; Jakub Jelinek; gcc-patches@gcc.gnu.org
> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> expr with power 2 integer constant.
> >>
> >> On Tue, Aug 4, 2015 at 10:52 AM, Kumar, Venkataramanan
> >> <Venkataramanan.Kumar@amd.com> wrote:
> >> > Hi Jeff,
> >> >
> >> >> -----Original Message-----
> >> >> From: gcc-patches-owner@gcc.gnu.org [mailto:gcc-patches-
> >> >> owner@gcc.gnu.org] On Behalf Of Jeff Law
> >> >> Sent: Monday, August 03, 2015 11:42 PM
> >> >> To: Kumar, Venkataramanan; Jakub Jelinek
> >> >> Cc: Richard Beiner (richard.guenther@gmail.com);
> >> >> gcc-patches@gcc.gnu.org
> >> >> Subject: Re: [RFC] [Patch]: Try and vectorize with shift for mult
> >> >> expr with power 2 integer constant.
> >> >>
> >> >> On 08/02/2015 05:03 AM, Kumar, Venkataramanan wrote:
> >> >> > Hi Jakub,
> >> >> >
> >> >> > Thank you for reviewing the patch.
> >> >> >
> >> >> > I have incorporated your comments in the attached patch.
> >> >> Note Jakub is on PTO for the next 3 weeks.
> >> >
> >> >  Thank you for this information.
> >> >
> >> >>
> >> >>
> >> >> >
> >> >> >
> >> >> >
> >> >> > vectorize_mults_via_shift.diff.txt
> >> >> >
> >> >> >
> >> >> > diff --git a/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> >> > b/gcc/testsuite/gcc.dg/vect/vect-mult-patterns.c
> >> >> Jakub would probably like more testcases :-)
> >> >>
> >> >> The most obvious thing to test would be other shift factors.
> >> >>
> >> >> A negative test to verify we don't try to turn a multiply by
> >> >> non-constant or multiply by a constant that is not a power of 2 into
> shifts.
> >> >
> >> > I have added negative test in the attached patch.
> >> >
> >> >
> >> >>
> >> >> [ Would it make sense, for example, to turn a multiply by 3 into a
> >> >> shift-add sequence?  As Jakub said, choose_mult_variant can be
> >> >> your friend. ]
> >> >
> >> > Yes I will do that in a follow up patch.
> >> >
> >> > The new change log becomes
> >> >
> >> > gcc/ChangeLog
> >> > 2015-08-04  Venkataramanan Kumar
> >> <Venkataramanan.kumar@amd.com>
> >> >      * tree-vect-patterns.c (vect_recog_mult_pattern): New function
> >> > for
> >> vectorizing
> >> >         multiplication patterns.
> >> >      * tree-vectorizer.h: Adjust the number of patterns.
> >> >
> >> > gcc/testsuite/ChangeLog
> >> > 2015-08-04  Venkataramanan Kumar
> >> <Venkataramanan.kumar@amd.com>
> >> >      * gcc.dg/vect/vect-mult-pattern-1.c: New
> >> >     * gcc.dg/vect/vect-mult-pattern-2.c: New
> >> >
> >> > Bootstrapped and reg tested on aarch64-unknown-linux-gnu.
> >> >
> >> > Ok for trunk ?
> >>
> >> +  if (TREE_CODE (oprnd0) != SSA_NAME
> >> +      || TREE_CODE (oprnd1) != INTEGER_CST
> >> +      || TREE_CODE (itype) != INTEGER_TYPE
> >>
> >> INTEGRAL_TYPE_P (itype)
> >>
> >> +  optab = optab_for_tree_code (LSHIFT_EXPR, vectype, optab_vector);
> >> + if (!optab
> >> +      || optab_handler (optab, TYPE_MODE (vectype)) ==
> >> CODE_FOR_nothing)
> >> +       return NULL;
> >> +
> >>
> >> indent of the return stmt looks wrong
> >>
> >> +  /* Handle constant operands that are postive or negative powers of 2.
> >> + */  if ( wi::exact_log2 (oprnd1) != -1  ||
> >> +       wi::exact_log2 (wi::neg (oprnd1)) != -1)
> >>
> >> no space after (, || goes to the next line.
> >>
> >> +    {
> >> +      tree shift;
> >> +
> >> +      if (wi::exact_log2 (oprnd1) != -1)
> >>
> >> please cache wi::exact_log2
> >>
> >> in fact the first if () looks redundant if you simply put an else
> >> return NULL after a else if (wi::exact_log2 (wi::neg (oprnd1)) != -1)
> >>
> >> Note that the issue with INT_MIN is that wi::neg (INT_MIN) is INT_MIN
> >> again, but it seems that wi::exact_log2 returns -1 in that case so
> >> you are fine (and in fact not handling this case).
> >>
> >
> > I have updated your review comments in the attached patch.
> >
> > For the INT_MIN case, I am getting  vectorized output with the patch.   I
> believe x86_64 also vectorizes but does not negates the results.
> >
> > #include <limits.h>
> > unsigned long int  __attribute__ ((aligned (64)))arr[100];
> >
> > int i;
> > #if 1
> > void test_vector_shifts()
> > {
> >         for(i=0; i<=99;i++)
> >         arr[i]=arr[i] * INT_MIN;
> > }
> > #endif
> >
> > void test_vectorshift_via_mul()
> > {
> >         for(i=0; i<=99;i++)
> >         arr[i]=arr[i]*(-INT_MIN);
> >
> > }
> >
> > Before
> > ---------
> >         ldr     x1, [x0]
> >         neg     x1, x1, lsl 31
> >         str     x1, [x0], 8
> >         cmp     x0, x2
> >
> > After
> > -------
> >         ldr     q0, [x0]
> >         shl     v0.2d, v0.2d, 31
> >         neg     v0.2d, v0.2d
> >         str     q0, [x0], 16
> >         cmp     x1, x0
> >
> > is this fine ?
> 
> Btw, the patch is ok for trunk.  It looks like it does the correct thing for
> LONG_MIN.
> 
> Thanks,
> Richard.

Thanks you. I have committed the patch after a GCC bootstrap and regression testing  on x86_64-unknown-linux-gnu machine.
Ref: https://gcc.gnu.org/viewcvs/gcc?view=revision&revision=226675
 

> 
> >  > Thanks,
> >> Richard.
> >>
> >> >>
> >> >>
> >> >>
> >> >> > @@ -2147,6 +2152,140 @@ vect_recog_vector_vector_shift_pattern
> >> >> (vec<gimple> *stmts,
> >> >> >     return pattern_stmt;
> >> >> >   }
> >> >> >
> >> >> > +/* Detect multiplication by constant which are postive or
> >> >> > +negatives of power 2,
> >> >> s/postive/positive/
> >> >>
> >> >>
> >> >> Jeff
> >> >
> >> > Regards,
> >> > Venkat.
> >> >

Regards,
Venkat.

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2015-08-06 12:04 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2015-07-28 15:30 [RFC] [Patch]: Try and vectorize with shift for mult expr with power 2 integer constant Kumar, Venkataramanan
2015-07-28 20:24 ` Jakub Jelinek
2015-07-28 20:34   ` Jakub Jelinek
2015-08-02 11:03   ` Kumar, Venkataramanan
2015-08-03 18:11     ` Jeff Law
2015-08-04  8:52       ` Kumar, Venkataramanan
2015-08-04 10:37         ` Richard Biener
     [not found]           ` <87vbcvjg1y.fsf@e105548-lin.cambridge.arm.com>
     [not found]             ` <CAFiYyc3VLXyjmfCuizFVsQ2nLDjyd_Dr=Po1_qyJ9wcrgCmiWA@mail.gmail.com>
2015-08-04 14:22               ` Richard Biener
2015-08-04 14:28                 ` Richard Sandiford
2015-08-04 16:07                   ` Richard Biener
2015-08-04 16:27                     ` Richard Sandiford
2015-08-04 16:49           ` Kumar, Venkataramanan
2015-08-05  8:51             ` Richard Biener
2015-08-05  9:34               ` Kumar, Venkataramanan
2015-08-05 11:41             ` Richard Biener
2015-08-06 12:04               ` Kumar, Venkataramanan

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).