public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* Constant-fold vector comparisons
@ 2012-09-29 15:16 Marc Glisse
  2012-10-01 11:40 ` Richard Guenther
  2012-10-12 14:10 ` Constant-fold vector comparisons Marc Glisse
  0 siblings, 2 replies; 16+ messages in thread
From: Marc Glisse @ 2012-09-29 15:16 UTC (permalink / raw)
  To: gcc-patches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 1479 bytes --]

Hello,

this patch does 2 things (I should have split it in 2, but the questions 
go together):

1) it handles constant folding of vector comparisons,

2) it fixes another place where vectors are not expected (I'll probably 
wait to have front-end support and testcases to do more of those, but 
there is something to discuss).

I wasn't sure what integer_truep should test exactly. For integer: == 1 or 
!= 0? For vectors: == -1 or < 0? I chose the one that worked best for the 
forwprop case where I used it.

It seems that before this patch, the middle-end didn't know how comparison 
results were encoded (a good reason for VEC_COND_EXPR to require a 
comparison as its first argument). I am using the OpenCL encoding that 
what matters is the high bit of each vector element. I am not quite sure 
what happens for targets (are there any?) that use a different encoding. 
When expanding vcond, they can do the comparison as they like. When 
expanding an isolated comparison, I expect they have to expand it as 
vcond(a<b,-1,0). So it should be ok, but I could easily have missed 
something.


2012-10-01  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree.c (integer_truep): New function.
 	* tree.h (integer_truep): Declare.
 	* tree-ssa-forwprop.c (forward_propagate_into_cond): Call it.
 	Don't use boolean_type_node for vectors.
 	* fold-const.c (fold_relational_const): Handle VECTOR_CST.

gcc/testsuite/
 	* gcc.dg/tree-ssa/foldconst-6.c: New testcase.

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 6700 bytes --]

Index: gcc/tree.h
===================================================================
--- gcc/tree.h	(revision 191850)
+++ gcc/tree.h	(working copy)
@@ -5272,20 +5272,25 @@ extern int integer_zerop (const_tree);
 
 /* integer_onep (tree x) is nonzero if X is an integer constant of value 1.  */
 
 extern int integer_onep (const_tree);
 
 /* integer_all_onesp (tree x) is nonzero if X is an integer constant
    all of whose significant bits are 1.  */
 
 extern int integer_all_onesp (const_tree);
 
+/* integer_truep (tree x) is nonzero if X is an integer constant of value 1,
+   or a vector constant of value < 0.  */
+
+extern bool integer_truep (const_tree);
+
 /* integer_pow2p (tree x) is nonzero is X is an integer constant with
    exactly one bit 1.  */
 
 extern int integer_pow2p (const_tree);
 
 /* integer_nonzerop (tree x) is nonzero if X is an integer constant
    with a nonzero value.  */
 
 extern int integer_nonzerop (const_tree);
 
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 191850)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -564,46 +564,46 @@ forward_propagate_into_cond (gimple_stmt
       enum tree_code code;
       tree name = cond;
       gimple def_stmt = get_prop_source_stmt (name, true, NULL);
       if (!def_stmt || !can_propagate_from (def_stmt))
 	return 0;
 
       code = gimple_assign_rhs_code (def_stmt);
       if (TREE_CODE_CLASS (code) == tcc_comparison)
 	tmp = fold_build2_loc (gimple_location (def_stmt),
 			       code,
-			       boolean_type_node,
+			       TREE_TYPE (cond),
 			       gimple_assign_rhs1 (def_stmt),
 			       gimple_assign_rhs2 (def_stmt));
       else if ((code == BIT_NOT_EXPR
 		&& TYPE_PRECISION (TREE_TYPE (cond)) == 1)
 	       || (code == BIT_XOR_EXPR
-		   && integer_onep (gimple_assign_rhs2 (def_stmt))))
+		   && integer_truep (gimple_assign_rhs2 (def_stmt))))
 	{
 	  tmp = gimple_assign_rhs1 (def_stmt);
 	  swap = true;
 	}
     }
 
   if (tmp
       && is_gimple_condexpr (tmp))
     {
       if (dump_file && tmp)
 	{
 	  fprintf (dump_file, "  Replaced '");
 	  print_generic_expr (dump_file, cond, 0);
 	  fprintf (dump_file, "' with '");
 	  print_generic_expr (dump_file, tmp, 0);
 	  fprintf (dump_file, "'\n");
 	}
 
-      if (integer_onep (tmp))
+      if (integer_truep (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
       else if (integer_zerop (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
       else
 	{
 	  gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
 	  if (swap)
 	    {
 	      tree t = gimple_assign_rhs2 (stmt);
 	      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));
Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
+
+vec f ()
+{
+  vec a = { -2, 666 };
+  vec b = { 3, 2 };
+  return a < b;
+}
+
+/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 191850)
+++ gcc/fold-const.c	(working copy)
@@ -16084,20 +16084,44 @@ fold_relational_const (enum tree_code co
 					  TREE_IMAGPART (op0),
 					  TREE_IMAGPART (op1));
       if (code == EQ_EXPR)
 	return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
       else if (code == NE_EXPR)
 	return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
       else
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
+    {
+      int count = VECTOR_CST_NELTS (op0);
+      tree *elts =  XALLOCAVEC (tree, count);
+      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+      for (int i = 0; i < count; i++)
+	{
+	  tree elem_type = TREE_TYPE (type);
+	  tree elem0 = VECTOR_CST_ELT (op0, i);
+	  tree elem1 = VECTOR_CST_ELT (op1, i);
+
+	  elts[i] = fold_relational_const (code, elem_type,
+					   elem0, elem1);
+
+	  if(elts[i] == NULL_TREE)
+	    return NULL_TREE;
+
+	  elts[i] = fold_negate_const (elts[i], elem_type);
+	}
+
+      return build_vector (type, elts);
+    }
+
   /* From here on we only handle LT, LE, GT, GE, EQ and NE.
 
      To compute GT, swap the arguments and do LT.
      To compute GE, do LT and invert the result.
      To compute LE, swap the arguments, do LT and invert the result.
      To compute NE, do EQ and invert the result.
 
      Therefore, the code below must handle only EQ and LT.  */
 
   if (code == LE_EXPR || code == GT_EXPR)
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	(revision 191850)
+++ gcc/tree.c	(working copy)
@@ -1835,20 +1835,48 @@ integer_all_onesp (const_tree expr)
       else
 	high_value = ((HOST_WIDE_INT) 1 << shift_amount) - 1;
 
       return (TREE_INT_CST_LOW (expr) == ~(unsigned HOST_WIDE_INT) 0
 	      && TREE_INT_CST_HIGH (expr) == high_value);
     }
   else
     return TREE_INT_CST_LOW (expr) == ((unsigned HOST_WIDE_INT) 1 << prec) - 1;
 }
 
+/* Return true if EXPR is an integer constant representing true.  */
+
+bool
+integer_truep (const_tree expr)
+{
+  STRIP_NOPS (expr);
+
+  switch (TREE_CODE (expr))
+    {
+    case INTEGER_CST:
+      /* Do not just test != 0, some places expect the value 1.  */
+      return (TREE_INT_CST_LOW (expr) == 1
+	      && TREE_INT_CST_HIGH (expr) == 0);
+    case VECTOR_CST:
+      {
+	for (unsigned i = 0; i < VECTOR_CST_NELTS (expr); ++i)
+	  {
+	    tree elm = VECTOR_CST_ELT (expr, i);
+	    if (TREE_CODE (elm) != INTEGER_CST || !tree_int_cst_sign_bit (elm))
+	      return false;
+	  }
+	return true;
+      }
+    default:
+      return false;
+    }
+}
+
 /* Return 1 if EXPR is an integer constant that is a power of 2 (i.e., has only
    one bit on).  */
 
 int
 integer_pow2p (const_tree expr)
 {
   int prec;
   unsigned HOST_WIDE_INT high, low;
 
   STRIP_NOPS (expr);

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-09-29 15:16 Constant-fold vector comparisons Marc Glisse
@ 2012-10-01 11:40 ` Richard Guenther
  2012-10-01 15:58   ` vec_cond_expr adjustments Marc Glisse
  2012-10-12 14:10 ` Constant-fold vector comparisons Marc Glisse
  1 sibling, 1 reply; 16+ messages in thread
From: Richard Guenther @ 2012-10-01 11:40 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Sat, Sep 29, 2012 at 3:25 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch does 2 things (I should have split it in 2, but the questions go
> together):
>
> 1) it handles constant folding of vector comparisons,
>
> 2) it fixes another place where vectors are not expected (I'll probably wait
> to have front-end support and testcases to do more of those, but there is
> something to discuss).
>
> I wasn't sure what integer_truep should test exactly. For integer: == 1 or
> != 0? For vectors: == -1 or < 0? I chose the one that worked best for the
> forwprop case where I used it.
>
> It seems that before this patch, the middle-end didn't know how comparison
> results were encoded (a good reason for VEC_COND_EXPR to require a
> comparison as its first argument). I am using the OpenCL encoding that what
> matters is the high bit of each vector element. I am not quite sure what
> happens for targets (are there any?) that use a different encoding. When
> expanding vcond, they can do the comparison as they like. When expanding an
> isolated comparison, I expect they have to expand it as vcond(a<b,-1,0). So
> it should be ok, but I could easily have missed something.

Comments below

>
> 2012-10-01  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree.c (integer_truep): New function.
>         * tree.h (integer_truep): Declare.
>         * tree-ssa-forwprop.c (forward_propagate_into_cond): Call it.
>         Don't use boolean_type_node for vectors.
>         * fold-const.c (fold_relational_const): Handle VECTOR_CST.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/foldconst-6.c: New testcase.
>
> --
> Marc Glisse
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h  (revision 191850)
> +++ gcc/tree.h  (working copy)
> @@ -5272,20 +5272,25 @@ extern int integer_zerop (const_tree);
>
>  /* integer_onep (tree x) is nonzero if X is an integer constant of value 1.
> */
>
>  extern int integer_onep (const_tree);
>
>  /* integer_all_onesp (tree x) is nonzero if X is an integer constant
>     all of whose significant bits are 1.  */
>
>  extern int integer_all_onesp (const_tree);
>
> +/* integer_truep (tree x) is nonzero if X is an integer constant of value
> 1,
> +   or a vector constant of value < 0.  */
> +
> +extern bool integer_truep (const_tree);
> +
>  /* integer_pow2p (tree x) is nonzero is X is an integer constant with
>     exactly one bit 1.  */
>
>  extern int integer_pow2p (const_tree);
>
>  /* integer_nonzerop (tree x) is nonzero if X is an integer constant
>     with a nonzero value.  */
>
>  extern int integer_nonzerop (const_tree);
>
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     (revision 191850)
> +++ gcc/tree-ssa-forwprop.c     (working copy)
> @@ -564,46 +564,46 @@ forward_propagate_into_cond (gimple_stmt
>        enum tree_code code;
>        tree name = cond;
>        gimple def_stmt = get_prop_source_stmt (name, true, NULL);
>        if (!def_stmt || !can_propagate_from (def_stmt))
>         return 0;
>
>        code = gimple_assign_rhs_code (def_stmt);
>        if (TREE_CODE_CLASS (code) == tcc_comparison)
>         tmp = fold_build2_loc (gimple_location (def_stmt),
>                                code,
> -                              boolean_type_node,
> +                              TREE_TYPE (cond),

That's obvious.

>                                gimple_assign_rhs1 (def_stmt),
>                                gimple_assign_rhs2 (def_stmt));
>        else if ((code == BIT_NOT_EXPR
>                 && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
>                || (code == BIT_XOR_EXPR
> -                  && integer_onep (gimple_assign_rhs2 (def_stmt))))
> +                  && integer_truep (gimple_assign_rhs2 (def_stmt))))

See below.

>         {
>           tmp = gimple_assign_rhs1 (def_stmt);
>           swap = true;
>         }
>      }
>
>    if (tmp
>        && is_gimple_condexpr (tmp))
>      {
>        if (dump_file && tmp)
>         {
>           fprintf (dump_file, "  Replaced '");
>           print_generic_expr (dump_file, cond, 0);
>           fprintf (dump_file, "' with '");
>           print_generic_expr (dump_file, tmp, 0);
>           fprintf (dump_file, "'\n");
>         }
>
> -      if (integer_onep (tmp))
> +      if (integer_truep (tmp))
>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
>        else if (integer_zerop (tmp))
>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
>        else
>         {
>           gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
>           if (swap)
>             {
>               tree t = gimple_assign_rhs2 (stmt);
>               gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));
> Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
> +
> +vec f ()
> +{
> +  vec a = { -2, 666 };
> +  vec b = { 3, 2 };
> +  return a < b;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 191850)
> +++ gcc/fold-const.c    (working copy)
> @@ -16084,20 +16084,44 @@ fold_relational_const (enum tree_code co
>                                           TREE_IMAGPART (op0),
>                                           TREE_IMAGPART (op1));
>        if (code == EQ_EXPR)
>         return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
>        else if (code == NE_EXPR)
>         return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
>        else
>         return NULL_TREE;
>      }
>
> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
> +    {
> +      int count = VECTOR_CST_NELTS (op0);
> +      tree *elts =  XALLOCAVEC (tree, count);
> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
> +
> +      for (int i = 0; i < count; i++)
> +       {
> +         tree elem_type = TREE_TYPE (type);
> +         tree elem0 = VECTOR_CST_ELT (op0, i);
> +         tree elem1 = VECTOR_CST_ELT (op1, i);
> +
> +         elts[i] = fold_relational_const (code, elem_type,
> +                                          elem0, elem1);
> +
> +         if(elts[i] == NULL_TREE)
> +           return NULL_TREE;
> +
> +         elts[i] = fold_negate_const (elts[i], elem_type);

I think you need to invent something new similar to STORE_FLAG_VALUE
or use STORE_FLAG_VALUE here.  With the above you try to map
{0, 1} to {0, -1} which is only true if the operation on the element types
returns {0, 1} (thus, STORE_FLAG_VALUE is 1).

> +       }
> +
> +      return build_vector (type, elts);
> +    }
> +
>    /* From here on we only handle LT, LE, GT, GE, EQ and NE.
>
>       To compute GT, swap the arguments and do LT.
>       To compute GE, do LT and invert the result.
>       To compute LE, swap the arguments, do LT and invert the result.
>       To compute NE, do EQ and invert the result.
>
>       Therefore, the code below must handle only EQ and LT.  */
>
>    if (code == LE_EXPR || code == GT_EXPR)
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c  (revision 191850)
> +++ gcc/tree.c  (working copy)
> @@ -1835,20 +1835,48 @@ integer_all_onesp (const_tree expr)
>        else
>         high_value = ((HOST_WIDE_INT) 1 << shift_amount) - 1;
>
>        return (TREE_INT_CST_LOW (expr) == ~(unsigned HOST_WIDE_INT) 0
>               && TREE_INT_CST_HIGH (expr) == high_value);
>      }
>    else
>      return TREE_INT_CST_LOW (expr) == ((unsigned HOST_WIDE_INT) 1 << prec)
> - 1;
>  }
>
> +/* Return true if EXPR is an integer constant representing true.  */
> +
> +bool
> +integer_truep (const_tree expr)
> +{
> +  STRIP_NOPS (expr);
> +
> +  switch (TREE_CODE (expr))
> +    {
> +    case INTEGER_CST:
> +      /* Do not just test != 0, some places expect the value 1.  */
> +      return (TREE_INT_CST_LOW (expr) == 1
> +             && TREE_INT_CST_HIGH (expr) == 0);

I wonder if using STORE_FLAG_VALUE is better here (note that it
usually differs for FP vs. integral comparisons and the mode passed
to STORE_FLAG_VALUE is that of the comparison result).

That said, until we are sure what semantics we want here (forwprop
for example doesn't look at 'comparisons' but operations on special
values and types) I'd prefer to not introduce integer_truep ().

Thanks,
Richard.

> +    case VECTOR_CST:
> +      {
> +       for (unsigned i = 0; i < VECTOR_CST_NELTS (expr); ++i)
> +         {
> +           tree elm = VECTOR_CST_ELT (expr, i);
> +           if (TREE_CODE (elm) != INTEGER_CST || !tree_int_cst_sign_bit
> (elm))
> +             return false;
> +         }
> +       return true;
> +      }
> +    default:
> +      return false;
> +    }
> +}
> +
>  /* Return 1 if EXPR is an integer constant that is a power of 2 (i.e., has
> only
>     one bit on).  */
>
>  int
>  integer_pow2p (const_tree expr)
>  {
>    int prec;
>    unsigned HOST_WIDE_INT high, low;
>
>    STRIP_NOPS (expr);
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-01 11:40 ` Richard Guenther
@ 2012-10-01 15:58   ` Marc Glisse
  2012-10-02 12:41     ` Richard Guenther
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-01 15:58 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

[merging both threads, thanks for the answers]

On Mon, 1 Oct 2012, Richard Guenther wrote:

>>> optabs should be fixed instead, an is_gimple_val condition is implicitely
>>> val != 0.
>>
>> For vectors, I think it should be val < 0 (with an appropriate cast of val
>> to a signed integer vector type if necessary). Or (val & highbit) != 0, but
>> that's longer.
>
> I don't think so.  Throughout the compiler we generally assume false == 0
> and anything else is true.  (yes, for FP there is STORE_FLAG_VALUE, but
> it's scope is quite limited - if we want sth similar for vectors we'd have to
> invent it).

See below.

>>> If we for example have
>>>
>>> predicate = a < b;
>>> x = predicate ? d : e;
>>> y = predicate ? f : g;
>>>
>>> we ideally want to re-use the predicate computation on targets where
>>> that would be optimal (and combine should be able to recover the
>>> case where it is not).
>>
>> That I don't understand. The vcond instruction implemented by targets takes
>> as arguments d, e, cmp, a, b and emits the comparison itself. I don't see
>> how I can avoid sending to the targets both (d,e,<,a,b) and (f,g,<,a,b).
>> They will notice eventually that a<b is computed twice and remove one of the
>> two, but I don't see how to do that in optabs.c. Or I can compute x = a < b,
>> use x < 0 as the comparison passed to the targets, and expect targets (those
>> for which it is true) to recognize that < 0 is useless in a vector condition
>> (PR54700), or is useless on a comparison result.
>
> But that's a limitation of how vcond works.  ISTR there is/was a vselect
> instruction as well, taking a "mask" and two vectors to select from.  At least
> that's how vcond works internally for some sub-targets.

vselect seems to only appear in config/. Would it be defined as:
vselect(m,a,b)=(a&m)|(b&~m) ? I would almost be tempted to just define a 
pattern in .md files and let combine handle it, although it might be one 
instruction too long for that (and if m is x<y, ~m might look like x>=y).
Or would it match the OpenCL select: "For each component of a vector type,
result[i] = if MSB of c[i] is set ? b[i] : a[i]."? Or the pattern with &
and | but with a precondition that the value of each element of the mask
must be 0 or ±1?

I don't find vcond that bad, as long as targets check for trivial 
comparisons in the expansion (what trivial means may depend on the 
platform). It is quite flexible for targets.


On Mon, 1 Oct 2012, Richard Guenther wrote:

>>         tmp = fold_build2_loc (gimple_location (def_stmt),
>>                                code,
>> -                              boolean_type_node,
>> +                              TREE_TYPE (cond),
>
> That's obvious.

Ok, I'll test and commit that line separately.

>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>> +    {
>> +      int count = VECTOR_CST_NELTS (op0);
>> +      tree *elts =  XALLOCAVEC (tree, count);
>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>> +
>> +      for (int i = 0; i < count; i++)
>> +       {
>> +         tree elem_type = TREE_TYPE (type);
>> +         tree elem0 = VECTOR_CST_ELT (op0, i);
>> +         tree elem1 = VECTOR_CST_ELT (op1, i);
>> +
>> +         elts[i] = fold_relational_const (code, elem_type,
>> +                                          elem0, elem1);
>> +
>> +         if(elts[i] == NULL_TREE)
>> +           return NULL_TREE;
>> +
>> +         elts[i] = fold_negate_const (elts[i], elem_type);
>
> I think you need to invent something new similar to STORE_FLAG_VALUE
> or use STORE_FLAG_VALUE here.  With the above you try to map
> {0, 1} to {0, -1} which is only true if the operation on the element types
> returns {0, 1} (thus, STORE_FLAG_VALUE is 1).

Er, seems to me that constant folding of a scalar comparison in the
front/middle-end only returns {0, 1}.

>> +/* Return true if EXPR is an integer constant representing true.  */
>> +
>> +bool
>> +integer_truep (const_tree expr)
>> +{
>> +  STRIP_NOPS (expr);
>> +
>> +  switch (TREE_CODE (expr))
>> +    {
>> +    case INTEGER_CST:
>> +      /* Do not just test != 0, some places expect the value 1.  */
>> +      return (TREE_INT_CST_LOW (expr) == 1
>> +             && TREE_INT_CST_HIGH (expr) == 0);
>
> I wonder if using STORE_FLAG_VALUE is better here (note that it
> usually differs for FP vs. integral comparisons and the mode passed
> to STORE_FLAG_VALUE is that of the comparison result).

I notice there is already a VECTOR_STORE_FLAG_VALUE (used only once in
simplify-rtx, in a way that seems a bit strange but I'll try to
understand that later). Thanks for showing me this macro, it seems
important indeed. However the STORE_FLAG_VALUE mechanism seems to be for
the RTL level.

It looks like it would be possible to have 3 different semantics:
source code is OpenCL, middle-end whatever we want (0 / 1 for instance),
and back-end is whatever the target wants. The front-end would generate
for a<b : vec_cond_expr(a<b,-1,0) and for a?b:c : vec_cond_expr(a<0,b,c)
and there is no need for the middle-end to use the same representation
of comparisons as the front-ends or targets (expand of a vec_cond_expr
whose first argument is not a comparison would use != 0 if we chose a 0
/ 1 encoding).

However, since the front-ends and many targets agree on the OpenCL
semantics, it means introducing a conversion back and forth in the
middle-end, which may complicate things a bit. Note also that we already
have constant_boolean_node that returns a vector of -1 for true, and
that the front-end doesn't currently generate a vec_cond_expr for a<b.

It is true though that using 1 for true would fit better with the scalar
ops.

Well, if someone feels like taking a decision... I am happy with either
choice, as long as I know where to go.

> That said, until we are sure what semantics we want here (forwprop
> for example doesn't look at 'comparisons' but operations on special
> values and types) I'd prefer to not introduce integer_truep ().

I completely agree that defining the semantics comes first :-)

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-01 15:58   ` vec_cond_expr adjustments Marc Glisse
@ 2012-10-02 12:41     ` Richard Guenther
  2012-10-05 15:01       ` Marc Glisse
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Guenther @ 2012-10-02 12:41 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Mon, Oct 1, 2012 at 5:57 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> [merging both threads, thanks for the answers]
>
>
> On Mon, 1 Oct 2012, Richard Guenther wrote:
>
>>>> optabs should be fixed instead, an is_gimple_val condition is
>>>> implicitely
>>>> val != 0.
>>>
>>>
>>> For vectors, I think it should be val < 0 (with an appropriate cast of
>>> val
>>> to a signed integer vector type if necessary). Or (val & highbit) != 0,
>>> but
>>> that's longer.
>>
>>
>> I don't think so.  Throughout the compiler we generally assume false == 0
>> and anything else is true.  (yes, for FP there is STORE_FLAG_VALUE, but
>> it's scope is quite limited - if we want sth similar for vectors we'd have
>> to
>> invent it).
>
>
> See below.
>
>
>>>> If we for example have
>>>>
>>>> predicate = a < b;
>>>> x = predicate ? d : e;
>>>> y = predicate ? f : g;
>>>>
>>>> we ideally want to re-use the predicate computation on targets where
>>>> that would be optimal (and combine should be able to recover the
>>>> case where it is not).
>>>
>>>
>>> That I don't understand. The vcond instruction implemented by targets
>>> takes
>>> as arguments d, e, cmp, a, b and emits the comparison itself. I don't see
>>> how I can avoid sending to the targets both (d,e,<,a,b) and (f,g,<,a,b).
>>> They will notice eventually that a<b is computed twice and remove one of
>>> the
>>> two, but I don't see how to do that in optabs.c. Or I can compute x = a <
>>> b,
>>> use x < 0 as the comparison passed to the targets, and expect targets
>>> (those
>>> for which it is true) to recognize that < 0 is useless in a vector
>>> condition
>>> (PR54700), or is useless on a comparison result.
>>
>>
>> But that's a limitation of how vcond works.  ISTR there is/was a vselect
>> instruction as well, taking a "mask" and two vectors to select from.  At
>> least
>> that's how vcond works internally for some sub-targets.
>
>
> vselect seems to only appear in config/. Would it be defined as:
> vselect(m,a,b)=(a&m)|(b&~m) ? I would almost be tempted to just define a
> pattern in .md files and let combine handle it, although it might be one
> instruction too long for that (and if m is x<y, ~m might look like x>=y).
> Or would it match the OpenCL select: "For each component of a vector type,
> result[i] = if MSB of c[i] is set ? b[i] : a[i]."? Or the pattern with &
> and | but with a precondition that the value of each element of the mask
> must be 0 or ±1?
>
> I don't find vcond that bad, as long as targets check for trivial
> comparisons in the expansion (what trivial means may depend on the
> platform). It is quite flexible for targets.

Well, ok.

>
> On Mon, 1 Oct 2012, Richard Guenther wrote:
>
>>>         tmp = fold_build2_loc (gimple_location (def_stmt),
>>>                                code,
>>> -                              boolean_type_node,
>>> +                              TREE_TYPE (cond),
>>
>>
>> That's obvious.
>
>
> Ok, I'll test and commit that line separately.
>
>>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>>> +    {
>>> +      int count = VECTOR_CST_NELTS (op0);
>>> +      tree *elts =  XALLOCAVEC (tree, count);
>>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>>> +
>>> +      for (int i = 0; i < count; i++)
>>> +       {
>>> +         tree elem_type = TREE_TYPE (type);
>>> +         tree elem0 = VECTOR_CST_ELT (op0, i);
>>> +         tree elem1 = VECTOR_CST_ELT (op1, i);
>>> +
>>> +         elts[i] = fold_relational_const (code, elem_type,
>>> +                                          elem0, elem1);
>>> +
>>> +         if(elts[i] == NULL_TREE)
>>> +           return NULL_TREE;
>>> +
>>> +         elts[i] = fold_negate_const (elts[i], elem_type);
>>
>>
>> I think you need to invent something new similar to STORE_FLAG_VALUE
>> or use STORE_FLAG_VALUE here.  With the above you try to map
>> {0, 1} to {0, -1} which is only true if the operation on the element types
>> returns {0, 1} (thus, STORE_FLAG_VALUE is 1).
>
>
> Er, seems to me that constant folding of a scalar comparison in the
> front/middle-end only returns {0, 1}.

The point is we need to define some semantics for vector comparison
results.  One variant is to make it target independent which in turn
would inhibit (or make it more difficult) to exploit some target features.
You for example use {0, -1} for truth values - probably to exploit target
features - even though the most natural middle-end way would be to
use {0, 1} as for everything else (caveat: there may be both signed
and unsigned bools, we don't allow vector components with non-mode precision,
thus you could argue that a signed bool : 1 is just "sign-extended"
for your solution).  A different variant is to make it target dependent
to leverage optimization opportunities - that's why STORE_FLAG_VALUE
exists.  For example with vector comparisons a < v result, when
performing bitwise operations on it, you either have to make the target
expand code to produce {0, -1} even if the natural compare instruction
would, say, produce {0, 0x80000} - or not constrain the possible values
of its result (like forwprop would do with your patch).  In general we
want constant folding to yield the same results as if the HW carried
out the operation to make -O0 code not diverge from -O1.  Thus,

v4si g;
int main() { g = { 1, 2, 3, 4 } < { 4, 3, 2, 1}; }

should not assign different values to g dependent on constant propagation
performed or not.  The easiest way out is something like STORE_FLAG_VALUE
if there does not exist a middle-end choice for vector true / false components
that can be easily generated from what the target produces.

Like if you perform a FP comparison

int main () { double x = 1.0; static _Bool b; b = x < 3.0; }

you get without CCP on x86_64:

        ucomisd -8(%rbp), %xmm0
        seta    %al
        movb    %al, b.1715(%rip)

thus the equivalent of

    flag_reg = x < 3.0;
    b = flag_reg ? 1 : 0;

for vector compares you get something similar:

    flag_vec = x < y;
    res = flag_vec ? { 1, ... } : { 0, ... };

which I think you can see being produced by generic vector lowering
(in do_compare).  Where I can see we indeed use {0, -1} ... which
would match your constant folding behavior.

We may not be able to easily recover from this intermediate step
with combine (I'm not sure), so a target dependent value may
be prefered.


>>> +/* Return true if EXPR is an integer constant representing true.  */
>>> +
>>> +bool
>>> +integer_truep (const_tree expr)
>>> +{
>>> +  STRIP_NOPS (expr);
>>> +
>>> +  switch (TREE_CODE (expr))
>>> +    {
>>> +    case INTEGER_CST:
>>> +      /* Do not just test != 0, some places expect the value 1.  */
>>> +      return (TREE_INT_CST_LOW (expr) == 1
>>> +             && TREE_INT_CST_HIGH (expr) == 0);
>>
>>
>> I wonder if using STORE_FLAG_VALUE is better here (note that it
>> usually differs for FP vs. integral comparisons and the mode passed
>> to STORE_FLAG_VALUE is that of the comparison result).
>
>
> I notice there is already a VECTOR_STORE_FLAG_VALUE (used only once in
> simplify-rtx, in a way that seems a bit strange but I'll try to
> understand that later). Thanks for showing me this macro, it seems
> important indeed. However the STORE_FLAG_VALUE mechanism seems to be for
> the RTL level.
>
> It looks like it would be possible to have 3 different semantics:
> source code is OpenCL, middle-end whatever we want (0 / 1 for instance),
> and back-end is whatever the target wants. The front-end would generate
> for a<b : vec_cond_expr(a<b,-1,0)

seems like the middle-end uses this for lowering vector compares,
a < b -> { a[0] < b[0] ? -1 : 0, ... }

> and for a?b:c : vec_cond_expr(a<0,b,c)

it looks like ?: is not generally handled by tree-vect-generic, so it must
be either not supported by the frontend or lowered therein (ISTR
it is forced to appear as a != {0,...} ? ... : ...)

> and there is no need for the middle-end to use the same representation
> of comparisons as the front-ends or targets (expand of a vec_cond_expr
> whose first argument is not a comparison would use != 0 if we chose a 0
> / 1 encoding).
>
> However, since the front-ends and many targets agree on the OpenCL
> semantics, it means introducing a conversion back and forth in the
> middle-end, which may complicate things a bit. Note also that we already
> have constant_boolean_node that returns a vector of -1 for true, and
> that the front-end doesn't currently generate a vec_cond_expr for a<b.

Yeah, I realized that now.

> It is true though that using 1 for true would fit better with the scalar
> ops.
>
> Well, if someone feels like taking a decision... I am happy with either
> choice, as long as I know where to go.

I'd say adjust your fold-const patch to not negate the scalar result
but build a proper -1 / 0 value based on integer_zerop().

Thanks,
Richard.

>> That said, until we are sure what semantics we want here (forwprop
>> for example doesn't look at 'comparisons' but operations on special
>> values and types) I'd prefer to not introduce integer_truep ().
>
>
> I completely agree that defining the semantics comes first :-)
>
> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-02 12:41     ` Richard Guenther
@ 2012-10-05 15:01       ` Marc Glisse
  2012-10-08  9:04         ` Richard Guenther
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-05 15:01 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

[I am still a little confused, sorry for the long email...]

On Tue, 2 Oct 2012, Richard Guenther wrote:

>>>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>>>> +    {
>>>> +      int count = VECTOR_CST_NELTS (op0);
>>>> +      tree *elts =  XALLOCAVEC (tree, count);
>>>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>>>> +
>>>> +      for (int i = 0; i < count; i++)
>>>> +       {
>>>> +         tree elem_type = TREE_TYPE (type);
>>>> +         tree elem0 = VECTOR_CST_ELT (op0, i);
>>>> +         tree elem1 = VECTOR_CST_ELT (op1, i);
>>>> +
>>>> +         elts[i] = fold_relational_const (code, elem_type,
>>>> +                                          elem0, elem1);
>>>> +
>>>> +         if(elts[i] == NULL_TREE)
>>>> +           return NULL_TREE;
>>>> +
>>>> +         elts[i] = fold_negate_const (elts[i], elem_type);
>>>
>>>
>>> I think you need to invent something new similar to STORE_FLAG_VALUE
>>> or use STORE_FLAG_VALUE here.  With the above you try to map
>>> {0, 1} to {0, -1} which is only true if the operation on the element types
>>> returns {0, 1} (thus, STORE_FLAG_VALUE is 1).
>>
>> Er, seems to me that constant folding of a scalar comparison in the
>> front/middle-end only returns {0, 1}.
[and later]
> I'd say adjust your fold-const patch to not negate the scalar result
> but build a proper -1 / 0 value based on integer_zerop().

I don't mind doing it that way, but I would like to understand first. 
LT_EXPR on scalars is guaranteed (in generic.texi) to be 0 or 1. So 
negating should be the same as testing with integer_zerop to build -1 or 
0. Is it just a matter of style (then I am ok), or am I missing a reason 
which makes the negation wrong?

> The point is we need to define some semantics for vector comparison
> results.

Yes. I think a documentation patch should come first: generic.texi is 
missing an entry for VEC_COND_EXPR and the entry for LT_EXPR doesn't 
mention vectors. But before that we need to decide what to put there...

> One variant is to make it target independent which in turn
> would inhibit (or make it more difficult) to exploit some target features.
> You for example use {0, -1} for truth values - probably to exploit target
> features -

Actually it was mostly because that is the meaning in the language. OpenCL 
says that a<b is a vector of 0 and -1, and that ?: only looks at the MSB 
of the elements in the condition. The fact that it matches what some 
targets do is a simple consequence of the fact that OpenCL was based on 
what hardware already did.

> even though the most natural middle-end way would be to
> use {0, 1} as for everything else

I agree that it would be natural and convenient in a number of places.

> (caveat: there may be both signed and unsigned bools, we don't allow 
> vector components with non-mode precision, thus you could argue that a 
> signed bool : 1 is just "sign-extended" for your solution).

Not sure how that would translate in the code.

> A different variant is to make it target dependent to leverage 
> optimization opportunities

That's an interesting possibility...

> that's why STORE_FLAG_VALUE exists.

AFAICS it only appears when we go from gimple to rtl, not before (and 
there is already a VECTOR_STORE_FLAG_VALUE, although no target defines 
it). Which doesn't mean we couldn't make it appear earlier for vectors.

> For example with vector comparisons a < v result, when
> performing bitwise operations on it, you either have to make the target
> expand code to produce {0, -1} even if the natural compare instruction
> would, say, produce {0, 0x80000} - or not constrain the possible values
> of its result (like forwprop would do with your patch).  In general we
> want constant folding to yield the same results as if the HW carried
> out the operation to make -O0 code not diverge from -O1.  Thus,
>
> v4si g;
> int main() { g = { 1, 2, 3, 4 } < { 4, 3, 2, 1}; }
>
> should not assign different values to g dependent on constant propagation
> performed or not.

That one is clear, OpenCL constrains the answer to be {-1,-1,0,0}, whether 
your target likes it or not. Depending on how things are handled, 
comparisons could be constrained internally to only appear (possibly 
indirectly) in the first argument of a vec_cond_expr.

> The easiest way out is something like STORE_FLAG_VALUE
> if there does not exist a middle-end choice for vector true / false components
> that can be easily generated from what the target produces.
>
> Like if you perform a FP comparison
>
> int main () { double x = 1.0; static _Bool b; b = x < 3.0; }
>
> you get without CCP on x86_64:
>
>        ucomisd -8(%rbp), %xmm0
>        seta    %al
>        movb    %al, b.1715(%rip)
>
> thus the equivalent of
>
>    flag_reg = x < 3.0;
>    b = flag_reg ? 1 : 0;

where this expansion happens in the back-end.

> for vector compares you get something similar:
>
>    flag_vec = x < y;
>    res = flag_vec ? { 1, ... } : { 0, ... };
>
> which I think you can see being produced by generic vector lowering
> (in do_compare).  Where I can see we indeed use {0, -1} ... which
> would match your constant folding behavior.
>
> We may not be able to easily recover from this intermediate step
> with combine (I'm not sure), so a target dependent value may
> be prefered.

Being able to optimize it is indeed a key point. Let's try on an example 
(not assuming any specific representation in the middle-end for now). Say 
I write this C/OpenCL code: ((a<b)&&(c<d))?x:y (not currently supported)

The front-end gives to the middle-end: ((((a<b)?-1:0)&((c<d)?-1:0))<0)?x:y

On an architecture like sse, neon or altivec where VECTOR_STORE_FLAG_VALUE 
is -1 (well, should be), expansion of (a<b)?-1:0 would just be a<b. The <0 
can also disappear if the vcond instruction only looks at the MSB (x86). 
And we are left in the back-end with ((a<b)&(c<d))?x:y, as desired.

On other architectures, expecting the back-end to simplify everything does 
seem hard. But it isn't obvious how to handle it in the middle end either.
Some other forms we could imagine the middle-end producing:
(a<b)?(c<d)?x:y:y
or assuming that VECTOR_STORE_FLAG_VALUE is defined:
(((a<b)&(c<d))!=0)?x:y (back-end would remove the != 0 on altivec)
Both would require special code to happen.

But then how do we handle for instance sparc, where IIUC comparing 2 
vectors returns an integer, where bits 0, 1, etc of the integer represent 
true/false for the comparisons of elements 0, 1, etc of the vectors (as in 
vec_merge, but not constant)? Defining VECTOR_STORE_FLAG_VALUE is not 
possible since comparisons don't return a vector, but we would still want 
to compute a<b, c<d, and perform an AND of those 2 integers before calling 
the usual code for the selection.


If we assume a -1/0 and MSB representation in the middle-end, the 
front-end could just pass ((a<b)&(c<d))?x:y to the middle-end. When 
moving to the back-end, "nothing" would happen on x86.


Comparing x86, neon and altivec, they all have comparisons that return a 
vector of -1 and 0. On the other hand, they have different selection 
instructions. x86 uses <0, altivec uses !=0 and neon has a bitwise select 
and thus requires exactly -1 or 0. It thus seems to me that we should 
decide in the middle-end that vector comparisons return vectors of -1 and 
0. VEC_COND_EXPR is more complicated. We could for instance require that 
it takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon 
thing are equivalent). Which would leave to decide what the expansion of 
vec_cond_expr passes to the targets when the first argument is not a 
comparison, between !=0, <0, ==-1 or others (I vote for <0 because of 
opencl). One issue is that targets wouldn't know if it was a dummy 
comparison that can safely be ignored because the other part is the result 
of logical operations on comparisons (thus composed of -1 and 0) or a 
genuine comparison with an arbitrary vector, so a new optimization would 
be needed (in the back-end I guess or we would need an alternate 
instruction to vcond) to detect if a vector is a "signed boolean" vector.
We could instead say that vec_cond_expr really follows OpenCL's semantics 
and looks at the MSB of each element. I am not sure that would change 
much, it would mostly delay the apparition of <0 to RTL expansion time 
(and thus make gimple slightly lighter).


>>>> +/* Return true if EXPR is an integer constant representing true.  */
>>>> +
>>>> +bool
>>>> +integer_truep (const_tree expr)
>>>> +{
>>>> +  STRIP_NOPS (expr);
>>>> +
>>>> +  switch (TREE_CODE (expr))
>>>> +    {
>>>> +    case INTEGER_CST:
>>>> +      /* Do not just test != 0, some places expect the value 1.  */
>>>> +      return (TREE_INT_CST_LOW (expr) == 1
>>>> +             && TREE_INT_CST_HIGH (expr) == 0);
>>>
>>>
>>> I wonder if using STORE_FLAG_VALUE is better here (note that it
>>> usually differs for FP vs. integral comparisons and the mode passed
>>> to STORE_FLAG_VALUE is that of the comparison result).
>>
>>
>> I notice there is already a VECTOR_STORE_FLAG_VALUE (used only once in
>> simplify-rtx, in a way that seems a bit strange but I'll try to
>> understand that later). Thanks for showing me this macro, it seems
>> important indeed. However the STORE_FLAG_VALUE mechanism seems to be for
>> the RTL level.
>>
>> It looks like it would be possible to have 3 different semantics:
>> source code is OpenCL, middle-end whatever we want (0 / 1 for instance),
>> and back-end is whatever the target wants. The front-end would generate
>> for a<b : vec_cond_expr(a<b,-1,0)
>
> seems like the middle-end uses this for lowering vector compares,
> a < b -> { a[0] < b[0] ? -1 : 0, ... }
>
>> and for a?b:c : vec_cond_expr(a<0,b,c)
>
> it looks like ?: is not generally handled by tree-vect-generic, so it must
> be either not supported by the frontend or lowered therein (ISTR
> it is forced to appear as a != {0,...} ? ... : ...)

Not supported by the front-end yet (not even by the gimplifier), I have 
(bad) patches but I can't really finish them before this conversation is 
done.



I think there are quite few places in the middle-end that assume that 
comparisons return a vector of -1/0 and even fewer that vec_cond_expr only 
looks at the MSB of each element. So it is still time to change that if 
you want to. But if we want to change it, I think it should happen now 
before even more vector code gets in (not particularly my patches, I am 
thinking of cilk and others too).


Ok, that's long enough, I need to send it now...

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-05 15:01       ` Marc Glisse
@ 2012-10-08  9:04         ` Richard Guenther
  2012-10-08  9:34           ` Marc Glisse
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Guenther @ 2012-10-08  9:04 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Fri, Oct 5, 2012 at 5:01 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> [I am still a little confused, sorry for the long email...]
>
>
> On Tue, 2 Oct 2012, Richard Guenther wrote:
>
>>>>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>>>>> +    {
>>>>> +      int count = VECTOR_CST_NELTS (op0);
>>>>> +      tree *elts =  XALLOCAVEC (tree, count);
>>>>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>>>>> +
>>>>> +      for (int i = 0; i < count; i++)
>>>>> +       {
>>>>> +         tree elem_type = TREE_TYPE (type);
>>>>> +         tree elem0 = VECTOR_CST_ELT (op0, i);
>>>>> +         tree elem1 = VECTOR_CST_ELT (op1, i);
>>>>> +
>>>>> +         elts[i] = fold_relational_const (code, elem_type,
>>>>> +                                          elem0, elem1);
>>>>> +
>>>>> +         if(elts[i] == NULL_TREE)
>>>>> +           return NULL_TREE;
>>>>> +
>>>>> +         elts[i] = fold_negate_const (elts[i], elem_type);
>>>>
>>>>
>>>>
>>>> I think you need to invent something new similar to STORE_FLAG_VALUE
>>>> or use STORE_FLAG_VALUE here.  With the above you try to map
>>>> {0, 1} to {0, -1} which is only true if the operation on the element
>>>> types
>>>> returns {0, 1} (thus, STORE_FLAG_VALUE is 1).
>>>
>>>
>>> Er, seems to me that constant folding of a scalar comparison in the
>>> front/middle-end only returns {0, 1}.
>
> [and later]
>
>> I'd say adjust your fold-const patch to not negate the scalar result
>> but build a proper -1 / 0 value based on integer_zerop().
>
>
> I don't mind doing it that way, but I would like to understand first.
> LT_EXPR on scalars is guaranteed (in generic.texi) to be 0 or 1. So negating
> should be the same as testing with integer_zerop to build -1 or 0. Is it
> just a matter of style (then I am ok), or am I missing a reason which makes
> the negation wrong?

Just a matter of style.  Negating is a lot less descriptive for the actual
set of return values we produce.

>> The point is we need to define some semantics for vector comparison
>> results.
>
>
> Yes. I think a documentation patch should come first: generic.texi is
> missing an entry for VEC_COND_EXPR and the entry for LT_EXPR doesn't mention
> vectors. But before that we need to decide what to put there...
>
>
>> One variant is to make it target independent which in turn
>> would inhibit (or make it more difficult) to exploit some target features.
>> You for example use {0, -1} for truth values - probably to exploit target
>> features -
>
>
> Actually it was mostly because that is the meaning in the language. OpenCL
> says that a<b is a vector of 0 and -1, and that ?: only looks at the MSB of
> the elements in the condition. The fact that it matches what some targets do
> is a simple consequence of the fact that OpenCL was based on what hardware
> already did.

Yes, it seems that the {0, -1} choice is most reasonable for GENERIC.  So
let's document that.

>
>> even though the most natural middle-end way would be to
>> use {0, 1} as for everything else
>
>
> I agree that it would be natural and convenient in a number of places.
>
>
>> (caveat: there may be both signed and unsigned bools, we don't allow
>> vector components with non-mode precision, thus you could argue that a
>> signed bool : 1 is just "sign-extended" for your solution).
>
>
> Not sure how that would translate in the code.
>
>
>> A different variant is to make it target dependent to leverage
>> optimization opportunities
>
>
> That's an interesting possibility...
>
>
>> that's why STORE_FLAG_VALUE exists.
>
>
> AFAICS it only appears when we go from gimple to rtl, not before (and there
> is already a VECTOR_STORE_FLAG_VALUE, although no target defines it). Which
> doesn't mean we couldn't make it appear earlier for vectors.
>
>
>> For example with vector comparisons a < v result, when
>> performing bitwise operations on it, you either have to make the target
>> expand code to produce {0, -1} even if the natural compare instruction
>> would, say, produce {0, 0x80000} - or not constrain the possible values
>> of its result (like forwprop would do with your patch).  In general we
>> want constant folding to yield the same results as if the HW carried
>> out the operation to make -O0 code not diverge from -O1.  Thus,
>>
>> v4si g;
>> int main() { g = { 1, 2, 3, 4 } < { 4, 3, 2, 1}; }
>>
>> should not assign different values to g dependent on constant propagation
>> performed or not.
>
>
> That one is clear, OpenCL constrains the answer to be {-1,-1,0,0}, whether
> your target likes it or not. Depending on how things are handled,
> comparisons could be constrained internally to only appear (possibly
> indirectly) in the first argument of a vec_cond_expr.

Yes, I realized that later.

>
>> The easiest way out is something like STORE_FLAG_VALUE
>> if there does not exist a middle-end choice for vector true / false
>> components
>> that can be easily generated from what the target produces.
>>
>> Like if you perform a FP comparison
>>
>> int main () { double x = 1.0; static _Bool b; b = x < 3.0; }
>>
>> you get without CCP on x86_64:
>>
>>        ucomisd -8(%rbp), %xmm0
>>        seta    %al
>>        movb    %al, b.1715(%rip)
>>
>> thus the equivalent of
>>
>>    flag_reg = x < 3.0;
>>    b = flag_reg ? 1 : 0;
>
>
> where this expansion happens in the back-end.

In the target specific expander for the comparison.

>
>> for vector compares you get something similar:
>>
>>    flag_vec = x < y;
>>    res = flag_vec ? { 1, ... } : { 0, ... };
>>
>> which I think you can see being produced by generic vector lowering
>> (in do_compare).  Where I can see we indeed use {0, -1} ... which
>> would match your constant folding behavior.
>>
>> We may not be able to easily recover from this intermediate step
>> with combine (I'm not sure), so a target dependent value may
>> be prefered.
>
>
> Being able to optimize it is indeed a key point. Let's try on an example
> (not assuming any specific representation in the middle-end for now). Say I
> write this C/OpenCL code: ((a<b)&&(c<d))?x:y (not currently supported)
>
> The front-end gives to the middle-end: ((((a<b)?-1:0)&((c<d)?-1:0))<0)?x:y
>
> On an architecture like sse, neon or altivec where VECTOR_STORE_FLAG_VALUE
> is -1 (well, should be), expansion of (a<b)?-1:0 would just be a<b. The <0
> can also disappear if the vcond instruction only looks at the MSB (x86). And
> we are left in the back-end with ((a<b)&(c<d))?x:y, as desired.
>
> On other architectures, expecting the back-end to simplify everything does
> seem hard. But it isn't obvious how to handle it in the middle end either.
> Some other forms we could imagine the middle-end producing:
> (a<b)?(c<d)?x:y:y
> or assuming that VECTOR_STORE_FLAG_VALUE is defined:
> (((a<b)&(c<d))!=0)?x:y (back-end would remove the != 0 on altivec)
> Both would require special code to happen.

True.

> But then how do we handle for instance sparc, where IIUC comparing 2 vectors
> returns an integer, where bits 0, 1, etc of the integer represent true/false
> for the comparisons of elements 0, 1, etc of the vectors (as in vec_merge,
> but not constant)? Defining VECTOR_STORE_FLAG_VALUE is not possible since
> comparisons don't return a vector, but we would still want to compute a<b,
> c<d, and perform an AND of those 2 integers before calling the usual code
> for the selection.

Yeah ... :/

>
> If we assume a -1/0 and MSB representation in the middle-end, the front-end
> could just pass ((a<b)&(c<d))?x:y to the middle-end. When moving to the
> back-end, "nothing" would happen on x86.

But the frontend needs to follow a language standard (which seems to
be OpenCL for C).  It of course could see the conditions are only
used in a COND_EXPR and try to optimize that.

> Comparing x86, neon and altivec, they all have comparisons that return a
> vector of -1 and 0. On the other hand, they have different selection
> instructions. x86 uses <0, altivec uses !=0 and neon has a bitwise select
> and thus requires exactly -1 or 0. It thus seems to me that we should decide
> in the middle-end that vector comparisons return vectors of -1 and 0.

Yes, I think -1 and 0 are indeed the best choice.

> VEC_COND_EXPR is more complicated. We could for instance require that it
> takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon
> thing are equivalent). Which would leave to decide what the expansion of
> vec_cond_expr passes to the targets when the first argument is not a
> comparison, between !=0, <0, ==-1 or others (I vote for <0 because of
> opencl). One issue is that targets wouldn't know if it was a dummy
> comparison that can safely be ignored because the other part is the result
> of logical operations on comparisons (thus composed of -1 and 0) or a
> genuine comparison with an arbitrary vector, so a new optimization would be
> needed (in the back-end I guess or we would need an alternate instruction to
> vcond) to detect if a vector is a "signed boolean" vector.
> We could instead say that vec_cond_expr really follows OpenCL's semantics
> and looks at the MSB of each element. I am not sure that would change much,
> it would mostly delay the apparition of <0 to RTL expansion time (and thus
> make gimple slightly lighter).

I think we should delay the decision on how to optimize this.  It's indeed
not trivial and the GIMPLE middle-end aggressively forwards feeding
comparisons into the VEC_COND_EXPR expressions already (somewhat
defeating any CSE that might be possible here) in forwprop.

>
>
>>>>> +/* Return true if EXPR is an integer constant representing true.  */
>>>>> +
>>>>> +bool
>>>>> +integer_truep (const_tree expr)
>>>>> +{
>>>>> +  STRIP_NOPS (expr);
>>>>> +
>>>>> +  switch (TREE_CODE (expr))
>>>>> +    {
>>>>> +    case INTEGER_CST:
>>>>> +      /* Do not just test != 0, some places expect the value 1.  */
>>>>> +      return (TREE_INT_CST_LOW (expr) == 1
>>>>> +             && TREE_INT_CST_HIGH (expr) == 0);
>>>>
>>>>
>>>>
>>>> I wonder if using STORE_FLAG_VALUE is better here (note that it
>>>> usually differs for FP vs. integral comparisons and the mode passed
>>>> to STORE_FLAG_VALUE is that of the comparison result).
>>>
>>>
>>>
>>> I notice there is already a VECTOR_STORE_FLAG_VALUE (used only once in
>>> simplify-rtx, in a way that seems a bit strange but I'll try to
>>> understand that later). Thanks for showing me this macro, it seems
>>> important indeed. However the STORE_FLAG_VALUE mechanism seems to be for
>>> the RTL level.
>>>
>>> It looks like it would be possible to have 3 different semantics:
>>> source code is OpenCL, middle-end whatever we want (0 / 1 for instance),
>>> and back-end is whatever the target wants. The front-end would generate
>>> for a<b : vec_cond_expr(a<b,-1,0)
>>
>>
>> seems like the middle-end uses this for lowering vector compares,
>> a < b -> { a[0] < b[0] ? -1 : 0, ... }
>>
>>> and for a?b:c : vec_cond_expr(a<0,b,c)
>>
>>
>> it looks like ?: is not generally handled by tree-vect-generic, so it must
>> be either not supported by the frontend or lowered therein (ISTR
>> it is forced to appear as a != {0,...} ? ... : ...)
>
>
> Not supported by the front-end yet (not even by the gimplifier), I have
> (bad) patches but I can't really finish them before this conversation is
> done.
>
>
>
> I think there are quite few places in the middle-end that assume that
> comparisons return a vector of -1/0 and even fewer that vec_cond_expr only
> looks at the MSB of each element. So it is still time to change that if you
> want to. But if we want to change it, I think it should happen now before
> even more vector code gets in (not particularly my patches, I am thinking of
> cilk and others too).

I think we should document the -1/0 fact and stick to it.

Thanks,
Richard.

>
> Ok, that's long enough, I need to send it now...
>
> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-08  9:04         ` Richard Guenther
@ 2012-10-08  9:34           ` Marc Glisse
  2012-10-08  9:44             ` Richard Guenther
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-08  9:34 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

On Mon, 8 Oct 2012, Richard Guenther wrote:

>> VEC_COND_EXPR is more complicated. We could for instance require that it
>> takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon
>> thing are equivalent). Which would leave to decide what the expansion of
>> vec_cond_expr passes to the targets when the first argument is not a
>> comparison, between !=0, <0, ==-1 or others (I vote for <0 because of
>> opencl). One issue is that targets wouldn't know if it was a dummy
>> comparison that can safely be ignored because the other part is the result
>> of logical operations on comparisons (thus composed of -1 and 0) or a
>> genuine comparison with an arbitrary vector, so a new optimization would be
>> needed (in the back-end I guess or we would need an alternate instruction to
>> vcond) to detect if a vector is a "signed boolean" vector.
>> We could instead say that vec_cond_expr really follows OpenCL's semantics
>> and looks at the MSB of each element. I am not sure that would change much,
>> it would mostly delay the apparition of <0 to RTL expansion time (and thus
>> make gimple slightly lighter).
>
> I think we should delay the decision on how to optimize this.  It's indeed
> not trivial and the GIMPLE middle-end aggressively forwards feeding
> comparisons into the VEC_COND_EXPR expressions already (somewhat
> defeating any CSE that might be possible here) in forwprop.

Thanks for going through the long email :-)

What does that imply for the first argument of VEC_COND_EXPR? Currently, 
the expander asserts that it is a comparison, but that is not reflected in 
the gimple checkers.

If we document that VEC_COND_EXPR takes a vector of -1 and 0 (which is the 
case for a comparison), I don't think it prevents from later relaxing that 
to <0 or !=0. But then I don't know how to handle expansion when the 
argument is neither a comparison (vcond) nor a constant (vec_merge? I 
haven't tried but that should be doable), I would have to pass <0 or !=0 
to the target. So is the best choice to document that VEC_COND_EXPR takes 
as first argument a comparison and make gimple checking reflect that? 
(seems sad, but at least that would tell me what I can/can't do)

By the way, since we are documenting comparisons as returning 0 and -1, 
does that bring back the integer_truep predicate?

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-08  9:34           ` Marc Glisse
@ 2012-10-08  9:44             ` Richard Guenther
  2012-10-10 23:37               ` Marc Glisse
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Guenther @ 2012-10-08  9:44 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Mon, Oct 8, 2012 at 11:34 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 8 Oct 2012, Richard Guenther wrote:
>
>>> VEC_COND_EXPR is more complicated. We could for instance require that it
>>> takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon
>>> thing are equivalent). Which would leave to decide what the expansion of
>>> vec_cond_expr passes to the targets when the first argument is not a
>>> comparison, between !=0, <0, ==-1 or others (I vote for <0 because of
>>> opencl). One issue is that targets wouldn't know if it was a dummy
>>> comparison that can safely be ignored because the other part is the
>>> result
>>> of logical operations on comparisons (thus composed of -1 and 0) or a
>>> genuine comparison with an arbitrary vector, so a new optimization would
>>> be
>>> needed (in the back-end I guess or we would need an alternate instruction
>>> to
>>> vcond) to detect if a vector is a "signed boolean" vector.
>>> We could instead say that vec_cond_expr really follows OpenCL's semantics
>>> and looks at the MSB of each element. I am not sure that would change
>>> much,
>>> it would mostly delay the apparition of <0 to RTL expansion time (and
>>> thus
>>> make gimple slightly lighter).
>>
>>
>> I think we should delay the decision on how to optimize this.  It's indeed
>> not trivial and the GIMPLE middle-end aggressively forwards feeding
>> comparisons into the VEC_COND_EXPR expressions already (somewhat
>> defeating any CSE that might be possible here) in forwprop.
>
>
> Thanks for going through the long email :-)
>
> What does that imply for the first argument of VEC_COND_EXPR? Currently, the
> expander asserts that it is a comparison, but that is not reflected in the
> gimple checkers.

And I don't think we should reflect that in the gimple checkers rather fixup the
expander (transparently use p != 0 or p < 0).

> If we document that VEC_COND_EXPR takes a vector of -1 and 0 (which is the
> case for a comparison), I don't think it prevents from later relaxing that
> to <0 or !=0. But then I don't know how to handle expansion when the
> argument is neither a comparison (vcond) nor a constant (vec_merge? I
> haven't tried but that should be doable), I would have to pass <0 or !=0 to
> the target.

Yes.

> So is the best choice to document that VEC_COND_EXPR takes as
> first argument a comparison and make gimple checking reflect that? (seems
> sad, but at least that would tell me what I can/can't do)

No, that would just mean that in GIMPLE you'd add this p != 0 or p < 0.
And at some point in the future I really really want to push this embedded
expression to a separate statement so I have a SSA definition for it.

> By the way, since we are documenting comparisons as returning 0 and -1, does
> that bring back the integer_truep predicate?

Not sure, true would still be != 0 or all_onesp (all bits of the
precision are 1), no?

Richard.

> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-08  9:44             ` Richard Guenther
@ 2012-10-10 23:37               ` Marc Glisse
  2012-10-11 12:58                 ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-10 23:37 UTC (permalink / raw)
  To: Richard Guenther; +Cc: gcc-patches

On Mon, 8 Oct 2012, Richard Guenther wrote:

> On Mon, Oct 8, 2012 at 11:34 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Mon, 8 Oct 2012, Richard Guenther wrote:
>>
>>>> VEC_COND_EXPR is more complicated. We could for instance require that it
>>>> takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon
>>>> thing are equivalent). Which would leave to decide what the expansion of
>>>> vec_cond_expr passes to the targets when the first argument is not a
>>>> comparison, between !=0, <0, ==-1 or others (I vote for <0 because of
>>>> opencl). One issue is that targets wouldn't know if it was a dummy
>>>> comparison that can safely be ignored because the other part is the
>>>> result
>>>> of logical operations on comparisons (thus composed of -1 and 0) or a
>>>> genuine comparison with an arbitrary vector, so a new optimization would
>>>> be
>>>> needed (in the back-end I guess or we would need an alternate instruction
>>>> to
>>>> vcond) to detect if a vector is a "signed boolean" vector.
>>>> We could instead say that vec_cond_expr really follows OpenCL's semantics
>>>> and looks at the MSB of each element. I am not sure that would change
>>>> much,
>>>> it would mostly delay the apparition of <0 to RTL expansion time (and
>>>> thus
>>>> make gimple slightly lighter).
>>>
>>>
>>> I think we should delay the decision on how to optimize this.  It's indeed
>>> not trivial and the GIMPLE middle-end aggressively forwards feeding
>>> comparisons into the VEC_COND_EXPR expressions already (somewhat
>>> defeating any CSE that might be possible here) in forwprop.
>>
>>
>> Thanks for going through the long email :-)
>>
>> What does that imply for the first argument of VEC_COND_EXPR? Currently, the
>> expander asserts that it is a comparison, but that is not reflected in the
>> gimple checkers.
>
> And I don't think we should reflect that in the gimple checkers rather fixup the
> expander (transparently use p != 0 or p < 0).

I guess I'll pick p < 0 then (just because I am more interested in x86 and 
it makes the optimization easier on x86). Having another expander than 
vcond (one that takes the mask directly instead of a comparison, and for 
which we promise that the argument will be a vector of -1/0) would be 
convenient...

>> So is the best choice to document that VEC_COND_EXPR takes as
>> first argument a comparison and make gimple checking reflect that? (seems
>> sad, but at least that would tell me what I can/can't do)
>
> No, that would just mean that in GIMPLE you'd add this p != 0 or p < 0.
> And at some point in the future I really really want to push this embedded
> expression to a separate statement so I have a SSA definition for it.

Once the expander is ready to accept it, ok. It seems to me that the 
scalar COND_EXPR may also have an embedded expression, so I assume 
COND_EXPR and VEC_COND_EXPR are meant to diverge (or maybe you also want 
to do the same for COND_EXPR?).

>> By the way, since we are documenting comparisons as returning 0 and -1, does
>> that bring back the integer_truep predicate?
>
> Not sure, true would still be != 0 or all_onesp (all bits of the
> precision are 1), no?

I was going to make truep equivalent to onep for scalars and all_onesp for 
vectors (since -1 will be the only value documented as "true" for 
vectors). I guess it can wait, I can manually inline it for now.


Since we are documenting that comparisons of vectors return -1 and 0 in 
the middle-end, I was wondering whether the comparison expanders would 
need updating so they forward to vcond(...,-1,0), at least on platforms 
that don't define VECTOR_STORE_FLAG_VALUE to constm1_rtx for this mode. 
But a simple test on sparc shows it is already fine :-)

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: vec_cond_expr adjustments
  2012-10-10 23:37               ` Marc Glisse
@ 2012-10-11 12:58                 ` Richard Biener
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Biener @ 2012-10-11 12:58 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Thu, Oct 11, 2012 at 1:20 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 8 Oct 2012, Richard Guenther wrote:
>
>> On Mon, Oct 8, 2012 at 11:34 AM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> On Mon, 8 Oct 2012, Richard Guenther wrote:
>>>
>>>>> VEC_COND_EXPR is more complicated. We could for instance require that
>>>>> it
>>>>> takes as first argument a vector of -1 and 0 (thus <0, !=0 and the neon
>>>>> thing are equivalent). Which would leave to decide what the expansion
>>>>> of
>>>>> vec_cond_expr passes to the targets when the first argument is not a
>>>>> comparison, between !=0, <0, ==-1 or others (I vote for <0 because of
>>>>> opencl). One issue is that targets wouldn't know if it was a dummy
>>>>> comparison that can safely be ignored because the other part is the
>>>>> result
>>>>> of logical operations on comparisons (thus composed of -1 and 0) or a
>>>>> genuine comparison with an arbitrary vector, so a new optimization
>>>>> would
>>>>> be
>>>>> needed (in the back-end I guess or we would need an alternate
>>>>> instruction
>>>>> to
>>>>> vcond) to detect if a vector is a "signed boolean" vector.
>>>>> We could instead say that vec_cond_expr really follows OpenCL's
>>>>> semantics
>>>>> and looks at the MSB of each element. I am not sure that would change
>>>>> much,
>>>>> it would mostly delay the apparition of <0 to RTL expansion time (and
>>>>> thus
>>>>> make gimple slightly lighter).
>>>>
>>>>
>>>>
>>>> I think we should delay the decision on how to optimize this.  It's
>>>> indeed
>>>> not trivial and the GIMPLE middle-end aggressively forwards feeding
>>>> comparisons into the VEC_COND_EXPR expressions already (somewhat
>>>> defeating any CSE that might be possible here) in forwprop.
>>>
>>>
>>>
>>> Thanks for going through the long email :-)
>>>
>>> What does that imply for the first argument of VEC_COND_EXPR? Currently,
>>> the
>>> expander asserts that it is a comparison, but that is not reflected in
>>> the
>>> gimple checkers.
>>
>>
>> And I don't think we should reflect that in the gimple checkers rather
>> fixup the
>> expander (transparently use p != 0 or p < 0).
>
>
> I guess I'll pick p < 0 then (just because I am more interested in x86 and
> it makes the optimization easier on x86). Having another expander than vcond
> (one that takes the mask directly instead of a comparison, and for which we
> promise that the argument will be a vector of -1/0) would be convenient...
>
>
>>> So is the best choice to document that VEC_COND_EXPR takes as
>>> first argument a comparison and make gimple checking reflect that? (seems
>>> sad, but at least that would tell me what I can/can't do)
>>
>>
>> No, that would just mean that in GIMPLE you'd add this p != 0 or p < 0.
>> And at some point in the future I really really want to push this embedded
>> expression to a separate statement so I have a SSA definition for it.
>
>
> Once the expander is ready to accept it, ok. It seems to me that the scalar
> COND_EXPR may also have an embedded expression, so I assume COND_EXPR and
> VEC_COND_EXPR are meant to diverge (or maybe you also want to do the same
> for COND_EXPR?).

Yes, I want the same for COND_EXPR and even GIMPLE_COND.  I had
patches to do this about two years ago but was too lazy to fixup all the
fallout.  My plan was to eventually return to this and first tackle COND_EXPR
and VEC_COND_EXRP only, leaving GIMPLE_COND in place.

>>> By the way, since we are documenting comparisons as returning 0 and -1,
>>> does
>>> that bring back the integer_truep predicate?
>>
>>
>> Not sure, true would still be != 0 or all_onesp (all bits of the
>> precision are 1), no?
>
>
> I was going to make truep equivalent to onep for scalars and all_onesp for
> vectors (since -1 will be the only value documented as "true" for vectors).
> I guess it can wait, I can manually inline it for now.

Yes please.

> Since we are documenting that comparisons of vectors return -1 and 0 in the
> middle-end, I was wondering whether the comparison expanders would need
> updating so they forward to vcond(...,-1,0), at least on platforms that
> don't define VECTOR_STORE_FLAG_VALUE to constm1_rtx for this mode. But a
> simple test on sparc shows it is already fine :-)

Heh, good.

Richard.

> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-09-29 15:16 Constant-fold vector comparisons Marc Glisse
  2012-10-01 11:40 ` Richard Guenther
@ 2012-10-12 14:10 ` Marc Glisse
  2012-10-15 10:34   ` Richard Biener
  1 sibling, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-12 14:10 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener

[-- Attachment #1: Type: TEXT/PLAIN, Size: 570 bytes --]

On Sat, 29 Sep 2012, Marc Glisse wrote:

> 1) it handles constant folding of vector comparisons,
>
> 2) it fixes another place where vectors are not expected

Here is a new version of this patch.

In a first try, I got bitten by the operator priorities in "a && b?c:d", 
which g++ doesn't warn about.


2012-10-12  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree-ssa-forwprop.c (forward_propagate_into_cond): Handle vectors.
 	* fold-const.c (fold_relational_const): Handle VECTOR_CST.

gcc/testsuite/
 	* gcc.dg/tree-ssa/foldconst-6.c: New testcase.

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 4275 bytes --]

Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 192400)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -570,40 +570,43 @@ forward_propagate_into_cond (gimple_stmt
       code = gimple_assign_rhs_code (def_stmt);
       if (TREE_CODE_CLASS (code) == tcc_comparison)
 	tmp = fold_build2_loc (gimple_location (def_stmt),
 			       code,
 			       TREE_TYPE (cond),
 			       gimple_assign_rhs1 (def_stmt),
 			       gimple_assign_rhs2 (def_stmt));
       else if ((code == BIT_NOT_EXPR
 		&& TYPE_PRECISION (TREE_TYPE (cond)) == 1)
 	       || (code == BIT_XOR_EXPR
-		   && integer_onep (gimple_assign_rhs2 (def_stmt))))
+		   && ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
+		       ? integer_all_onesp (gimple_assign_rhs2 (def_stmt))
+		       : integer_onep (gimple_assign_rhs2 (def_stmt)))))
 	{
 	  tmp = gimple_assign_rhs1 (def_stmt);
 	  swap = true;
 	}
     }
 
   if (tmp
       && is_gimple_condexpr (tmp))
     {
       if (dump_file && tmp)
 	{
 	  fprintf (dump_file, "  Replaced '");
 	  print_generic_expr (dump_file, cond, 0);
 	  fprintf (dump_file, "' with '");
 	  print_generic_expr (dump_file, tmp, 0);
 	  fprintf (dump_file, "'\n");
 	}
 
-      if (integer_onep (tmp))
+      if ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
+	  ? integer_all_onesp (tmp) : integer_onep (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
       else if (integer_zerop (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
       else
 	{
 	  gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
 	  if (swap)
 	    {
 	      tree t = gimple_assign_rhs2 (stmt);
 	      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));
Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
+
+vec f ()
+{
+  vec a = { -2, 666 };
+  vec b = { 3, 2 };
+  return a < b;
+}
+
+/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 192400)
+++ gcc/fold-const.c	(working copy)
@@ -16121,20 +16121,44 @@ fold_relational_const (enum tree_code co
 					  TREE_IMAGPART (op0),
 					  TREE_IMAGPART (op1));
       if (code == EQ_EXPR)
 	return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
       else if (code == NE_EXPR)
 	return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
       else
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
+    {
+      int count = VECTOR_CST_NELTS (op0);
+      tree *elts =  XALLOCAVEC (tree, count);
+      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
+
+      for (int i = 0; i < count; i++)
+	{
+	  tree elem_type = TREE_TYPE (type);
+	  tree elem0 = VECTOR_CST_ELT (op0, i);
+	  tree elem1 = VECTOR_CST_ELT (op1, i);
+
+	  tree tem = fold_relational_const (code, elem_type,
+					    elem0, elem1);
+
+	  if (tem == NULL_TREE)
+	    return NULL_TREE;
+
+	  elts[i] = build_int_cst (elem_type, integer_zerop (tem) ? 0 : -1);
+	}
+
+      return build_vector (type, elts);
+    }
+
   /* From here on we only handle LT, LE, GT, GE, EQ and NE.
 
      To compute GT, swap the arguments and do LT.
      To compute GE, do LT and invert the result.
      To compute LE, swap the arguments, do LT and invert the result.
      To compute NE, do EQ and invert the result.
 
      Therefore, the code below must handle only EQ and LT.  */
 
   if (code == LE_EXPR || code == GT_EXPR)

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-10-12 14:10 ` Constant-fold vector comparisons Marc Glisse
@ 2012-10-15 10:34   ` Richard Biener
  2012-10-15 14:19     ` Marc Glisse
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Biener @ 2012-10-15 10:34 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Fri, Oct 12, 2012 at 4:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Sat, 29 Sep 2012, Marc Glisse wrote:
>
>> 1) it handles constant folding of vector comparisons,
>>
>> 2) it fixes another place where vectors are not expected
>
>
> Here is a new version of this patch.
>
> In a first try, I got bitten by the operator priorities in "a && b?c:d",
> which g++ doesn't warn about.
>
>
> 2012-10-12  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree-ssa-forwprop.c (forward_propagate_into_cond): Handle vectors.
>
>         * fold-const.c (fold_relational_const): Handle VECTOR_CST.
>
> gcc/testsuite/
>         * gcc.dg/tree-ssa/foldconst-6.c: New testcase.
>
> --
> Marc Glisse
>
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     (revision 192400)
> +++ gcc/tree-ssa-forwprop.c     (working copy)
> @@ -570,40 +570,43 @@ forward_propagate_into_cond (gimple_stmt
>        code = gimple_assign_rhs_code (def_stmt);
>        if (TREE_CODE_CLASS (code) == tcc_comparison)
>         tmp = fold_build2_loc (gimple_location (def_stmt),
>                                code,
>                                TREE_TYPE (cond),
>                                gimple_assign_rhs1 (def_stmt),
>                                gimple_assign_rhs2 (def_stmt));
>        else if ((code == BIT_NOT_EXPR
>                 && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
>                || (code == BIT_XOR_EXPR
> -                  && integer_onep (gimple_assign_rhs2 (def_stmt))))
> +                  && ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
> +                      ? integer_all_onesp (gimple_assign_rhs2 (def_stmt))
> +                      : integer_onep (gimple_assign_rhs2 (def_stmt)))))

I don't think that we can do anything for vectors here.  The non-vector
path assumes that the type is a boolean type (thus two-valued), but
for vectors we can have arbitrary integer value input.  Thus, as we
defined true to -1 and false to 0 we cannot, unless relaxing what
VEC_COND_EXRP treats as true or false, optimize any of ~ or ^ -1
away.

Which means I'd prefer if you simply condition the existing ~ and ^
handling on COND_EXPR.

>         {
>           tmp = gimple_assign_rhs1 (def_stmt);
>           swap = true;
>         }
>      }
>
>    if (tmp
>        && is_gimple_condexpr (tmp))
>      {
>        if (dump_file && tmp)
>         {
>           fprintf (dump_file, "  Replaced '");
>           print_generic_expr (dump_file, cond, 0);
>           fprintf (dump_file, "' with '");
>           print_generic_expr (dump_file, tmp, 0);
>           fprintf (dump_file, "'\n");
>         }
>
> -      if (integer_onep (tmp))
> +      if ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
> +         ? integer_all_onesp (tmp) : integer_onep (tmp))

and cache gimple_assign_rhs_code as a 'code' variable at the beginning
of the function.

>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
>        else if (integer_zerop (tmp))
>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
>        else
>         {
>           gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
>           if (swap)
>             {
>               tree t = gimple_assign_rhs2 (stmt);
>               gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));
> Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
> +
> +vec f ()
> +{
> +  vec a = { -2, 666 };
> +  vec b = { 3, 2 };
> +  return a < b;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 192400)
> +++ gcc/fold-const.c    (working copy)
> @@ -16121,20 +16121,44 @@ fold_relational_const (enum tree_code co
>                                           TREE_IMAGPART (op0),
>                                           TREE_IMAGPART (op1));
>        if (code == EQ_EXPR)
>         return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
>        else if (code == NE_EXPR)
>         return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
>        else
>         return NULL_TREE;
>      }
>
> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
> +    {
> +      int count = VECTOR_CST_NELTS (op0);
> +      tree *elts =  XALLOCAVEC (tree, count);
> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);

A better check would be that VECTOR_CST_NELTS of type is the same
as that of op0.

Ok with these changes.

Thanks,
Richard.

> +
> +      for (int i = 0; i < count; i++)
> +       {
> +         tree elem_type = TREE_TYPE (type);
> +         tree elem0 = VECTOR_CST_ELT (op0, i);
> +         tree elem1 = VECTOR_CST_ELT (op1, i);
> +
> +         tree tem = fold_relational_const (code, elem_type,
> +                                           elem0, elem1);
> +
> +         if (tem == NULL_TREE)
> +           return NULL_TREE;
> +
> +         elts[i] = build_int_cst (elem_type, integer_zerop (tem) ? 0 : -1);
> +       }
> +
> +      return build_vector (type, elts);
> +    }
> +
>    /* From here on we only handle LT, LE, GT, GE, EQ and NE.
>
>       To compute GT, swap the arguments and do LT.
>       To compute GE, do LT and invert the result.
>       To compute LE, swap the arguments, do LT and invert the result.
>       To compute NE, do EQ and invert the result.
>
>       Therefore, the code below must handle only EQ and LT.  */
>
>    if (code == LE_EXPR || code == GT_EXPR)
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-10-15 10:34   ` Richard Biener
@ 2012-10-15 14:19     ` Marc Glisse
  2012-10-16 11:38       ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-15 14:19 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

On Mon, 15 Oct 2012, Richard Biener wrote:

>>        else if ((code == BIT_NOT_EXPR
>>                 && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
>>                || (code == BIT_XOR_EXPR
>> -                  && integer_onep (gimple_assign_rhs2 (def_stmt))))
>> +                  && ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
>> +                      ? integer_all_onesp (gimple_assign_rhs2 (def_stmt))
>> +                      : integer_onep (gimple_assign_rhs2 (def_stmt)))))
>
> I don't think that we can do anything for vectors here.  The non-vector
> path assumes that the type is a boolean type (thus two-valued), but
> for vectors we can have arbitrary integer value input.

Actually, we just defined VEC_COND_EXPR as taking only vectors of -1 and 0 
as its first argument. So if it takes X^-1 or ~X as first argument (looks 
like I forgot the ~ case), then X is also a vector of -1 and 0.

I liked your idea of the signed boolean vector, as a way to express that 
we know some vector can only have values 0 and -1, but I am not sure how 
to use it.

> Thus, as we defined true to -1 and false to 0 we cannot, unless relaxing 
> what VEC_COND_EXRP treats as true or false, optimize any of ~ or ^ -1 
> away.

It seems to me that what prevents from optimizing is if we want to keep 
the door open for a future relaxation of what VEC_COND_EXPR accepts as its 
first argument. Which means: produce only -1 and 0, but don't assume we 
are only reading -1 and 0 (unless we have a reason to know it, for 
instance because it is the result of a comparison), and don't assume any 
specific interpretation on those other values. Not sure how much that 
limits possible optimizations.

> Which means I'd prefer if you simply condition the existing ~ and ^
> handling on COND_EXPR.

Ok, that situation probably won't happen soon anyway, I just wanted to do 
something while I was looking at this region of code.

Thanks,

-- 
Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-10-15 14:19     ` Marc Glisse
@ 2012-10-16 11:38       ` Richard Biener
  2012-10-22 20:33         ` Marc Glisse
  0 siblings, 1 reply; 16+ messages in thread
From: Richard Biener @ 2012-10-16 11:38 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Mon, Oct 15, 2012 at 3:51 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 15 Oct 2012, Richard Biener wrote:
>
>>>        else if ((code == BIT_NOT_EXPR
>>>                 && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
>>>                || (code == BIT_XOR_EXPR
>>> -                  && integer_onep (gimple_assign_rhs2 (def_stmt))))
>>> +                  && ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
>>> +                      ? integer_all_onesp (gimple_assign_rhs2
>>> (def_stmt))
>>> +                      : integer_onep (gimple_assign_rhs2 (def_stmt)))))
>>
>>
>> I don't think that we can do anything for vectors here.  The non-vector
>> path assumes that the type is a boolean type (thus two-valued), but
>> for vectors we can have arbitrary integer value input.
>
>
> Actually, we just defined VEC_COND_EXPR as taking only vectors of -1 and 0
> as its first argument. So if it takes X^-1 or ~X as first argument (looks
> like I forgot the ~ case), then X is also a vector of -1 and 0.

Ok, indeed.

> I liked your idea of the signed boolean vector, as a way to express that we
> know some vector can only have values 0 and -1, but I am not sure how to use
> it.

Ah no, I didn't mean to suggest that ;)

>
>> Thus, as we defined true to -1 and false to 0 we cannot, unless relaxing
>> what VEC_COND_EXRP treats as true or false, optimize any of ~ or ^ -1 away.
>
>
> It seems to me that what prevents from optimizing is if we want to keep the
> door open for a future relaxation of what VEC_COND_EXPR accepts as its first
> argument. Which means: produce only -1 and 0, but don't assume we are only
> reading -1 and 0 (unless we have a reason to know it, for instance because
> it is the result of a comparison), and don't assume any specific
> interpretation on those other values. Not sure how much that limits possible
> optimizations.

I'm not sure either - I'd rather leave the possibility open until we see
a compelling reason to go either way (read: a testcase where it matters
in practice).

>> Which means I'd prefer if you simply condition the existing ~ and ^
>> handling on COND_EXPR.
>
>
> Ok, that situation probably won't happen soon anyway, I just wanted to do
> something while I was looking at this region of code.

Yes, guarding against VEC_COND_EXPR is definitely needed.

Richard.

> Thanks,
>
> --
> Marc Glisse

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-10-16 11:38       ` Richard Biener
@ 2012-10-22 20:33         ` Marc Glisse
  2012-10-23 10:05           ` Richard Biener
  0 siblings, 1 reply; 16+ messages in thread
From: Marc Glisse @ 2012-10-22 20:33 UTC (permalink / raw)
  To: Richard Biener; +Cc: gcc-patches

[-- Attachment #1: Type: TEXT/PLAIN, Size: 3128 bytes --]

On Mon, 15 Oct 2012, Richard Biener wrote:

> On Fri, Oct 12, 2012 at 4:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>> On Sat, 29 Sep 2012, Marc Glisse wrote:
>>
>>> 1) it handles constant folding of vector comparisons,
>>>
>>> 2) it fixes another place where vectors are not expected
>>
>>
>> Here is a new version of this patch.
>>
>> In a first try, I got bitten by the operator priorities in "a && b?c:d",
>> which g++ doesn't warn about.
>>
>>
>> 2012-10-12  Marc Glisse  <marc.glisse@inria.fr>
>>
>> gcc/
>>         * tree-ssa-forwprop.c (forward_propagate_into_cond): Handle vectors.
>>
>>         * fold-const.c (fold_relational_const): Handle VECTOR_CST.
>>
>> gcc/testsuite/
>>         * gcc.dg/tree-ssa/foldconst-6.c: New testcase.

Here is a new version, with the same ChangeLog plus

 	* doc/generic.texi (VEC_COND_EXPR): Document current policy.

> Which means I'd prefer if you simply condition the existing ~ and ^
> handling on COND_EXPR.

Done.

>> -      if (integer_onep (tmp))
>> +      if ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
>> +         ? integer_all_onesp (tmp) : integer_onep (tmp))
>
> and cache gimple_assign_rhs_code as a 'code' variable at the beginning
> of the function.

Done.

>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>> +    {
>> +      int count = VECTOR_CST_NELTS (op0);
>> +      tree *elts =  XALLOCAVEC (tree, count);
>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>
> A better check would be that VECTOR_CST_NELTS of type is the same
> as that of op0.

I wasn't sure which check you meant, so I added both possibilities. I am 
fine with removing either or both, actually.

> Ok with these changes.

A few too many changes, I prefer to re-post, in case.


On Tue, 16 Oct 2012, Richard Biener wrote:

>> I liked your idea of the signed boolean vector, as a way to express that we
>> know some vector can only have values 0 and -1, but I am not sure how to use
>> it.
>
> Ah no, I didn't mean to suggest that ;)

Maybe you didn't, but I still took the idea from your words ;-)

>>> Thus, as we defined true to -1 and false to 0 we cannot, unless relaxing
>>> what VEC_COND_EXRP treats as true or false, optimize any of ~ or ^ -1 away.
>>
>> It seems to me that what prevents from optimizing is if we want to keep the
>> door open for a future relaxation of what VEC_COND_EXPR accepts as its first
>> argument. Which means: produce only -1 and 0, but don't assume we are only
>> reading -1 and 0 (unless we have a reason to know it, for instance because
>> it is the result of a comparison), and don't assume any specific
>> interpretation on those other values. Not sure how much that limits possible
>> optimizations.
>
> I'm not sure either - I'd rather leave the possibility open until we see
> a compelling reason to go either way (read: a testcase where it matters
> in practice).

Ok, I implemented the safe way. My current opinion is that we should go 
with a VEC_COND_EXPR that only accepts 0 and -1 (it is easy to pass a 
LT_EXPR or NE_EXPR as first argument if that is what one wants), but it 
can wait.

-- 
Marc Glisse

[-- Attachment #2: Type: TEXT/PLAIN, Size: 6830 bytes --]

Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
===================================================================
--- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
+++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c	(revision 0)
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-ccp1" } */
+
+typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
+
+vec f ()
+{
+  vec a = { -2, 666 };
+  vec b = { 3, 2 };
+  return a < b;
+}
+
+/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */

Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 192695)
+++ gcc/fold-const.c	(working copy)
@@ -16123,20 +16123,45 @@ fold_relational_const (enum tree_code co
 					  TREE_IMAGPART (op0),
 					  TREE_IMAGPART (op1));
       if (code == EQ_EXPR)
 	return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
       else if (code == NE_EXPR)
 	return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
       else
 	return NULL_TREE;
     }
 
+  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
+    {
+      unsigned count = VECTOR_CST_NELTS (op0);
+      tree *elts =  XALLOCAVEC (tree, count);
+      gcc_assert (VECTOR_CST_NELTS (op1) == count);
+      gcc_assert (TYPE_VECTOR_SUBPARTS (type) == count);
+
+      for (unsigned i = 0; i < count; i++)
+	{
+	  tree elem_type = TREE_TYPE (type);
+	  tree elem0 = VECTOR_CST_ELT (op0, i);
+	  tree elem1 = VECTOR_CST_ELT (op1, i);
+
+	  tree tem = fold_relational_const (code, elem_type,
+					    elem0, elem1);
+
+	  if (tem == NULL_TREE)
+	    return NULL_TREE;
+
+	  elts[i] = build_int_cst (elem_type, integer_zerop (tem) ? 0 : -1);
+	}
+
+      return build_vector (type, elts);
+    }
+
   /* From here on we only handle LT, LE, GT, GE, EQ and NE.
 
      To compute GT, swap the arguments and do LT.
      To compute GE, do LT and invert the result.
      To compute LE, swap the arguments, do LT and invert the result.
      To compute NE, do EQ and invert the result.
 
      Therefore, the code below must handle only EQ and LT.  */
 
   if (code == LE_EXPR || code == GT_EXPR)
Index: gcc/doc/generic.texi
===================================================================
--- gcc/doc/generic.texi	(revision 192695)
+++ gcc/doc/generic.texi	(working copy)
@@ -1773,22 +1773,23 @@ elements of the two vectors are merged (
 vector.
 
 @item VEC_COND_EXPR
 These nodes represent @code{?:} expressions.  The three operands must be
 vectors of the same size and number of elements.  The second and third
 operands must have the same type as the entire expression.  The first
 operand is of signed integral vector type.  If an element of the first
 operand evaluates to a zero value, the corresponding element of the
 result is taken from the third operand. If it evaluates to a minus one
 value, it is taken from the second operand. It should never evaluate to
-any other value. In contrast with a @code{COND_EXPR}, all operands are
-always evaluated.
+any other value currently, but optimizations should not rely on that
+property. In contrast with a @code{COND_EXPR}, all operands are always
+evaluated.
 @end table
 
 
 @c ---------------------------------------------------------------------
 @c Statements
 @c ---------------------------------------------------------------------
 
 @node Statements
 @section Statements
 @cindex Statements
Index: gcc/tree-ssa-forwprop.c
===================================================================
--- gcc/tree-ssa-forwprop.c	(revision 192695)
+++ gcc/tree-ssa-forwprop.c	(working copy)
@@ -544,66 +544,69 @@ forward_propagate_into_gimple_cond (gimp
 /* Propagate from the ssa name definition statements of COND_EXPR
    in the rhs of statement STMT into the conditional if that simplifies it.
    Returns true zero if the stmt was changed.  */
 
 static bool
 forward_propagate_into_cond (gimple_stmt_iterator *gsi_p)
 {
   gimple stmt = gsi_stmt (*gsi_p);
   tree tmp = NULL_TREE;
   tree cond = gimple_assign_rhs1 (stmt);
+  enum tree_code code = gimple_assign_rhs_code (stmt);
   bool swap = false;
 
   /* We can do tree combining on SSA_NAME and comparison expressions.  */
   if (COMPARISON_CLASS_P (cond))
     tmp = forward_propagate_into_comparison_1 (stmt, TREE_CODE (cond),
 					       TREE_TYPE (cond),
 					       TREE_OPERAND (cond, 0),
 					       TREE_OPERAND (cond, 1));
   else if (TREE_CODE (cond) == SSA_NAME)
     {
-      enum tree_code code;
+      enum tree_code def_code;
       tree name = cond;
       gimple def_stmt = get_prop_source_stmt (name, true, NULL);
       if (!def_stmt || !can_propagate_from (def_stmt))
 	return 0;
 
-      code = gimple_assign_rhs_code (def_stmt);
-      if (TREE_CODE_CLASS (code) == tcc_comparison)
+      def_code = gimple_assign_rhs_code (def_stmt);
+      if (TREE_CODE_CLASS (def_code) == tcc_comparison)
 	tmp = fold_build2_loc (gimple_location (def_stmt),
-			       code,
+			       def_code,
 			       TREE_TYPE (cond),
 			       gimple_assign_rhs1 (def_stmt),
 			       gimple_assign_rhs2 (def_stmt));
-      else if ((code == BIT_NOT_EXPR
-		&& TYPE_PRECISION (TREE_TYPE (cond)) == 1)
-	       || (code == BIT_XOR_EXPR
-		   && integer_onep (gimple_assign_rhs2 (def_stmt))))
+      else if (code == COND_EXPR
+	       && ((def_code == BIT_NOT_EXPR
+		    && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
+		   || (def_code == BIT_XOR_EXPR
+		       && integer_onep (gimple_assign_rhs2 (def_stmt)))))
 	{
 	  tmp = gimple_assign_rhs1 (def_stmt);
 	  swap = true;
 	}
     }
 
   if (tmp
       && is_gimple_condexpr (tmp))
     {
       if (dump_file && tmp)
 	{
 	  fprintf (dump_file, "  Replaced '");
 	  print_generic_expr (dump_file, cond, 0);
 	  fprintf (dump_file, "' with '");
 	  print_generic_expr (dump_file, tmp, 0);
 	  fprintf (dump_file, "'\n");
 	}
 
-      if (integer_onep (tmp))
+      if ((code == VEC_COND_EXPR) ? integer_all_onesp (tmp)
+				  : integer_onep (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
       else if (integer_zerop (tmp))
 	gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
       else
 	{
 	  gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
 	  if (swap)
 	    {
 	      tree t = gimple_assign_rhs2 (stmt);
 	      gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: Constant-fold vector comparisons
  2012-10-22 20:33         ` Marc Glisse
@ 2012-10-23 10:05           ` Richard Biener
  0 siblings, 0 replies; 16+ messages in thread
From: Richard Biener @ 2012-10-23 10:05 UTC (permalink / raw)
  To: Marc Glisse; +Cc: gcc-patches

On Mon, Oct 22, 2012 at 10:31 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> On Mon, 15 Oct 2012, Richard Biener wrote:
>
>> On Fri, Oct 12, 2012 at 4:07 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
>>>
>>> On Sat, 29 Sep 2012, Marc Glisse wrote:
>>>
>>>> 1) it handles constant folding of vector comparisons,
>>>>
>>>> 2) it fixes another place where vectors are not expected
>>>
>>>
>>>
>>> Here is a new version of this patch.
>>>
>>> In a first try, I got bitten by the operator priorities in "a && b?c:d",
>>> which g++ doesn't warn about.
>>>
>>>
>>> 2012-10-12  Marc Glisse  <marc.glisse@inria.fr>
>>>
>>> gcc/
>>>         * tree-ssa-forwprop.c (forward_propagate_into_cond): Handle
>>> vectors.
>>>
>>>         * fold-const.c (fold_relational_const): Handle VECTOR_CST.
>>>
>>> gcc/testsuite/
>>>         * gcc.dg/tree-ssa/foldconst-6.c: New testcase.
>
>
> Here is a new version, with the same ChangeLog plus
>
>         * doc/generic.texi (VEC_COND_EXPR): Document current policy.
>
>
>> Which means I'd prefer if you simply condition the existing ~ and ^
>> handling on COND_EXPR.
>
>
> Done.
>
>
>>> -      if (integer_onep (tmp))
>>> +      if ((gimple_assign_rhs_code (stmt) == VEC_COND_EXPR)
>>> +         ? integer_all_onesp (tmp) : integer_onep (tmp))
>>
>>
>> and cache gimple_assign_rhs_code as a 'code' variable at the beginning
>> of the function.
>
>
> Done.
>
>
>>> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
>>> +    {
>>> +      int count = VECTOR_CST_NELTS (op0);
>>> +      tree *elts =  XALLOCAVEC (tree, count);
>>> +      gcc_assert (TREE_CODE (type) == VECTOR_TYPE);
>>
>>
>> A better check would be that VECTOR_CST_NELTS of type is the same
>> as that of op0.
>
>
> I wasn't sure which check you meant, so I added both possibilities. I am
> fine with removing either or both, actually.
>
>> Ok with these changes.
>
>
> A few too many changes, I prefer to re-post, in case.
>
>
>
> On Tue, 16 Oct 2012, Richard Biener wrote:
>
>>> I liked your idea of the signed boolean vector, as a way to express that
>>> we
>>> know some vector can only have values 0 and -1, but I am not sure how to
>>> use
>>> it.
>>
>>
>> Ah no, I didn't mean to suggest that ;)
>
>
> Maybe you didn't, but I still took the idea from your words ;-)
>
>
>>>> Thus, as we defined true to -1 and false to 0 we cannot, unless relaxing
>>>> what VEC_COND_EXRP treats as true or false, optimize any of ~ or ^ -1
>>>> away.
>>>
>>>
>>> It seems to me that what prevents from optimizing is if we want to keep
>>> the
>>> door open for a future relaxation of what VEC_COND_EXPR accepts as its
>>> first
>>> argument. Which means: produce only -1 and 0, but don't assume we are
>>> only
>>> reading -1 and 0 (unless we have a reason to know it, for instance
>>> because
>>> it is the result of a comparison), and don't assume any specific
>>> interpretation on those other values. Not sure how much that limits
>>> possible
>>> optimizations.
>>
>>
>> I'm not sure either - I'd rather leave the possibility open until we see
>> a compelling reason to go either way (read: a testcase where it matters
>> in practice).
>
>
> Ok, I implemented the safe way. My current opinion is that we should go with
> a VEC_COND_EXPR that only accepts 0 and -1 (it is easy to pass a LT_EXPR or
> NE_EXPR as first argument if that is what one wants), but it can wait.

Ok with ...

> --
> Marc Glisse
> Index: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> +++ gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c (revision 0)
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O -fdump-tree-ccp1" } */
> +
> +typedef long vec __attribute__ ((vector_size (2 * sizeof(long))));
> +
> +vec f ()
> +{
> +  vec a = { -2, 666 };
> +  vec b = { 3, 2 };
> +  return a < b;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "666" "ccp1"} } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/tree-ssa/foldconst-6.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 192695)
> +++ gcc/fold-const.c    (working copy)
> @@ -16123,20 +16123,45 @@ fold_relational_const (enum tree_code co
>                                           TREE_IMAGPART (op0),
>                                           TREE_IMAGPART (op1));
>        if (code == EQ_EXPR)
>         return fold_build2 (TRUTH_ANDIF_EXPR, type, rcond, icond);
>        else if (code == NE_EXPR)
>         return fold_build2 (TRUTH_ORIF_EXPR, type, rcond, icond);
>        else
>         return NULL_TREE;
>      }
>
> +  if (TREE_CODE (op0) == VECTOR_CST && TREE_CODE (op1) == VECTOR_CST)
> +    {
> +      unsigned count = VECTOR_CST_NELTS (op0);
> +      tree *elts =  XALLOCAVEC (tree, count);
> +      gcc_assert (VECTOR_CST_NELTS (op1) == count);
> +      gcc_assert (TYPE_VECTOR_SUBPARTS (type) == count);

gcc_assert (VECTOR_CST_NELTS (op1) == count
                   && TYPE_VECTOR_SUBPARTS (type) == count);

Thanks,
Richard.

> +
> +      for (unsigned i = 0; i < count; i++)
> +       {
> +         tree elem_type = TREE_TYPE (type);
> +         tree elem0 = VECTOR_CST_ELT (op0, i);
> +         tree elem1 = VECTOR_CST_ELT (op1, i);
> +
> +         tree tem = fold_relational_const (code, elem_type,
> +                                           elem0, elem1);
> +
> +         if (tem == NULL_TREE)
> +           return NULL_TREE;
> +
> +         elts[i] = build_int_cst (elem_type, integer_zerop (tem) ? 0 : -1);
> +       }
> +
> +      return build_vector (type, elts);
> +    }
> +
>    /* From here on we only handle LT, LE, GT, GE, EQ and NE.
>
>       To compute GT, swap the arguments and do LT.
>       To compute GE, do LT and invert the result.
>       To compute LE, swap the arguments, do LT and invert the result.
>       To compute NE, do EQ and invert the result.
>
>       Therefore, the code below must handle only EQ and LT.  */
>
>    if (code == LE_EXPR || code == GT_EXPR)
> Index: gcc/doc/generic.texi
> ===================================================================
> --- gcc/doc/generic.texi        (revision 192695)
> +++ gcc/doc/generic.texi        (working copy)
> @@ -1773,22 +1773,23 @@ elements of the two vectors are merged (
>  vector.
>
>  @item VEC_COND_EXPR
>  These nodes represent @code{?:} expressions.  The three operands must be
>  vectors of the same size and number of elements.  The second and third
>  operands must have the same type as the entire expression.  The first
>  operand is of signed integral vector type.  If an element of the first
>  operand evaluates to a zero value, the corresponding element of the
>  result is taken from the third operand. If it evaluates to a minus one
>  value, it is taken from the second operand. It should never evaluate to
> -any other value. In contrast with a @code{COND_EXPR}, all operands are
> -always evaluated.
> +any other value currently, but optimizations should not rely on that
> +property. In contrast with a @code{COND_EXPR}, all operands are always
> +evaluated.
>  @end table
>
>
>  @c ---------------------------------------------------------------------
>  @c Statements
>  @c ---------------------------------------------------------------------
>
>  @node Statements
>  @section Statements
>  @cindex Statements
> Index: gcc/tree-ssa-forwprop.c
> ===================================================================
> --- gcc/tree-ssa-forwprop.c     (revision 192695)
> +++ gcc/tree-ssa-forwprop.c     (working copy)
> @@ -544,66 +544,69 @@ forward_propagate_into_gimple_cond (gimp
>  /* Propagate from the ssa name definition statements of COND_EXPR
>     in the rhs of statement STMT into the conditional if that simplifies it.
>     Returns true zero if the stmt was changed.  */
>
>  static bool
>  forward_propagate_into_cond (gimple_stmt_iterator *gsi_p)
>  {
>    gimple stmt = gsi_stmt (*gsi_p);
>    tree tmp = NULL_TREE;
>    tree cond = gimple_assign_rhs1 (stmt);
> +  enum tree_code code = gimple_assign_rhs_code (stmt);
>    bool swap = false;
>
>    /* We can do tree combining on SSA_NAME and comparison expressions.  */
>    if (COMPARISON_CLASS_P (cond))
>      tmp = forward_propagate_into_comparison_1 (stmt, TREE_CODE (cond),
>                                                TREE_TYPE (cond),
>                                                TREE_OPERAND (cond, 0),
>                                                TREE_OPERAND (cond, 1));
>    else if (TREE_CODE (cond) == SSA_NAME)
>      {
> -      enum tree_code code;
> +      enum tree_code def_code;
>        tree name = cond;
>        gimple def_stmt = get_prop_source_stmt (name, true, NULL);
>        if (!def_stmt || !can_propagate_from (def_stmt))
>         return 0;
>
> -      code = gimple_assign_rhs_code (def_stmt);
> -      if (TREE_CODE_CLASS (code) == tcc_comparison)
> +      def_code = gimple_assign_rhs_code (def_stmt);
> +      if (TREE_CODE_CLASS (def_code) == tcc_comparison)
>         tmp = fold_build2_loc (gimple_location (def_stmt),
> -                              code,
> +                              def_code,
>                                TREE_TYPE (cond),
>                                gimple_assign_rhs1 (def_stmt),
>                                gimple_assign_rhs2 (def_stmt));
> -      else if ((code == BIT_NOT_EXPR
> -               && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
> -              || (code == BIT_XOR_EXPR
> -                  && integer_onep (gimple_assign_rhs2 (def_stmt))))
> +      else if (code == COND_EXPR
> +              && ((def_code == BIT_NOT_EXPR
> +                   && TYPE_PRECISION (TREE_TYPE (cond)) == 1)
> +                  || (def_code == BIT_XOR_EXPR
> +                      && integer_onep (gimple_assign_rhs2 (def_stmt)))))
>         {
>           tmp = gimple_assign_rhs1 (def_stmt);
>           swap = true;
>         }
>      }
>
>    if (tmp
>        && is_gimple_condexpr (tmp))
>      {
>        if (dump_file && tmp)
>         {
>           fprintf (dump_file, "  Replaced '");
>           print_generic_expr (dump_file, cond, 0);
>           fprintf (dump_file, "' with '");
>           print_generic_expr (dump_file, tmp, 0);
>           fprintf (dump_file, "'\n");
>         }
>
> -      if (integer_onep (tmp))
> +      if ((code == VEC_COND_EXPR) ? integer_all_onesp (tmp)
> +                                 : integer_onep (tmp))
>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs2 (stmt));
>        else if (integer_zerop (tmp))
>         gimple_assign_set_rhs_from_tree (gsi_p, gimple_assign_rhs3 (stmt));
>        else
>         {
>           gimple_assign_set_rhs1 (stmt, unshare_expr (tmp));
>           if (swap)
>             {
>               tree t = gimple_assign_rhs2 (stmt);
>               gimple_assign_set_rhs2 (stmt, gimple_assign_rhs3 (stmt));
>

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2012-10-23  9:55 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-09-29 15:16 Constant-fold vector comparisons Marc Glisse
2012-10-01 11:40 ` Richard Guenther
2012-10-01 15:58   ` vec_cond_expr adjustments Marc Glisse
2012-10-02 12:41     ` Richard Guenther
2012-10-05 15:01       ` Marc Glisse
2012-10-08  9:04         ` Richard Guenther
2012-10-08  9:34           ` Marc Glisse
2012-10-08  9:44             ` Richard Guenther
2012-10-10 23:37               ` Marc Glisse
2012-10-11 12:58                 ` Richard Biener
2012-10-12 14:10 ` Constant-fold vector comparisons Marc Glisse
2012-10-15 10:34   ` Richard Biener
2012-10-15 14:19     ` Marc Glisse
2012-10-16 11:38       ` Richard Biener
2012-10-22 20:33         ` Marc Glisse
2012-10-23 10:05           ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).