public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] Optimize some loops using bool types (PR tree-optimization/50596)
@ 2011-10-12 16:28 Jakub Jelinek
  2011-10-16 10:06 ` Ira Rosen
  0 siblings, 1 reply; 3+ messages in thread
From: Jakub Jelinek @ 2011-10-12 16:28 UTC (permalink / raw)
  To: Ira Rosen, Richard Guenther; +Cc: gcc-patches

Hi!

This patch allows vectorization of some loops that use
bool (which is especially important now that we use bool more often
even for stmts that weren't originally using bool in the sources),
in particular when bool is cast to an integer type and the bool rhs
has def stmts within the loop that are either BIT_{AND,IOR,XOR}_EXPR,
or just SSA_NAME assigns or bool -> another bool casts, or comparisons
(tested recursively).  In that case the pattern recognizer transforms
the comparisons into COND_EXPRs using a suitable integer type (the same
width as the comparison operands) and converts other bools to suitable
integer types, with casts added where needed.

The patch doesn't yet handle vectorization of storing into a bool array,
I'll work on that later.

Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

2011-10-12  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/50596
	* tree-vectorizer.h (NUM_PATTERNS): Increase to 7.
	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
	vect_recog_bool_pattern.
	(check_bool_pattern, adjust_bool_pattern_cast,
	adjust_bool_pattern, vect_recog_bool_pattern): New functions.

	* gcc.dg/vect/vect-cond-9.c: New test.

--- gcc/tree-vectorizer.h.jj	2011-10-10 09:41:29.000000000 +0200
+++ gcc/tree-vectorizer.h	2011-10-10 10:12:03.000000000 +0200
@@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
-#define NUM_PATTERNS 6
+#define NUM_PATTERNS 7
 void vect_pattern_recog (loop_vec_info);
 
 /* In tree-vectorizer.c.  */
--- gcc/tree-vect-patterns.c.jj	2011-10-10 09:41:29.000000000 +0200
+++ gcc/tree-vect-patterns.c	2011-10-10 18:23:41.000000000 +0200
@@ -51,13 +51,15 @@ static gimple vect_recog_over_widening_p
                                                  tree *);
 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
 						  tree *, tree *);
+static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_widen_mult_pattern,
 	vect_recog_widen_sum_pattern,
 	vect_recog_dot_prod_pattern,
 	vect_recog_pow_pattern,
 	vect_recog_over_widening_pattern,
-	vect_recog_mixed_size_cond_pattern};
+	vect_recog_mixed_size_cond_pattern,
+	vect_recog_bool_pattern};
 
 
 /* Function widened_name_p
@@ -1068,10 +1070,8 @@ vect_operation_fits_smaller_type (gimple
    constants.
    Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
    be 'type' or some intermediate type.  For now, we expect S5 to be a type
-   demotion operation.  We also check that S3 and S4 have only one use.
-.
+   demotion operation.  We also check that S3 and S4 have only one use.  */
 
-*/
 static gimple
 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                   tree *type_in, tree *type_out)
@@ -1333,6 +1333,356 @@ vect_recog_mixed_size_cond_pattern (VEC 
 }
 
 
+/* Helper function of vect_recog_bool_pattern.  Called recursively, return
+   true if bool VAR can be optimized that way.  */
+
+static bool
+check_bool_pattern (tree var, loop_vec_info loop_vinfo)
+{
+  gimple def_stmt;
+  enum vect_def_type dt;
+  tree def, rhs1;
+  enum tree_code rhs_code;
+
+  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
+    return false;
+
+  if (dt != vect_internal_def)
+    return false;
+
+  if (!is_gimple_assign (def_stmt))
+    return false;
+
+  if (!has_single_use (def))
+    return false;
+
+  rhs1 = gimple_assign_rhs1 (def_stmt);
+  rhs_code = gimple_assign_rhs_code (def_stmt);
+  switch (rhs_code)
+    {
+    case SSA_NAME:
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    CASE_CONVERT:
+      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
+	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
+	return false;
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    case BIT_NOT_EXPR:
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      if (!check_bool_pattern (rhs1, loop_vinfo))
+	return false;
+      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
+
+    default:
+      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+	{
+	  tree vecitype, comp_vectype;
+
+	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
+	  if (comp_vectype == NULL_TREE)
+	    return false;
+
+	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
+	    {
+	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
+	      tree itype
+		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+	      vecitype = get_vectype_for_scalar_type (itype);
+	      if (vecitype == NULL_TREE)
+		return false;
+	    }
+	  else
+	    vecitype = comp_vectype;
+	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
+	}
+      return false;
+    }
+}
+
+
+/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
+   stmt (SSA_NAME_DEF_STMT of VAR), but moving the COND_EXPR from RELATED_STMT
+   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */
+
+static tree
+adjust_bool_pattern_cast (tree type, tree var)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
+  gimple cast_stmt, pattern_stmt;
+
+  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
+  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
+  cast_stmt
+    = gimple_build_assign_with_ops (NOP_EXPR,
+				    vect_recog_temp_ssa_var (type, NULL),
+				    gimple_assign_lhs (pattern_stmt),
+				    NULL_TREE);
+  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
+  return gimple_assign_lhs (cast_stmt);
+}
+
+
+/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
+   recursively.  VAR is an SSA_NAME that should be transformed from bool
+   to a wider integer type, OUT_TYPE is the desired final integer type of
+   the whole pattern, TRUEVAL should be NULL unless optimizing
+   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
+   in the then_clause, STMTS is where statements with added pattern stmts
+   should be pushed to.  */
+
+static tree
+adjust_bool_pattern (tree var, tree out_type, tree trueval,
+		     VEC (gimple, heap) **stmts)
+{
+  gimple stmt = SSA_NAME_DEF_STMT (var);
+  enum tree_code rhs_code, def_rhs_code;
+  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
+  location_t loc;
+  gimple pattern_stmt, def_stmt;
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+  rhs2 = gimple_assign_rhs2 (stmt);
+  rhs_code = gimple_assign_rhs_code (stmt);
+  loc = gimple_location (stmt);
+  switch (rhs_code)
+    {
+    case SSA_NAME:
+    CASE_CONVERT:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (SSA_NAME,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, NULL_TREE);
+      break;
+
+    case BIT_NOT_EXPR:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, build_int_cst (itype, 1));
+      break;
+
+    case BIT_AND_EXPR:
+      /* Try to optimize x = y & (a < b ? 1 : 0); into
+	 x = (a < b ? y : 0);  */
+      def_stmt = SSA_NAME_DEF_STMT (rhs2);
+      def_rhs_code = gimple_assign_rhs_code (def_stmt);
+      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
+	{
+	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
+	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
+	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
+	    {
+	      gimple tstmt;
+	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
+	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
+	      tstmt = VEC_pop (gimple, *stmts);
+	      gcc_assert (tstmt == def_stmt);
+	      VEC_quick_push (gimple, *stmts, stmt);
+	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
+		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
+	      gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
+	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
+	      return irhs2;
+	    }
+	  else
+	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+	  goto and_ior_xor;
+	}
+      def_stmt = SSA_NAME_DEF_STMT (rhs1);
+      def_rhs_code = gimple_assign_rhs_code (def_stmt);
+      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
+	{
+	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
+	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
+	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
+	    {
+	      gimple tstmt;
+	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
+	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
+	      tstmt = VEC_pop (gimple, *stmts);
+	      gcc_assert (tstmt == def_stmt);
+	      VEC_quick_push (gimple, *stmts, stmt);
+	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
+		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
+	      gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
+	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
+	      return irhs1;
+	    }
+	  else
+	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+	  goto and_ior_xor;
+	}
+      /* FALLTHRU */
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+    and_ior_xor:
+      if (TYPE_PRECISION (TREE_TYPE (irhs1))
+	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
+	{
+	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
+	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
+	  int out_prec = TYPE_PRECISION (out_type);
+	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
+	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
+	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
+	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
+	  else
+	    {
+	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
+	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
+	    }
+	}
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (rhs_code,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, irhs2);
+      break;
+
+    default:
+      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
+      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
+	  || TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+	{
+	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
+	  itype
+	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+	}
+      else
+	itype = TREE_TYPE (rhs1);
+      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
+      if (trueval == NULL_TREE)
+	trueval = build_int_cst (itype, 1);
+      else
+	gcc_checking_assert (useless_type_conversion_p (itype,
+							TREE_TYPE (trueval)));
+      pattern_stmt
+	= gimple_build_assign_with_ops3 (COND_EXPR,
+					 vect_recog_temp_ssa_var (itype, NULL),
+					 cond_expr, trueval,
+					 build_int_cst (itype, 0));
+      break;
+    }
+
+  VEC_safe_push (gimple, heap, *stmts, stmt);
+  gimple_set_location (pattern_stmt, loc);
+  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
+  return gimple_assign_lhs (pattern_stmt);
+}
+
+
+/* Function vect_recog_bool_pattern
+
+   Try to find pattern like following:
+
+     bool a_b, b_b, c_b, d_b, e_b;
+     TYPE f_T;
+   loop:
+     S1  a_b = x1 CMP1 y1;
+     S2  b_b = x2 CMP2 y2;
+     S3  c_b = a_b & b_b;
+     S4  d_b = x3 CMP3 y3;
+     S5  e_b = c_b | d_b;
+     S6  f_T = (TYPE) e_b;
+
+   where type 'TYPE' is an integral type.
+
+   Input:
+
+   * LAST_STMT: A stmt at the end from which the pattern
+		search begins, i.e. cast of a bool to
+		an integer type.
+
+   Output:
+
+   * TYPE_IN: The type of the input arguments to the pattern.
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the pattern.
+
+	Assuming size of TYPE is the same as size of all comparisons
+	(otherwise some casts would be added where needed), the above
+	sequence we create related pattern stmts:
+	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
+	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
+	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
+	S5'  e_T = c_T | d_T;
+	S6'  f_T = e_T;
+
+	Instead of the above S3' we could emit:
+	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
+	S3'  c_T = a_T | b_T;
+	but the above is more efficient.  */
+
+static gimple
+vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+			 tree *type_out)
+{
+  gimple last_stmt = VEC_pop (gimple, *stmts);
+  enum tree_code rhs_code;
+  tree var, lhs, rhs, vectype;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  gimple pattern_stmt;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  var = gimple_assign_rhs1 (last_stmt);
+  lhs = gimple_assign_lhs (last_stmt);
+
+  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
+       || !TYPE_UNSIGNED (TREE_TYPE (var)))
+      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  if (CONVERT_EXPR_CODE_P (rhs_code))
+    {
+      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
+	return NULL;
+      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
+      if (vectype == NULL_TREE)
+	return NULL;
+
+      if (!check_bool_pattern (var, loop_vinfo))
+	return NULL;
+
+      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
+      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+	pattern_stmt
+	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
+      else
+	pattern_stmt
+	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
+      *type_out = vectype;
+      *type_in = vectype;
+      VEC_safe_push (gimple, heap, *stmts, last_stmt);
+      return pattern_stmt;
+    }
+  else
+    return NULL;
+}
+
+
 /* Mark statements that are involved in a pattern.  */
 
 static inline void
--- gcc/testsuite/gcc.dg/vect/vect-cond-9.c.jj	2011-10-12 14:39:13.000000000 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-9.c	2011-10-12 14:56:17.000000000 +0200
@@ -0,0 +1,200 @@
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N], d[N];
+int j[N];
+unsigned char k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      unsigned int x = a[i] < b[i] ? -1 : 0;
+      unsigned int y = c[i] < d[i] ? -1 : 0;
+      j[i] = (x & y) >> 31;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      j[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    j[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      unsigned int x = a[i] < b[i] ? -1 : 0;
+      unsigned int y = c[i] < d[i] ? -1 : 0;
+      j[i] = (x | y) >> 31;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      j[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    j[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+    }
+  for (i = 0; i < N; i++)
+    {
+      switch ((i / 9) % 3)
+	{
+	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
+	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
+	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
+	}
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f5 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f7 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f9 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

	Jakub

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Optimize some loops using bool types (PR tree-optimization/50596)
  2011-10-12 16:28 [PATCH] Optimize some loops using bool types (PR tree-optimization/50596) Jakub Jelinek
@ 2011-10-16 10:06 ` Ira Rosen
  2011-10-16 13:48   ` Jakub Jelinek
  0 siblings, 1 reply; 3+ messages in thread
From: Ira Rosen @ 2011-10-16 10:06 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Richard Guenther, gcc-patches

On 12 October 2011 17:54, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!

Hi,

>
> This patch allows vectorization of some loops that use
> bool (which is especially important now that we use bool more often
> even for stmts that weren't originally using bool in the sources),
> in particular (when bool is cast to an integer type, and the bool rhs
> has def stmts within the loop as either BIT_{AND,IOR,XOR}_EXPR,
> or just SSA_NAME assigns or bool -> another bool casts, or comparisons
> (tested recursively).  In that case the pattern recognizer transforms
> the comparisons into COND_EXPRs using suitable integer type (the same width
> as the comparison operands) and other bools to suitable integer types
> with casts added where needed.
>
> The patch doesn't yet handle vectorization of storing into a bool array,
> I'll work on that later.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux.  Ok for trunk?

OK with:

> +
> +/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
> +   stmt (SSA_NAME_DEF_STMT of VAR), but moving the COND_EXPR from RELATED_STMT

by moving?

> +   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */
> +
> +static tree
> +adjust_bool_pattern_cast (tree type, tree var)
> +{
> +  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
> +  gimple cast_stmt, pattern_stmt;
> +
> +  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
> +  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
> +  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
> +  cast_stmt
> +    = gimple_build_assign_with_ops (NOP_EXPR,
> +                                   vect_recog_temp_ssa_var (type, NULL),
> +                                   gimple_assign_lhs (pattern_stmt),
> +                                   NULL_TREE);
> +  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
> +  return gimple_assign_lhs (cast_stmt);
> +}
> +
> +
> +/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
> +   recursively.  VAR is an SSA_NAME that should be transformed from bool
> +   to a wider integer type, OUT_TYPE is the desired final integer type of
> +   the whole pattern, TRUEVAL should be NULL unless optimizing
> +   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
> +   in the then_clause, STMTS is where statements with added pattern stmts
> +   should be pushed to.  */
> +
> +static tree
> +adjust_bool_pattern (tree var, tree out_type, tree trueval,
> +                    VEC (gimple, heap) **stmts)
> +{
> +  gimple stmt = SSA_NAME_DEF_STMT (var);
> +  enum tree_code rhs_code, def_rhs_code;
> +  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
> +  location_t loc;
> +  gimple pattern_stmt, def_stmt;
> +
> +  rhs1 = gimple_assign_rhs1 (stmt);
> +  rhs2 = gimple_assign_rhs2 (stmt);
> +  rhs_code = gimple_assign_rhs_code (stmt);
> +  loc = gimple_location (stmt);
> +  switch (rhs_code)
> +    {
> +    case SSA_NAME:
> +    CASE_CONVERT:
> +      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +      itype = TREE_TYPE (irhs1);
> +      pattern_stmt
> +       = gimple_build_assign_with_ops (SSA_NAME,
> +                                       vect_recog_temp_ssa_var (itype, NULL),
> +                                       irhs1, NULL_TREE);
> +      break;
> +
> +    case BIT_NOT_EXPR:
> +      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +      itype = TREE_TYPE (irhs1);
> +      pattern_stmt
> +       = gimple_build_assign_with_ops (BIT_XOR_EXPR,
> +                                       vect_recog_temp_ssa_var (itype, NULL),
> +                                       irhs1, build_int_cst (itype, 1));
> +      break;
> +
> +    case BIT_AND_EXPR:
> +      /* Try to optimize x = y & (a < b ? 1 : 0); into
> +        x = (a < b ? y : 0);  */

Could you please add some more explanations here? I found it very
difficult to follow. It would be nice to have an example here (similar
to vect_recog_bool_pattern) to illustrate what these statements and
operands are.


> +      def_stmt = SSA_NAME_DEF_STMT (rhs2);
> +      def_rhs_code = gimple_assign_rhs_code (def_stmt);
> +      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
> +       {
> +         tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
> +         irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +         if (TYPE_PRECISION (TREE_TYPE (irhs1))
> +             == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
> +           {
> +             gimple tstmt;
> +             stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
> +             irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
> +             tstmt = VEC_pop (gimple, *stmts);
> +             gcc_assert (tstmt == def_stmt);
> +             VEC_quick_push (gimple, *stmts, stmt);
> +             STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
> +               = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
> +             gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
> +             STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
> +             return irhs2;
> +           }
> +         else
> +           irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
> +         goto and_ior_xor;
> +       }
> +      def_stmt = SSA_NAME_DEF_STMT (rhs1);
> +      def_rhs_code = gimple_assign_rhs_code (def_stmt);
> +      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
> +       {
> +         tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
> +         irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
> +         if (TYPE_PRECISION (TREE_TYPE (irhs2))
> +             == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
> +           {
> +             gimple tstmt;
> +             stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
> +             irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
> +             tstmt = VEC_pop (gimple, *stmts);
> +             gcc_assert (tstmt == def_stmt);
> +             VEC_quick_push (gimple, *stmts, stmt);
> +             STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
> +               = STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
> +             gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
> +             STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
> +             return irhs1;
> +           }
> +         else
> +           irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
> +         goto and_ior_xor;
> +       }
> +      /* FALLTHRU */

Thanks,
Ira

^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] Optimize some loops using bool types (PR tree-optimization/50596)
  2011-10-16 10:06 ` Ira Rosen
@ 2011-10-16 13:48   ` Jakub Jelinek
  0 siblings, 0 replies; 3+ messages in thread
From: Jakub Jelinek @ 2011-10-16 13:48 UTC (permalink / raw)
  To: Ira Rosen; +Cc: Richard Guenther, gcc-patches

On Sun, Oct 16, 2011 at 09:56:57AM +0200, Ira Rosen wrote:
> by moving?

Yeah.

> Could you please add some more explanations here? I found it very
> difficult to follow. It would be nice to have an example here (similar
> to vect_recog_bool_pattern) to illustrate what these statements and
> operands are.

Done.  Here is what I've committed.  Thanks for the review.

2011-10-16  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/50596
	* tree-vectorizer.h (NUM_PATTERNS): Increase to 7.
	* tree-vect-patterns.c (vect_vect_recog_func_ptrs): Add
	vect_recog_bool_pattern.
	(check_bool_pattern, adjust_bool_pattern_cast,
	adjust_bool_pattern, vect_recog_bool_pattern): New functions.

	* gcc.dg/vect/vect-cond-9.c: New test.

--- gcc/tree-vectorizer.h.jj	2011-10-12 21:12:54.822647510 +0200
+++ gcc/tree-vectorizer.h	2011-10-16 14:54:40.571765729 +0200
@@ -902,7 +902,7 @@ extern void vect_slp_transform_bb (basic
    Additional pattern recognition functions can (and will) be added
    in the future.  */
 typedef gimple (* vect_recog_func_ptr) (VEC (gimple, heap) **, tree *, tree *);
-#define NUM_PATTERNS 6
+#define NUM_PATTERNS 7
 void vect_pattern_recog (loop_vec_info);
 
 /* In tree-vectorizer.c.  */
--- gcc/tree-vect-patterns.c.jj	2011-10-12 21:12:54.884989937 +0200
+++ gcc/tree-vect-patterns.c	2011-10-16 15:07:22.647880120 +0200
@@ -51,13 +51,15 @@ static gimple vect_recog_over_widening_p
                                                  tree *);
 static gimple vect_recog_mixed_size_cond_pattern (VEC (gimple, heap) **,
 						  tree *, tree *);
+static gimple vect_recog_bool_pattern (VEC (gimple, heap) **, tree *, tree *);
 static vect_recog_func_ptr vect_vect_recog_func_ptrs[NUM_PATTERNS] = {
 	vect_recog_widen_mult_pattern,
 	vect_recog_widen_sum_pattern,
 	vect_recog_dot_prod_pattern,
 	vect_recog_pow_pattern,
 	vect_recog_over_widening_pattern,
-	vect_recog_mixed_size_cond_pattern};
+	vect_recog_mixed_size_cond_pattern,
+	vect_recog_bool_pattern};
 
 
 /* Function widened_name_p
@@ -1068,10 +1070,8 @@ vect_operation_fits_smaller_type (gimple
    constants.
    Check if S3 and S4 can be done on a smaller type than 'TYPE', it can either
    be 'type' or some intermediate type.  For now, we expect S5 to be a type
-   demotion operation.  We also check that S3 and S4 have only one use.
-.
+   demotion operation.  We also check that S3 and S4 have only one use.  */
 
-*/
 static gimple
 vect_recog_over_widening_pattern (VEC (gimple, heap) **stmts,
                                   tree *type_in, tree *type_out)
@@ -1333,6 +1333,387 @@ vect_recog_mixed_size_cond_pattern (VEC 
 }
 
 
+/* Helper function of vect_recog_bool_pattern.  Called recursively, return
+   true if bool VAR can be optimized that way.  */
+
+static bool
+check_bool_pattern (tree var, loop_vec_info loop_vinfo)
+{
+  gimple def_stmt;
+  enum vect_def_type dt;
+  tree def, rhs1;
+  enum tree_code rhs_code;
+
+  if (!vect_is_simple_use (var, loop_vinfo, NULL, &def_stmt, &def, &dt))
+    return false;
+
+  if (dt != vect_internal_def)
+    return false;
+
+  if (!is_gimple_assign (def_stmt))
+    return false;
+
+  if (!has_single_use (def))
+    return false;
+
+  rhs1 = gimple_assign_rhs1 (def_stmt);
+  rhs_code = gimple_assign_rhs_code (def_stmt);
+  switch (rhs_code)
+    {
+    case SSA_NAME:
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    CASE_CONVERT:
+      if ((TYPE_PRECISION (TREE_TYPE (rhs1)) != 1
+	   || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+	  && TREE_CODE (TREE_TYPE (rhs1)) != BOOLEAN_TYPE)
+	return false;
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    case BIT_NOT_EXPR:
+      return check_bool_pattern (rhs1, loop_vinfo);
+
+    case BIT_AND_EXPR:
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      if (!check_bool_pattern (rhs1, loop_vinfo))
+	return false;
+      return check_bool_pattern (gimple_assign_rhs2 (def_stmt), loop_vinfo);
+
+    default:
+      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+	{
+	  tree vecitype, comp_vectype;
+
+	  comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1));
+	  if (comp_vectype == NULL_TREE)
+	    return false;
+
+	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE)
+	    {
+	      enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
+	      tree itype
+		= build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+	      vecitype = get_vectype_for_scalar_type (itype);
+	      if (vecitype == NULL_TREE)
+		return false;
+	    }
+	  else
+	    vecitype = comp_vectype;
+	  return expand_vec_cond_expr_p (vecitype, comp_vectype);
+	}
+      return false;
+    }
+}
+
+
+/* Helper function of adjust_bool_pattern.  Add a cast to TYPE to a previous
+   stmt (SSA_NAME_DEF_STMT of VAR) by moving the COND_EXPR from RELATED_STMT
+   to PATTERN_DEF_STMT and adding a cast as RELATED_STMT.  */
+
+static tree
+adjust_bool_pattern_cast (tree type, tree var)
+{
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (SSA_NAME_DEF_STMT (var));
+  gimple cast_stmt, pattern_stmt;
+
+  gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo));
+  pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
+  STMT_VINFO_PATTERN_DEF_STMT (stmt_vinfo) = pattern_stmt;
+  cast_stmt
+    = gimple_build_assign_with_ops (NOP_EXPR,
+				    vect_recog_temp_ssa_var (type, NULL),
+				    gimple_assign_lhs (pattern_stmt),
+				    NULL_TREE);
+  STMT_VINFO_RELATED_STMT (stmt_vinfo) = cast_stmt;
+  return gimple_assign_lhs (cast_stmt);
+}
+
+
+/* Helper function of vect_recog_bool_pattern.  Do the actual transformations,
+   recursively.  VAR is an SSA_NAME that should be transformed from bool
+   to a wider integer type, OUT_TYPE is the desired final integer type of
+   the whole pattern, TRUEVAL should be NULL unless optimizing
+   BIT_AND_EXPR into a COND_EXPR with one integer from one of the operands
+   in the then_clause, STMTS is where statements with added pattern stmts
+   should be pushed to.  */
+
+static tree
+adjust_bool_pattern (tree var, tree out_type, tree trueval,
+		     VEC (gimple, heap) **stmts)
+{
+  gimple stmt = SSA_NAME_DEF_STMT (var);
+  enum tree_code rhs_code, def_rhs_code;
+  tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2;
+  location_t loc;
+  gimple pattern_stmt, def_stmt;
+
+  rhs1 = gimple_assign_rhs1 (stmt);
+  rhs2 = gimple_assign_rhs2 (stmt);
+  rhs_code = gimple_assign_rhs_code (stmt);
+  loc = gimple_location (stmt);
+  switch (rhs_code)
+    {
+    case SSA_NAME:
+    CASE_CONVERT:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (SSA_NAME,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, NULL_TREE);
+      break;
+
+    case BIT_NOT_EXPR:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (BIT_XOR_EXPR,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, build_int_cst (itype, 1));
+      break;
+
+    case BIT_AND_EXPR:
+      /* Try to optimize x = y & (a < b ? 1 : 0); into
+	 x = (a < b ? y : 0);
+
+	 E.g. for:
+	   bool a_b, b_b, c_b;
+	   TYPE d_T;
+
+	   S1  a_b = x1 CMP1 y1;
+	   S2  b_b = x2 CMP2 y2;
+	   S3  c_b = a_b & b_b;
+	   S4  d_T = (TYPE) c_b;
+
+	 we would normally emit:
+
+	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
+	   S2'  b_T = x2 CMP2 y2 ? 1 : 0;
+	   S3'  c_T = a_T & b_T;
+	   S4'  d_T = c_T;
+
+	 but we can save one stmt by using the
+	 result of one of the COND_EXPRs in the other COND_EXPR and leave
+	 BIT_AND_EXPR stmt out:
+
+	   S1'  a_T = x1 CMP1 y1 ? 1 : 0;
+	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
+	   S4'  d_T = c_T;
+
+	 At least when VEC_COND_EXPR is implemented using masks
+	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
+	 computes the comparison masks and ands it, in one case with
+	 all ones vector, in the other case with a vector register.
+	 Don't do this for BIT_IOR_EXPR, because cond ? 1 : var; is
+	 often more expensive.  */
+      def_stmt = SSA_NAME_DEF_STMT (rhs2);
+      def_rhs_code = gimple_assign_rhs_code (def_stmt);
+      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
+	{
+	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
+	  irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+	  if (TYPE_PRECISION (TREE_TYPE (irhs1))
+	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
+	    {
+	      gimple tstmt;
+	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
+	      irhs2 = adjust_bool_pattern (rhs2, out_type, irhs1, stmts);
+	      tstmt = VEC_pop (gimple, *stmts);
+	      gcc_assert (tstmt == def_stmt);
+	      VEC_quick_push (gimple, *stmts, stmt);
+	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
+		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
+	      gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
+	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
+	      return irhs2;
+	    }
+	  else
+	    irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+	  goto and_ior_xor;
+	}
+      def_stmt = SSA_NAME_DEF_STMT (rhs1);
+      def_rhs_code = gimple_assign_rhs_code (def_stmt);
+      if (TREE_CODE_CLASS (def_rhs_code) == tcc_comparison)
+	{
+	  tree def_rhs1 = gimple_assign_rhs1 (def_stmt);
+	  irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+	  if (TYPE_PRECISION (TREE_TYPE (irhs2))
+	      == GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (def_rhs1))))
+	    {
+	      gimple tstmt;
+	      stmt_vec_info stmt_def_vinfo = vinfo_for_stmt (def_stmt);
+	      irhs1 = adjust_bool_pattern (rhs1, out_type, irhs2, stmts);
+	      tstmt = VEC_pop (gimple, *stmts);
+	      gcc_assert (tstmt == def_stmt);
+	      VEC_quick_push (gimple, *stmts, stmt);
+	      STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt))
+		= STMT_VINFO_RELATED_STMT (stmt_def_vinfo);
+	      gcc_assert (!STMT_VINFO_PATTERN_DEF_STMT (stmt_def_vinfo));
+	      STMT_VINFO_RELATED_STMT (stmt_def_vinfo) = NULL;
+	      return irhs1;
+	    }
+	  else
+	    irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+	  goto and_ior_xor;
+	}
+      /* FALLTHRU */
+    case BIT_IOR_EXPR:
+    case BIT_XOR_EXPR:
+      irhs1 = adjust_bool_pattern (rhs1, out_type, NULL_TREE, stmts);
+      irhs2 = adjust_bool_pattern (rhs2, out_type, NULL_TREE, stmts);
+    and_ior_xor:
+      if (TYPE_PRECISION (TREE_TYPE (irhs1))
+	  != TYPE_PRECISION (TREE_TYPE (irhs2)))
+	{
+	  int prec1 = TYPE_PRECISION (TREE_TYPE (irhs1));
+	  int prec2 = TYPE_PRECISION (TREE_TYPE (irhs2));
+	  int out_prec = TYPE_PRECISION (out_type);
+	  if (absu_hwi (out_prec - prec1) < absu_hwi (out_prec - prec2))
+	    irhs2 = adjust_bool_pattern_cast (TREE_TYPE (irhs1), rhs2);
+	  else if (absu_hwi (out_prec - prec1) > absu_hwi (out_prec - prec2))
+	    irhs1 = adjust_bool_pattern_cast (TREE_TYPE (irhs2), rhs1);
+	  else
+	    {
+	      irhs1 = adjust_bool_pattern_cast (out_type, rhs1);
+	      irhs2 = adjust_bool_pattern_cast (out_type, rhs2);
+	    }
+	}
+      itype = TREE_TYPE (irhs1);
+      pattern_stmt
+	= gimple_build_assign_with_ops (rhs_code,
+					vect_recog_temp_ssa_var (itype, NULL),
+					irhs1, irhs2);
+      break;
+
+    default:
+      gcc_assert (TREE_CODE_CLASS (rhs_code) == tcc_comparison);
+      if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
+	  || TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+	{
+	  enum machine_mode mode = TYPE_MODE (TREE_TYPE (rhs1));
+	  itype
+	    = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 0);
+	}
+      else
+	itype = TREE_TYPE (rhs1);
+      cond_expr = build2_loc (loc, rhs_code, itype, rhs1, rhs2);
+      if (trueval == NULL_TREE)
+	trueval = build_int_cst (itype, 1);
+      else
+	gcc_checking_assert (useless_type_conversion_p (itype,
+							TREE_TYPE (trueval)));
+      pattern_stmt
+	= gimple_build_assign_with_ops3 (COND_EXPR,
+					 vect_recog_temp_ssa_var (itype, NULL),
+					 cond_expr, trueval,
+					 build_int_cst (itype, 0));
+      break;
+    }
+
+  VEC_safe_push (gimple, heap, *stmts, stmt);
+  gimple_set_location (pattern_stmt, loc);
+  STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)) = pattern_stmt;
+  return gimple_assign_lhs (pattern_stmt);
+}
+
+
+/* Function vect_recog_bool_pattern
+
+   Try to find pattern like following:
+
+     bool a_b, b_b, c_b, d_b, e_b;
+     TYPE f_T;
+   loop:
+     S1  a_b = x1 CMP1 y1;
+     S2  b_b = x2 CMP2 y2;
+     S3  c_b = a_b & b_b;
+     S4  d_b = x3 CMP3 y3;
+     S5  e_b = c_b | d_b;
+     S6  f_T = (TYPE) e_b;
+
+   where type 'TYPE' is an integral type.
+
+   Input:
+
+   * LAST_STMT: A stmt at the end from which the pattern
+		search begins, i.e. cast of a bool to
+		an integer type.
+
+   Output:
+
+   * TYPE_IN: The type of the input arguments to the pattern.
+
+   * TYPE_OUT: The type of the output of this pattern.
+
+   * Return value: A new stmt that will be used to replace the pattern.
+
+	Assuming size of TYPE is the same as size of all comparisons
+	(otherwise some casts would be added where needed), for the above
+	sequence we create related pattern stmts:
+	S1'  a_T = x1 CMP1 y1 ? 1 : 0;
+	S3'  c_T = x2 CMP2 y2 ? a_T : 0;
+	S4'  d_T = x3 CMP3 y3 ? 1 : 0;
+	S5'  e_T = c_T | d_T;
+	S6'  f_T = e_T;
+
+	Instead of the above S3' we could emit:
+	S2'  b_T = x2 CMP2 y2 ? 1 : 0;
+	S3'  c_T = a_T | b_T;
+	but the above is more efficient.  */
+
+static gimple
+vect_recog_bool_pattern (VEC (gimple, heap) **stmts, tree *type_in,
+			 tree *type_out)
+{
+  gimple last_stmt = VEC_pop (gimple, *stmts);
+  enum tree_code rhs_code;
+  tree var, lhs, rhs, vectype;
+  stmt_vec_info stmt_vinfo = vinfo_for_stmt (last_stmt);
+  loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
+  gimple pattern_stmt;
+
+  if (!is_gimple_assign (last_stmt))
+    return NULL;
+
+  var = gimple_assign_rhs1 (last_stmt);
+  lhs = gimple_assign_lhs (last_stmt);
+
+  if ((TYPE_PRECISION (TREE_TYPE (var)) != 1
+       || !TYPE_UNSIGNED (TREE_TYPE (var)))
+      && TREE_CODE (TREE_TYPE (var)) != BOOLEAN_TYPE)
+    return NULL;
+
+  rhs_code = gimple_assign_rhs_code (last_stmt);
+  if (CONVERT_EXPR_CODE_P (rhs_code))
+    {
+      if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE)
+	return NULL;
+      vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs));
+      if (vectype == NULL_TREE)
+	return NULL;
+
+      if (!check_bool_pattern (var, loop_vinfo))
+	return NULL;
+
+      rhs = adjust_bool_pattern (var, TREE_TYPE (lhs), NULL_TREE, stmts);
+      lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL);
+      if (useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)))
+	pattern_stmt
+	  = gimple_build_assign_with_ops (SSA_NAME, lhs, rhs, NULL_TREE);
+      else
+	pattern_stmt
+	  = gimple_build_assign_with_ops (NOP_EXPR, lhs, rhs, NULL_TREE);
+      *type_out = vectype;
+      *type_in = vectype;
+      VEC_safe_push (gimple, heap, *stmts, last_stmt);
+      return pattern_stmt;
+    }
+  else
+    return NULL;
+}
+
+
 /* Mark statements that are involved in a pattern.  */
 
 static inline void
--- gcc/testsuite/gcc.dg/vect/vect-cond-9.c.jj	2011-10-16 14:54:40.584818542 +0200
+++ gcc/testsuite/gcc.dg/vect/vect-cond-9.c	2011-10-16 14:54:40.584818542 +0200
@@ -0,0 +1,200 @@
+/* { dg-require-effective-target vect_cond_mixed } */
+
+#include "tree-vect.h"
+
+#define N 1024
+float a[N], b[N], c[N], d[N];
+int j[N];
+unsigned char k[N];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      unsigned int x = a[i] < b[i] ? -1 : 0;
+      unsigned int y = c[i] < d[i] ? -1 : 0;
+      j[i] = (x & y) >> 31;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      j[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    j[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x & y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) & (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      unsigned int x = a[i] < b[i] ? -1 : 0;
+      unsigned int y = c[i] < d[i] ? -1 : 0;
+      j[i] = (x | y) >> 31;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      j[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    j[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+__attribute__((noinline, noclone)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    {
+      int x = a[i] < b[i];
+      int y = c[i] < d[i];
+      k[i] = x | y;
+    }
+}
+
+__attribute__((noinline, noclone)) void
+f10 (void)
+{
+  int i;
+  for (i = 0; i < N; ++i)
+    k[i] = (a[i] < b[i]) | (c[i] < d[i]);
+}
+
+int
+main ()
+{
+  int i;
+
+  check_vect ();
+
+  for (i = 0; i < N; i++)
+    {
+      switch (i % 9)
+	{
+	case 0: asm (""); a[i] = - i - 1; b[i] = i + 1; break;
+	case 1: a[i] = 0; b[i] = 0; break;
+	case 2: a[i] = i + 1; b[i] = - i - 1; break;
+	case 3: a[i] = i; b[i] = i + 7; break;
+	case 4: a[i] = i; b[i] = i; break;
+	case 5: a[i] = i + 16; b[i] = i + 3; break;
+	case 6: a[i] = - i - 5; b[i] = - i; break;
+	case 7: a[i] = - i; b[i] = - i; break;
+	case 8: a[i] = - i; b[i] = - i - 7; break;
+	}
+    }
+  for (i = 0; i < N; i++)
+    {
+      switch ((i / 9) % 3)
+	{
+	case 0: c[i] = a[i / 9]; d[i] = b[i / 9]; break;
+	case 1: c[i] = a[i / 9 + 3]; d[i] = b[i / 9 + 3]; break;
+	case 2: c[i] = a[i / 9 + 6]; d[i] = b[i / 9 + 6]; break;
+	}
+    }
+  f1 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f2 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f3 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f4 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f5 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 && ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f7 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (j, -6, sizeof (j));
+  f9 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+  f10 ();
+  for (i = 0; i < N; i++)
+    if (k[i] != ((i % 3) == 0 || ((i / 9) % 3) == 0))
+      abort ();
+  __builtin_memset (k, -6, sizeof (k));
+
+  return 0;
+}
+
+/* { dg-final { scan-tree-dump-times "note: vectorized 1 loops" 10 "vect" } } */
+/* { dg-final { cleanup-tree-dump "vect" } } */

	Jakub

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-10-16 13:13 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-10-12 16:28 [PATCH] Optimize some loops using bool types (PR tree-optimization/50596) Jakub Jelinek
2011-10-16 10:06 ` Ira Rosen
2011-10-16 13:48   ` Jakub Jelinek

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).