public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
@ 2019-09-24 10:25 Martin Liška
  2019-09-24 11:11 ` Richard Sandiford
  2020-04-01 10:19 ` [stage1][PATCH] Lower VEC_COND_EXPR into internal functions Martin Liška
  0 siblings, 2 replies; 65+ messages in thread
From: Martin Liška @ 2019-09-24 10:25 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener

[-- Attachment #1: Type: text/plain, Size: 2383 bytes --]

Hi.

The patch introduces a couple of new TREE_CODEs that will help us to have
a proper GIMPLE representation of the current VEC_COND_EXPR. Right now,
the first argument is typically a GENERIC tcc_expression tree with 2 operands
that are visited at various places in GIMPLE code. That said, based on the discussion
with Richi, I'm suggesting we come up with e.g.
VEC_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
and is only valid in GENERIC and gimplifier will take care of the corresponding transition.

The patch is a prototype and missing bits are:
- folding support addition for GIMPLE_QUATERNARY_RHS is missing
- fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
  UNLT_EXPR and others are not supported right now
- comments are missing for various functions added

Apart from that, I was able to bootstrap and run tests with quite small fallout.
Thoughts?
Martin

---
 gcc/cfgexpand.c             | 33 ++++++++-----
 gcc/expr.c                  | 36 +++++++++-----
 gcc/expr.h                  |  2 +-
 gcc/gimple-expr.c           | 14 +++++-
 gcc/gimple-expr.h           |  6 +--
 gcc/gimple-fold.c           | 15 +++++-
 gcc/gimple-match-head.c     |  3 ++
 gcc/gimple-pretty-print.c   | 76 ++++++++++++++++++++--------
 gcc/gimple.c                | 95 ++++++++++++++++++++++++++++++-----
 gcc/gimple.h                | 82 +++++++++++++++++++++++++-----
 gcc/gimplify.c              | 42 +++++++++++-----
 gcc/optabs.c                | 58 +++++++++-------------
 gcc/optabs.h                |  2 +-
 gcc/tree-cfg.c              | 99 ++++++++++++++++++++++++++++++++-----
 gcc/tree-inline.c           |  2 +-
 gcc/tree-ssa-forwprop.c     | 11 +++--
 gcc/tree-ssa-loop-niter.c   |  4 +-
 gcc/tree-ssa-operands.c     |  1 -
 gcc/tree-ssa-pre.c          |  5 +-
 gcc/tree-ssa-reassoc.c      |  4 +-
 gcc/tree-ssa-scopedtables.c | 46 ++++++++++++++++-
 gcc/tree-ssa-scopedtables.h |  2 +
 gcc/tree-vect-generic.c     | 53 +++++++++++---------
 gcc/tree-vect-loop.c        | 50 ++++++++-----------
 gcc/tree-vect-patterns.c    |  4 +-
 gcc/tree-vect-stmts.c       | 17 ++++---
 gcc/tree.def                |  7 +++
 gcc/tree.h                  | 64 ++++++++++++++++++++++++
 28 files changed, 620 insertions(+), 213 deletions(-)



[-- Attachment #2: 0001-Come-up-with-VECT_COND_OP_EXPRs.patch --]
[-- Type: text/x-patch, Size: 60874 bytes --]

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index a2f96239e2f..eb5ada52a3b 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -106,6 +106,12 @@ gimple_assign_rhs_to_tree (gimple *stmt)
   tree t;
   switch (get_gimple_rhs_class (gimple_expr_code (stmt)))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      t = build4 (gimple_assign_rhs_code (stmt),
+		  TREE_TYPE (gimple_assign_lhs (stmt)),
+		  gimple_assign_rhs1 (stmt), gimple_assign_rhs2 (stmt),
+		  gimple_assign_rhs3 (stmt), gimple_assign_rhs4 (stmt));
+      break;
     case GIMPLE_TERNARY_RHS:
       t = build3 (gimple_assign_rhs_code (stmt),
 		  TREE_TYPE (gimple_assign_lhs (stmt)),
@@ -3793,17 +3799,20 @@ expand_gimple_stmt_1 (gimple *stmt)
 	    ops.type = TREE_TYPE (lhs);
 	    switch (get_gimple_rhs_class (ops.code))
 	      {
-		case GIMPLE_TERNARY_RHS:
-		  ops.op2 = gimple_assign_rhs3 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_BINARY_RHS:
-		  ops.op1 = gimple_assign_rhs2 (assign_stmt);
-		  /* Fallthru */
-		case GIMPLE_UNARY_RHS:
-		  ops.op0 = gimple_assign_rhs1 (assign_stmt);
-		  break;
-		default:
-		  gcc_unreachable ();
+	      case GIMPLE_QUATERNARY_RHS:
+		ops.op3 = gimple_assign_rhs4 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_TERNARY_RHS:
+		ops.op2 = gimple_assign_rhs3 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_BINARY_RHS:
+		ops.op1 = gimple_assign_rhs2 (assign_stmt);
+		/* Fallthru */
+	      case GIMPLE_UNARY_RHS:
+		ops.op0 = gimple_assign_rhs1 (assign_stmt);
+		break;
+	      default:
+		gcc_unreachable ();
 	      }
 	    ops.location = gimple_location (stmt);
 
@@ -5172,7 +5181,7 @@ expand_debug_expr (tree exp)
 
     /* Vector stuff.  For most of the codes we don't have rtl codes.  */
     case REALIGN_LOAD_EXPR:
-    case VEC_COND_EXPR:
+    CASE_VEC_COND_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
     case VEC_PACK_FLOAT_EXPR:
     case VEC_PACK_SAT_EXPR:
diff --git a/gcc/expr.c b/gcc/expr.c
index 2f2b53f8b69..de18229f162 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8450,7 +8450,7 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
   int ignore;
   bool reduce_bit_field;
   location_t loc = ops->location;
-  tree treeop0, treeop1, treeop2;
+  tree treeop0, treeop1, treeop2, treeop3;
 #define REDUCE_BIT_FIELD(expr)	(reduce_bit_field			  \
 				 ? reduce_to_bit_field_precision ((expr), \
 								  target, \
@@ -8464,13 +8464,15 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
   treeop0 = ops->op0;
   treeop1 = ops->op1;
   treeop2 = ops->op2;
+  treeop3 = ops->op3;
 
   /* We should be called only on simple (binary or unary) expressions,
      exactly those that are valid in gimple expressions that aren't
      GIMPLE_SINGLE_RHS (or invalid).  */
   gcc_assert (get_gimple_rhs_class (code) == GIMPLE_UNARY_RHS
 	      || get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS
-	      || get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS);
+	      || get_gimple_rhs_class (code) == GIMPLE_TERNARY_RHS
+	      || get_gimple_rhs_class (code) == GIMPLE_QUATERNARY_RHS);
 
   ignore = (target == const0_rtx
 	    || ((CONVERT_EXPR_CODE_P (code)
@@ -9141,16 +9143,15 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
+      /* For vector MIN <x, y>, expand it as a VEC_COND_*_EXPR <x <= y, x, y>
 	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
 	{
 	  tree t0 = make_tree (type, op0);
 	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
+	  return expand_vec_cond_expr (type,
+				       code == MIN_EXPR ? LE_EXPR : GE_EXPR, t0,
+				       t1, t0, t1, original_target);
 	}
 
       /* At this point, a MEM target is no longer useful; we will get better
@@ -9743,8 +9744,9 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
+    CASE_VEC_COND_EXPR:
+      target = expand_vec_cond_expr (type, code, treeop0, treeop1, treeop2,
+				     treeop3, target);
       return target;
 
     case VEC_DUPLICATE_EXPR:
@@ -9971,6 +9973,9 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
 	  ops.code = gimple_assign_rhs_code (g);
           switch (get_gimple_rhs_class (ops.code))
 	    {
+	    case GIMPLE_QUATERNARY_RHS:
+	      ops.op3 = gimple_assign_rhs4 (g);
+	      /* Fallthru */
 	    case GIMPLE_TERNARY_RHS:
 	      ops.op2 = gimple_assign_rhs3 (g);
 	      /* Fallthru */
@@ -11774,6 +11779,7 @@ maybe_optimize_pow2p_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = treeop1;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -11790,6 +11796,7 @@ maybe_optimize_pow2p_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = c3;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mur = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -11977,6 +11984,7 @@ maybe_optimize_mod_cmp (enum tree_code code, tree *arg0, tree *arg1)
   ops.op0 = treeop0;
   ops.op1 = treeop1;
   ops.op2 = NULL_TREE;
+  ops.op3 = NULL_TREE;
   start_sequence ();
   rtx mor = expand_expr_real_2 (&ops, NULL_RTX, TYPE_MODE (ops.type),
 				EXPAND_NORMAL);
@@ -12082,16 +12090,18 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
      expander for this.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
-      tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
       if (VECTOR_BOOLEAN_TYPE_P (ops->type)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
-	return expand_vec_cmp_expr (ops->type, ifexp, target);
+	{
+	  tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
+	  return expand_vec_cmp_expr (ops->type, ifexp, target);
+	}
       else
 	{
 	  tree if_true = constant_boolean_node (true, ops->type);
 	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
+	  return expand_vec_cond_expr (ops->type, ops->code, arg0, arg1,
+				       if_true, if_false, target);
 	}
     }
 
diff --git a/gcc/expr.h b/gcc/expr.h
index 6eb70bf12f1..312d25c7be4 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -51,7 +51,7 @@ typedef struct separate_ops
   enum tree_code code;
   location_t location;
   tree type;
-  tree op0, op1, op2;
+  tree op0, op1, op2, op3;
 } *sepops;
 \f
 /* This is run during target initialization to set up which modes can be
diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index 4082828e198..800ece6ec09 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -526,16 +526,25 @@ create_tmp_reg_fn (struct function *fn, tree type, const char *prefix)
 
 void
 extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
-		       tree *op2_p, tree *op3_p)
+		       tree *op2_p, tree *op3_p, tree *op4_p)
 {
   *subcode_p = TREE_CODE (expr);
   switch (get_gimple_rhs_class (*subcode_p))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      {
+	*op1_p = TREE_OPERAND (expr, 0);
+	*op2_p = TREE_OPERAND (expr, 1);
+	*op3_p = TREE_OPERAND (expr, 2);
+	*op4_p = TREE_OPERAND (expr, 3);
+	break;
+      }
     case GIMPLE_TERNARY_RHS:
       {
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = TREE_OPERAND (expr, 1);
 	*op3_p = TREE_OPERAND (expr, 2);
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_BINARY_RHS:
@@ -543,6 +552,7 @@ extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = TREE_OPERAND (expr, 1);
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_UNARY_RHS:
@@ -550,6 +560,7 @@ extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = TREE_OPERAND (expr, 0);
 	*op2_p = NULL_TREE;
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     case GIMPLE_SINGLE_RHS:
@@ -557,6 +568,7 @@ extract_ops_from_tree (tree expr, enum tree_code *subcode_p, tree *op1_p,
 	*op1_p = expr;
 	*op2_p = NULL_TREE;
 	*op3_p = NULL_TREE;
+	*op4_p = NULL_TREE;
 	break;
       }
     default:
diff --git a/gcc/gimple-expr.h b/gcc/gimple-expr.h
index 1ad1432bd17..c37bff201cc 100644
--- a/gcc/gimple-expr.h
+++ b/gcc/gimple-expr.h
@@ -36,7 +36,7 @@ extern tree create_tmp_reg_fn (struct function *, tree, const char *);
 
 
 extern void extract_ops_from_tree (tree, enum tree_code *, tree *, tree *,
-				   tree *);
+				   tree *, tree *);
 extern void gimple_cond_get_ops_from_tree (tree, enum tree_code *, tree *,
 					   tree *);
 extern bool is_gimple_lvalue (tree);
@@ -151,8 +151,8 @@ static inline void
 extract_ops_from_tree (tree expr, enum tree_code *code, tree *op0,
 		       tree *op1)
 {
-  tree op2;
-  extract_ops_from_tree (expr, code, op0, op1, &op2);
+  tree op2, op3;
+  extract_ops_from_tree (expr, code, op0, op1, &op2, &op3);
   gcc_assert (op2 == NULL_TREE);
 }
 
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index 8d642de2f67..4c2d5e73c0c 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -470,6 +470,10 @@ fold_gimple_assign (gimple_stmt_iterator *si)
         }
       break;
 
+    case GIMPLE_QUATERNARY_RHS:
+      // TODO
+      break;
+
     case GIMPLE_INVALID_RHS:
       gcc_unreachable ();
     }
@@ -4676,7 +4680,8 @@ replace_stmt_with_simplification (gimple_stmt_iterator *gsi,
 	  gimple_assign_set_rhs_with_ops (gsi, res_op->code,
 					  res_op->op_or_null (0),
 					  res_op->op_or_null (1),
-					  res_op->op_or_null (2));
+					  res_op->op_or_null (2),
+					  res_op->op_or_null (3));
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "gimple_simplified to ");
@@ -6361,7 +6366,11 @@ gimple_fold_stmt_to_constant_1 (gimple *stmt, tree (*valueize) (tree),
 				       gimple_expr_type (stmt), op0, op1, op2);
             }
 
-          default:
+	  case GIMPLE_QUATERNARY_RHS:
+	    // TODO
+	    return NULL_TREE;
+
+	  default:
             gcc_unreachable ();
           }
       }
@@ -7628,6 +7637,7 @@ gimple_assign_nonnegative_warnv_p (gimple *stmt, bool *strict_overflow_p,
 					      gimple_assign_rhs2 (stmt),
 					      strict_overflow_p, depth);
     case GIMPLE_TERNARY_RHS:
+    case GIMPLE_QUATERNARY_RHS:
       return false;
     case GIMPLE_SINGLE_RHS:
       return tree_single_nonnegative_warnv_p (gimple_assign_rhs1 (stmt),
@@ -7722,6 +7732,7 @@ gimple_assign_integer_valued_real_p (gimple *stmt, int depth)
 					   gimple_assign_rhs1 (stmt),
 					   gimple_assign_rhs2 (stmt), depth);
     case GIMPLE_TERNARY_RHS:
+    case GIMPLE_QUATERNARY_RHS:
       return false;
     case GIMPLE_SINGLE_RHS:
       return integer_valued_real_single_p (gimple_assign_rhs1 (stmt), depth);
diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c
index 53278168a59..ebe60ba774d 100644
--- a/gcc/gimple-match-head.c
+++ b/gcc/gimple-match-head.c
@@ -974,6 +974,9 @@ gimple_simplify (gimple *stmt, gimple_match_op *res_op, gimple_seq *seq,
 	      return (gimple_resimplify3 (seq, res_op, valueize)
 		      || valueized);
 	    }
+	  case GIMPLE_QUATERNARY_RHS:
+	    // TODO: add support
+	    break;
 	  default:
 	    gcc_unreachable ();
 	  }
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index 2d5ece06805..1d4d489429e 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -620,6 +620,59 @@ dump_ternary_rhs (pretty_printer *buffer, gassign *gs, int spc,
     }
 }
 
+static void
+dump_comparison (pretty_printer *buffer, tree_code code)
+{
+  switch (code)
+    {
+    case LT_EXPR:
+      pp_less (buffer);
+      break;
+    case GT_EXPR:
+      pp_greater (buffer);
+      break;
+    case LE_EXPR:
+      pp_less_equal (buffer);
+      break;
+    case GE_EXPR:
+      pp_greater_equal (buffer);
+      break;
+    case EQ_EXPR:
+      pp_string (buffer, "==");
+      break;
+    case NE_EXPR:
+      pp_string (buffer, "!=");
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Helper for dump_gimple_assign.  Print the quaternary RHS of the
+   assignment GS.  BUFFER, SPC and FLAGS are as in pp_gimple_stmt_1.  */
+
+static void
+dump_quaternary_rhs (pretty_printer *buffer, gassign *gs, int spc,
+		     dump_flags_t flags)
+{
+  enum tree_code code = gimple_assign_rhs_code (gs);
+  switch (code)
+    {
+    CASE_VEC_COND_EXPR:
+      dump_generic_node (buffer, gimple_assign_rhs1 (gs), spc, flags, false);
+      pp_space (buffer);
+      dump_comparison (buffer, vec_cmp_to_cmp_code (code));
+      pp_space (buffer);
+      dump_generic_node (buffer, gimple_assign_rhs2 (gs), spc, flags, false);
+      pp_string (buffer, " ? ");
+      dump_generic_node (buffer, gimple_assign_rhs3 (gs), spc, flags, false);
+      pp_string (buffer, " : ");
+      dump_generic_node (buffer, gimple_assign_rhs4 (gs), spc, flags, false);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+}
 
 /* Dump the gimple assignment GS.  BUFFER, SPC and FLAGS are as in
    pp_gimple_stmt_1.  */
@@ -675,6 +728,8 @@ dump_gimple_assign (pretty_printer *buffer, gassign *gs, int spc,
         dump_binary_rhs (buffer, gs, spc, flags);
       else if (gimple_num_ops (gs) == 4)
         dump_ternary_rhs (buffer, gs, spc, flags);
+      else if (gimple_num_ops (gs) == 5)
+	dump_quaternary_rhs (buffer, gs, spc, flags);
       else
         gcc_unreachable ();
       if (!(flags & TDF_RHS_ONLY))
@@ -1518,26 +1573,7 @@ dump_gimple_omp_for (pretty_printer *buffer, gomp_for *gs, int spc,
 	  dump_generic_node (buffer, gimple_omp_for_index (gs, i), spc,
 			     flags, false);
 	  pp_space (buffer);
-	  switch (gimple_omp_for_cond (gs, i))
-	    {
-	    case LT_EXPR:
-	      pp_less (buffer);
-	      break;
-	    case GT_EXPR:
-	      pp_greater (buffer);
-	      break;
-	    case LE_EXPR:
-	      pp_less_equal (buffer);
-	      break;
-	    case GE_EXPR:
-	      pp_greater_equal (buffer);
-	      break;
-	    case NE_EXPR:
-	      pp_string (buffer, "!=");
-	      break;
-	    default:
-	      gcc_unreachable ();
-	    }
+	  dump_comparison (buffer, gimple_omp_for_cond (gs, i));
 	  pp_space (buffer);
 	  dump_generic_node (buffer, gimple_omp_for_final (gs, i), spc,
 			     flags, false);
diff --git a/gcc/gimple.c b/gcc/gimple.c
index af62c8bf477..84150f3bebf 100644
--- a/gcc/gimple.c
+++ b/gcc/gimple.c
@@ -424,10 +424,10 @@ gassign *
 gimple_build_assign (tree lhs, tree rhs MEM_STAT_DECL)
 {
   enum tree_code subcode;
-  tree op1, op2, op3;
+  tree op1, op2, op3, op4;
 
-  extract_ops_from_tree (rhs, &subcode, &op1, &op2, &op3);
-  return gimple_build_assign (lhs, subcode, op1, op2, op3 PASS_MEM_STAT);
+  extract_ops_from_tree (rhs, &subcode, &op1, &op2, &op3, &op4);
+  return gimple_build_assign (lhs, subcode, op1, op2, op3, op4 PASS_MEM_STAT);
 }
 
 
@@ -436,7 +436,7 @@ gimple_build_assign (tree lhs, tree rhs MEM_STAT_DECL)
 
 static inline gassign *
 gimple_build_assign_1 (tree lhs, enum tree_code subcode, tree op1,
-		       tree op2, tree op3 MEM_STAT_DECL)
+		       tree op2, tree op3, tree op4 MEM_STAT_DECL)
 {
   unsigned num_ops;
   gassign *p;
@@ -462,9 +462,25 @@ gimple_build_assign_1 (tree lhs, enum tree_code subcode, tree op1,
       gimple_assign_set_rhs3 (p, op3);
     }
 
+  if (op4)
+    {
+      gcc_assert (num_ops > 4);
+      gimple_assign_set_rhs4 (p, op4);
+    }
+
   return p;
 }
 
+/* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
+   OP1, OP2, OP3 and OP4.  */
+
+gassign *
+gimple_build_assign (tree lhs, enum tree_code subcode, tree op1, tree op2,
+		     tree op3, tree op4 MEM_STAT_DECL)
+{
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3, op4 PASS_MEM_STAT);
+}
+
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
    OP1, OP2 and OP3.  */
 
@@ -472,7 +488,8 @@ gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 		     tree op2, tree op3 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3 PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, op3,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operands
@@ -482,8 +499,8 @@ gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 		     tree op2 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, op2, NULL_TREE
-				PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, op2, NULL_TREE,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 /* Build a GIMPLE_ASSIGN statement with subcode SUBCODE and operand OP1.  */
@@ -491,8 +508,8 @@ gimple_build_assign (tree lhs, enum tree_code subcode, tree op1,
 gassign *
 gimple_build_assign (tree lhs, enum tree_code subcode, tree op1 MEM_STAT_DECL)
 {
-  return gimple_build_assign_1 (lhs, subcode, op1, NULL_TREE, NULL_TREE
-				PASS_MEM_STAT);
+  return gimple_build_assign_1 (lhs, subcode, op1, NULL_TREE, NULL_TREE,
+				NULL_TREE PASS_MEM_STAT);
 }
 
 
@@ -1737,10 +1754,10 @@ void
 gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *gsi, tree expr)
 {
   enum tree_code subcode;
-  tree op1, op2, op3;
+  tree op1, op2, op3, op4;
 
-  extract_ops_from_tree (expr, &subcode, &op1, &op2, &op3);
-  gimple_assign_set_rhs_with_ops (gsi, subcode, op1, op2, op3);
+  extract_ops_from_tree (expr, &subcode, &op1, &op2, &op3, &op4);
+  gimple_assign_set_rhs_with_ops (gsi, subcode, op1, op2, op3, op4);
 }
 
 
@@ -1752,7 +1769,7 @@ gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *gsi, tree expr)
 
 void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
-				tree op1, tree op2, tree op3)
+				tree op1, tree op2, tree op3, tree op4)
 {
   unsigned new_rhs_ops = get_gimple_rhs_num_ops (code);
   gimple *stmt = gsi_stmt (*gsi);
@@ -1778,6 +1795,8 @@ gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
     gimple_assign_set_rhs2 (stmt, op2);
   if (new_rhs_ops > 2)
     gimple_assign_set_rhs3 (stmt, op3);
+  if (new_rhs_ops > 3)
+    gimple_assign_set_rhs4 (stmt, op4);
   if (stmt != old_stmt)
     gsi_replace (gsi, stmt, false);
 }
@@ -2234,6 +2253,8 @@ get_gimple_rhs_num_ops (enum tree_code code)
       return 2;
     case GIMPLE_TERNARY_RHS:
       return 3;
+    case GIMPLE_QUATERNARY_RHS:
+      return 4;
     default:
       gcc_unreachable ();
     }
@@ -2266,6 +2287,13 @@ get_gimple_rhs_num_ops (enum tree_code code)
       || (SYM) == ADDR_EXPR						    \
       || (SYM) == WITH_SIZE_EXPR					    \
       || (SYM) == SSA_NAME) ? GIMPLE_SINGLE_RHS				    \
+   : ((SYM) == VEC_COND_LT_EXPR						    \
+      || (SYM) == VEC_COND_LE_EXPR					    \
+      || (SYM) == VEC_COND_GT_EXPR					    \
+      || (SYM) == VEC_COND_GE_EXPR					    \
+      || (SYM) == VEC_COND_EQ_EXPR					    \
+      || (SYM) == VEC_COND_NE_EXPR) ? GIMPLE_QUATERNARY_RHS		    \
    : GIMPLE_INVALID_RHS),
 #define END_OF_BASE_TREE_CODES (unsigned char) GIMPLE_INVALID_RHS,
 
@@ -3271,6 +3299,47 @@ gimple_inexpensive_call_p (gcall *stmt)
   return false;
 }
 
+gassign *
+gimple_build_vec_cond_expr (tree lhs, tree condition, tree then_clause,
+			    tree else_clause)
+{
+  tree cond_lhs, cond_rhs;
+  tree_code code;
+
+  if (TREE_CODE (condition) == SSA_NAME)
+    {
+      gimple *stmt = SSA_NAME_DEF_STMT (condition);
+      code = gimple_assign_rhs_code (stmt);
+      if (TREE_CODE_CLASS (code) == tcc_comparison)
+	{
+	  code = cmp_to_vec_cmp_code (code);
+	  cond_lhs = gimple_assign_rhs1 (stmt);
+	  cond_rhs = gimple_assign_rhs2 (stmt);
+	}
+      else
+	{
+	  code = VEC_COND_EQ_EXPR;
+	  cond_lhs = condition;
+	  cond_rhs = constant_boolean_node (true, TREE_TYPE (condition));
+	}
+    }
+  else if (TREE_CODE (condition) == VECTOR_CST)
+    {
+      // TODO: this should be probably folded right away
+      code = VEC_COND_EQ_EXPR;
+      cond_lhs = condition;
+      cond_rhs = constant_boolean_node (true, TREE_TYPE (condition));
+    }
+  else
+    {
+      code = cmp_to_vec_cmp_code (TREE_CODE (condition));
+      cond_lhs = TREE_OPERAND (condition, 0);
+      cond_rhs = TREE_OPERAND (condition, 1);
+    }
+  return gimple_build_assign (lhs, code, cond_lhs, cond_rhs, then_clause,
+			      else_clause);
+}
+
 #if CHECKING_P
 
 namespace selftest {
diff --git a/gcc/gimple.h b/gcc/gimple.h
index cf1f8da5ae2..5363b0910a8 100644
--- a/gcc/gimple.h
+++ b/gcc/gimple.h
@@ -115,12 +115,13 @@ GIMPLE_CHECK2(const gimple *gs)
    get_gimple_rhs_class.  */
 enum gimple_rhs_class
 {
-  GIMPLE_INVALID_RHS,	/* The expression cannot be used on the RHS.  */
-  GIMPLE_TERNARY_RHS,	/* The expression is a ternary operation.  */
-  GIMPLE_BINARY_RHS,	/* The expression is a binary operation.  */
-  GIMPLE_UNARY_RHS,	/* The expression is a unary operation.  */
-  GIMPLE_SINGLE_RHS	/* The expression is a single object (an SSA
-			   name, a _DECL, a _REF, etc.  */
+  GIMPLE_INVALID_RHS,    /* The expression cannot be used on the RHS.  */
+  GIMPLE_QUATERNARY_RHS, /* The expression is a quaternary operation.  */
+  GIMPLE_TERNARY_RHS,    /* The expression is a ternary operation.  */
+  GIMPLE_BINARY_RHS,     /* The expression is a binary operation.  */
+  GIMPLE_UNARY_RHS,      /* The expression is a unary operation.  */
+  GIMPLE_SINGLE_RHS      /* The expression is a single object (an SSA
+			    name, a _DECL, a _REF, etc.  */
 };
 
 /* Specific flags for individual GIMPLE statements.  These flags are
@@ -1457,11 +1458,15 @@ gcall *gimple_build_call_internal (enum internal_fn, unsigned, ...);
 gcall *gimple_build_call_internal_vec (enum internal_fn, vec<tree> );
 gcall *gimple_build_call_from_tree (tree, tree);
 gassign *gimple_build_assign (tree, tree CXX_MEM_STAT_INFO);
+gassign *gimple_build_assign (tree, enum tree_code,
+			      tree, tree, tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code,
 			      tree, tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code,
 			      tree, tree CXX_MEM_STAT_INFO);
 gassign *gimple_build_assign (tree, enum tree_code, tree CXX_MEM_STAT_INFO);
+gassign *gimple_build_vec_cond_expr (tree, tree, tree, tree);
+
 gcond *gimple_build_cond (enum tree_code, tree, tree, tree, tree);
 gcond *gimple_build_cond_from_tree (tree, tree, tree);
 void gimple_cond_set_condition_from_tree (gcond *, tree);
@@ -1530,7 +1535,7 @@ bool gimple_assign_unary_nop_p (gimple *);
 void gimple_set_bb (gimple *, basic_block);
 void gimple_assign_set_rhs_from_tree (gimple_stmt_iterator *, tree);
 void gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *, enum tree_code,
-				     tree, tree, tree);
+				     tree, tree, tree, tree);
 tree gimple_get_lhs (const gimple *);
 void gimple_set_lhs (gimple *, tree);
 gimple *gimple_copy (gimple *);
@@ -2685,25 +2690,80 @@ gimple_assign_set_rhs3 (gimple *gs, tree rhs)
   gimple_assign_set_rhs3 (ass, rhs);
 }
 
+/* Return the fourth operand on the RHS of assignment statement GS.
+   If GS does not have four operands, NULL is returned instead.  */
+
+static inline tree
+gimple_assign_rhs4 (const gassign *gs)
+{
+  if (gimple_num_ops (gs) >= 5)
+    return gs->op[4];
+  else
+    return NULL_TREE;
+}
+
+static inline tree
+gimple_assign_rhs4 (const gimple *gs)
+{
+  const gassign *ass = GIMPLE_CHECK2<const gassign *> (gs);
+  return gimple_assign_rhs4 (ass);
+}
+
+/* Return a pointer to the fourth operand on the RHS of assignment
+   statement GS.  */
+
+static inline tree *
+gimple_assign_rhs4_ptr (gimple *gs)
+{
+  gassign *ass = GIMPLE_CHECK2<gassign *> (gs);
+  gcc_gimple_checking_assert (gimple_num_ops (gs) >= 5);
+  return &ass->op[4];
+}
+
+/* Set RHS to be the fourth operand on the RHS of assignment statement GS.  */
+
+static inline void
+gimple_assign_set_rhs4 (gassign *gs, tree rhs)
+{
+  gcc_gimple_checking_assert (gimple_num_ops (gs) >= 5);
+  gs->op[4] = rhs;
+}
+
+static inline void
+gimple_assign_set_rhs4 (gimple *gs, tree rhs)
+{
+  gassign *ass = GIMPLE_CHECK2<gassign *> (gs);
+  gimple_assign_set_rhs4 (ass, rhs);
+}
+
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
+   which expect to see only three operands.  */
+
+static inline void
+gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
+				tree op1, tree op2, tree op3)
+{
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, op3, NULL);
+}
 
-/* A wrapper around 3 operand gimple_assign_set_rhs_with_ops, for callers
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
    which expect to see only two operands.  */
 
 static inline void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
 				tree op1, tree op2)
 {
-  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, NULL);
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, op2, NULL, NULL);
 }
 
-/* A wrapper around 3 operand gimple_assign_set_rhs_with_ops, for callers
+/* A wrapper around 4 operand gimple_assign_set_rhs_with_ops, for callers
    which expect to see only one operands.  */
 
 static inline void
 gimple_assign_set_rhs_with_ops (gimple_stmt_iterator *gsi, enum tree_code code,
 				tree op1)
 {
-  gimple_assign_set_rhs_with_ops (gsi, code, op1, NULL, NULL);
+  gimple_assign_set_rhs_with_ops (gsi, code, op1, NULL, NULL, NULL);
 }
 
 /* Returns true if GS is a nontemporal move.  */
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 623cdbfed7c..dcde288789f 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -13812,19 +13812,20 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 
 	case VEC_COND_EXPR:
 	  {
-	    enum gimplify_status r0, r1, r2;
-
-	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
-				post_p, is_gimple_condexpr, fb_rvalue);
-	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-
-	    ret = MIN (MIN (r0, r1), r2);
-	    recalculate_side_effects (*expr_p);
+	    tree type = TREE_TYPE (TREE_OPERAND (*expr_p, 1));
+	    tree cond_expr = TREE_OPERAND (*expr_p, 0);
+	    tree_code vec_code = cmp_to_vec_cmp_code (TREE_CODE (cond_expr));
+	    *expr_p = build4_loc (input_location, vec_code, type,
+				  TREE_OPERAND (cond_expr, 0),
+				  TREE_OPERAND (cond_expr, 1),
+				  TREE_OPERAND (*expr_p, 1),
+				  TREE_OPERAND (*expr_p, 2));
+
+	    goto expr_4;
 	  }
-	  break;
+
+	CASE_VEC_COND_EXPR:
+	  goto expr_4;
 
 	case VEC_PERM_EXPR:
 	  /* Classified as tcc_expression.  */
@@ -13923,6 +13924,23 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 		break;
 	      }
 
+	    expr_4:
+	      {
+		enum gimplify_status r0, r1, r2, r3;
+
+		r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+		r3 = gimplify_expr (&TREE_OPERAND (*expr_p, 3), pre_p, post_p,
+				    is_gimple_val, fb_rvalue);
+
+		ret = MIN (MIN (r0, r1), MIN (r2, r3));
+		break;
+	      }
+
 	    case tcc_declaration:
 	    case tcc_constant:
 	      ret = GS_ALL_DONE;
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 35921e691f9..b9d540d935d 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5822,44 +5822,36 @@ expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
    three operands.  */
 
 rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
+expand_vec_cond_expr (tree vec_cond_type, tree_code tcode, tree cond_lhs,
+		      tree cond_rhs, tree if_true, tree if_false, rtx target)
 {
   class expand_operand ops[6];
   enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
   machine_mode mode = TYPE_MODE (vec_cond_type);
   machine_mode cmp_op_mode;
   bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
+  tcode = vec_cmp_to_cmp_code (tcode);
 
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
+  if (tcode == EQ_EXPR && TREE_CODE (cond_rhs) == VECTOR_CST
+      && integer_all_onesp (cond_rhs))
     {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (cond_lhs)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (cond_lhs)))
 	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
+	return expand_vec_cond_mask_expr (vec_cond_type, cond_lhs, if_true,
+					  if_false, target);
       /* Fake op0 < 0.  */
       else
 	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (cond_lhs)))
 		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  cond_rhs = build_zero_cst (TREE_TYPE (cond_lhs));
 	  tcode = LT_EXPR;
 	}
     }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
 
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (cond_lhs));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (cond_lhs));
 
   gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
 	      && known_eq (GET_MODE_NUNITS (mode),
@@ -5868,22 +5860,20 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
   icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
   if (icode == CODE_FOR_nothing)
     {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
+      if (tcode == LT_EXPR && TREE_CODE (cond_lhs) == VECTOR_CST)
 	{
 	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
 	     into a constant when only get_vcond_eq_icode is supported.
 	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
 	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	  if (!VECTOR_CST_NELTS (cond_lhs).is_constant (&nelts))
 	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
+	      if (VECTOR_CST_STEPPED_P (cond_lhs))
 		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
+	      nelts = vector_cst_encoded_nelts (cond_lhs);
 	    }
 	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	    if (tree_int_cst_sgn (vector_cst_elt (cond_lhs, i)) == 1)
 	      return 0;
 	  tcode = NE_EXPR;
 	}
@@ -5893,14 +5883,14 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
 	return 0;
     }
 
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
+  rtx comparison = vector_compare_rtx (VOIDmode, tcode, cond_lhs, cond_rhs,
+				       unsignedp, icode, 4);
+  rtx rtx_true = expand_normal (if_true);
+  rtx rtx_false = expand_normal (if_false);
 
   create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[1], rtx_true, mode);
+  create_input_operand (&ops[2], rtx_false, mode);
   create_fixed_operand (&ops[3], comparison);
   create_fixed_operand (&ops[4], XEXP (comparison, 0));
   create_fixed_operand (&ops[5], XEXP (comparison, 1));
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 897bb5d4443..df72cc7910b 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -315,7 +315,7 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
 /* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
+extern rtx expand_vec_cond_expr (tree, tree_code, tree, tree, tree, tree, rtx);
 
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index b75fdb2e63f..348dd8437db 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4152,20 +4152,9 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  return true;
 	}
       break;
-
     case VEC_COND_EXPR:
-      if (!VECTOR_BOOLEAN_TYPE_P (rhs1_type)
-	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type),
-		       TYPE_VECTOR_SUBPARTS (lhs_type)))
-	{
-	  error ("the first argument of a %qs must be of a "
-		 "boolean vector type of the same number of elements "
-		 "as the result", code_name);
-	  debug_generic_expr (lhs_type);
-	  debug_generic_expr (rhs1_type);
-	  return true;
-	}
-      /* Fallthrough.  */
+      error ("%qs in gimple IL", code_name);
+      return true;
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
 	  && verify_gimple_comparison (TREE_TYPE (rhs1),
@@ -4364,6 +4353,87 @@ verify_gimple_assign_ternary (gassign *stmt)
   return false;
 }
 
+/* Verify a gimple assignment statement STMT with a quaternary rhs.
+   Returns true if anything is wrong.  */
+
+static bool
+verify_gimple_assign_quaternary (gassign *stmt)
+{
+  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
+  tree lhs = gimple_assign_lhs (stmt);
+  tree lhs_type = TREE_TYPE (lhs);
+  tree rhs1 = gimple_assign_rhs1 (stmt);
+  tree rhs1_type = TREE_TYPE (rhs1);
+  tree rhs2 = gimple_assign_rhs2 (stmt);
+  tree rhs2_type = TREE_TYPE (rhs2);
+  tree rhs3 = gimple_assign_rhs3 (stmt);
+  tree rhs3_type = TREE_TYPE (rhs3);
+  tree rhs4 = gimple_assign_rhs4 (stmt);
+  tree rhs4_type = TREE_TYPE (rhs4);
+
+  const char *const code_name = get_tree_code_name (rhs_code);
+
+  if (!is_gimple_reg (lhs))
+    {
+      error ("non-register as LHS of quaternary operation");
+      return true;
+    }
+
+  /* First handle operations that involve different types.  */
+  switch (rhs_code)
+    {
+    CASE_VEC_COND_EXPR:
+      if (TREE_CODE (rhs1_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs2_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs3_type) != VECTOR_TYPE
+	  || TREE_CODE (rhs4_type) != VECTOR_TYPE)
+	{
+	  error ("vector types expected in %qs", code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      if (maybe_ne (TYPE_VECTOR_SUBPARTS (rhs1_type),
+		    TYPE_VECTOR_SUBPARTS (rhs2_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs2_type),
+		       TYPE_VECTOR_SUBPARTS (rhs3_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs3_type),
+		       TYPE_VECTOR_SUBPARTS (rhs4_type))
+	  || maybe_ne (TYPE_VECTOR_SUBPARTS (rhs4_type),
+		       TYPE_VECTOR_SUBPARTS (lhs_type)))
+	{
+	  error ("vectors with different element number found in %qs",
+		 code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      if (!useless_type_conversion_p (lhs_type, rhs3_type)
+	  || !useless_type_conversion_p (lhs_type, rhs4_type)
+	  || !useless_type_conversion_p (rhs1_type, rhs2_type))
+	{
+	  error ("type mismatch in %qs", code_name);
+	  debug_generic_expr (lhs_type);
+	  debug_generic_expr (rhs1_type);
+	  debug_generic_expr (rhs2_type);
+	  debug_generic_expr (rhs3_type);
+	  debug_generic_expr (rhs4_type);
+	  return true;
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  return false;
+}
+
 /* Verify a gimple assignment statement STMT with a single rhs.
    Returns true if anything is wrong.  */
 
@@ -4617,6 +4687,9 @@ verify_gimple_assign (gassign *stmt)
     case GIMPLE_TERNARY_RHS:
       return verify_gimple_assign_ternary (stmt);
 
+    case GIMPLE_QUATERNARY_RHS:
+      return verify_gimple_assign_quaternary (stmt);
+
     default:
       gcc_unreachable ();
     }
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index e4ae1b058fd..a5034dc862b 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -4092,7 +4092,7 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
     /* Assign cost of 1 to usual operations.
        ??? We may consider mapping RTL costs to this.  */
     case COND_EXPR:
-    case VEC_COND_EXPR:
+    CASE_VEC_COND_EXPR:
     case VEC_PERM_EXPR:
 
     case PLUS_EXPR:
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 221f140b356..a62e524d52f 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -349,6 +349,11 @@ rhs_to_tree (tree type, gimple *stmt)
   enum tree_code code = gimple_assign_rhs_code (stmt);
   switch (get_gimple_rhs_class (code))
     {
+    case GIMPLE_QUATERNARY_RHS:
+      // TODO: use fold_build4_loc
+      return build4_loc (loc, code, type, gimple_assign_rhs1 (stmt),
+			 gimple_assign_rhs2 (stmt), gimple_assign_rhs3 (stmt),
+			 gimple_assign_rhs4 (stmt));
     case GIMPLE_TERNARY_RHS:
       return fold_build3_loc (loc, code, type, gimple_assign_rhs1 (stmt),
 			      gimple_assign_rhs2 (stmt),
@@ -2166,8 +2171,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
       if (conv_code == ERROR_MARK)
 	gimple_assign_set_rhs_from_tree (gsi, orig[0]);
       else
-	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
-					NULL_TREE, NULL_TREE);
+	gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0], NULL_TREE);
     }
   else
     {
@@ -2227,8 +2231,7 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
 				   VEC_PERM_EXPR, orig[0], orig[1], op2);
 	  orig[0] = gimple_assign_lhs (perm);
 	  gsi_insert_before (gsi, perm, GSI_SAME_STMT);
-	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0],
-					  NULL_TREE, NULL_TREE);
+	  gimple_assign_set_rhs_with_ops (gsi, conv_code, orig[0]);
 	}
     }
   update_stmt (gsi_stmt (*gsi));
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index cd2ced36971..7f2b96a1bf0 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3177,9 +3177,9 @@ static widest_int
 derive_constant_upper_bound (tree val)
 {
   enum tree_code code;
-  tree op0, op1, op2;
+  tree op0, op1, op2, op3;
 
-  extract_ops_from_tree (val, &code, &op0, &op1, &op2);
+  extract_ops_from_tree (val, &code, &op0, &op1, &op2, &op3);
   return derive_constant_upper_bound_ops (TREE_TYPE (val), op0, code, op1);
 }
 
diff --git a/gcc/tree-ssa-operands.c b/gcc/tree-ssa-operands.c
index e643b33a6b0..a09229c470e 100644
--- a/gcc/tree-ssa-operands.c
+++ b/gcc/tree-ssa-operands.c
@@ -797,7 +797,6 @@ get_expr_operands (struct function *fn, gimple *stmt, tree *expr_p, int flags)
       return;
 
     case COND_EXPR:
-    case VEC_COND_EXPR:
     case VEC_PERM_EXPR:
       get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 0), uflags);
       get_expr_operands (fn, stmt, &TREE_OPERAND (expr, 1), uflags);
diff --git a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
index c618601a184..b09a6188c90 100644
--- a/gcc/tree-ssa-pre.c
+++ b/gcc/tree-ssa-pre.c
@@ -3875,11 +3875,10 @@ compute_avail (void)
 		      enum tree_code code = gimple_assign_rhs_code (stmt);
 		      vn_nary_op_t nary;
 
-		      /* COND_EXPR and VEC_COND_EXPR are awkward in
+		      /* COND_EXPR and VEC_COND_*_EXPR are awkward in
 			 that they contain an embedded complex expression.
 			 Don't even try to shove those through PRE.  */
-		      if (code == COND_EXPR
-			  || code == VEC_COND_EXPR)
+		      if (code == COND_EXPR || vec_cond_expr_p (code))
 			continue;
 
 		      vn_nary_op_lookup_stmt (stmt, &nary);
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index 510dfd1e188..01ec9701bfb 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3747,7 +3747,7 @@ optimize_range_tests (enum tree_code opcode,
 }
 
 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
-   the operands of the VEC_COND_EXPR.  Returns ERROR_MARK on failure,
+   the operands of the VEC_COND_*_EXPR.  Returns ERROR_MARK on failure,
    otherwise the comparison code.  TYPE is a return value that is set
    to type of comparison.  */
 
@@ -3763,7 +3763,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   /* ??? If we start creating more COND_EXPR, we could perform
      this same optimization with them.	For now, simplify.  */
-  if (gimple_assign_rhs_code (stmt) != VEC_COND_EXPR)
+  if (!vec_cond_expr_p (gimple_assign_rhs_code (stmt)))
     return ERROR_MARK;
 
   tree cond = gimple_assign_rhs1 (stmt);
diff --git a/gcc/tree-ssa-scopedtables.c b/gcc/tree-ssa-scopedtables.c
index 574bc30eee1..91d1b10c142 100644
--- a/gcc/tree-ssa-scopedtables.c
+++ b/gcc/tree-ssa-scopedtables.c
@@ -432,6 +432,14 @@ add_hashable_expr (const struct hashable_expr *expr, hash &hstate)
       inchash::add_expr (expr->ops.ternary.opnd2, hstate);
       break;
 
+    case EXPR_QUATERNARY:
+      hstate.add_object (expr->ops.quaternary.op);
+      inchash::add_expr (expr->ops.quaternary.opnd0, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd1, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd2, hstate);
+      inchash::add_expr (expr->ops.quaternary.opnd3, hstate);
+      break;
+
     case EXPR_CALL:
       {
         size_t i;
@@ -643,6 +651,19 @@ hashable_expr_equal_p (const struct hashable_expr *expr0,
 	      && operand_equal_p (expr0->ops.ternary.opnd1,
 				  expr1->ops.ternary.opnd0, 0));
 
+    case EXPR_QUATERNARY:
+      if (expr0->ops.quaternary.op != expr1->ops.quaternary.op)
+	return false;
+
+      return (operand_equal_p (expr0->ops.quaternary.opnd0,
+			       expr1->ops.quaternary.opnd0, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd1,
+				  expr1->ops.quaternary.opnd1, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd2,
+				  expr1->ops.quaternary.opnd2, 0)
+	      && operand_equal_p (expr0->ops.quaternary.opnd3,
+				  expr1->ops.quaternary.opnd3, 0));
+
     case EXPR_CALL:
       {
         size_t i;
@@ -736,7 +757,17 @@ expr_hash_elt::expr_hash_elt (gimple *stmt, tree orig_lhs)
 	  expr->ops.ternary.opnd1 = gimple_assign_rhs2 (stmt);
 	  expr->ops.ternary.opnd2 = gimple_assign_rhs3 (stmt);
 	  break;
-        default:
+	case GIMPLE_QUATERNARY_RHS:
+	  expr->kind = EXPR_QUATERNARY;
+	  expr->type = TREE_TYPE (gimple_assign_lhs (stmt));
+	  expr->ops.quaternary.op = subcode;
+	  expr->ops.quaternary.opnd0 = gimple_assign_rhs1 (stmt);
+	  expr->ops.quaternary.opnd1 = gimple_assign_rhs2 (stmt);
+	  expr->ops.quaternary.opnd2 = gimple_assign_rhs3 (stmt);
+	  expr->ops.quaternary.opnd3 = gimple_assign_rhs4 (stmt);
+	  break;
+
+	default:
           gcc_unreachable ();
         }
     }
@@ -896,6 +927,19 @@ expr_hash_elt::print (FILE *stream)
 	fputs (">", stream);
 	break;
 
+      case EXPR_QUATERNARY:
+	fprintf (stream, " %s <",
+		 get_tree_code_name (m_expr.ops.quaternary.op));
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd0);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd1);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd2);
+	fputs (", ", stream);
+	print_generic_expr (stream, m_expr.ops.quaternary.opnd3);
+	fputs (">", stream);
+	break;
+
       case EXPR_CALL:
         {
           size_t i;
diff --git a/gcc/tree-ssa-scopedtables.h b/gcc/tree-ssa-scopedtables.h
index 48185006823..e3ed5c5e753 100644
--- a/gcc/tree-ssa-scopedtables.h
+++ b/gcc/tree-ssa-scopedtables.h
@@ -29,6 +29,7 @@ enum expr_kind
   EXPR_UNARY,
   EXPR_BINARY,
   EXPR_TERNARY,
+  EXPR_QUATERNARY,
   EXPR_CALL,
   EXPR_PHI
 };
@@ -42,6 +43,7 @@ struct hashable_expr
     struct { enum tree_code op;  tree opnd; } unary;
     struct { enum tree_code op;  tree opnd0, opnd1; } binary;
     struct { enum tree_code op;  tree opnd0, opnd1, opnd2; } ternary;
+    struct { enum tree_code op;  tree opnd0, opnd1, opnd2, opnd3; } quaternary;
     struct { gcall *fn_from; bool pure; size_t nargs; tree *args; } call;
     struct { size_t nargs; tree *args; } phi;
   } ops;
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 5855653257b..01ab9423b3c 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -373,8 +373,9 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
 
 /* Try to expand vector comparison expression OP0 CODE OP1 by
    querying optab if the following expression:
-	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
+	VEC_COND_CODE_EXPR< OP0, OP1, {-1,...}, {0,...}>
    can be expanded.  */
+
 static tree
 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                           tree op1, enum tree_code code)
@@ -691,12 +692,10 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
+	      tree zero, cst;
 	      gimple *stmt;
 
-	      mask_type = build_same_sized_truth_vector_type (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -704,7 +703,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
+	      stmt = gimple_build_assign (addend, VEC_COND_LT_EXPR, op0, zero,
 					  cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
@@ -909,41 +908,46 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 
 /* Expand a vector condition to scalars, by using many conditions
    on the vector's elements.  */
+
 static void
 expand_vector_condition (gimple_stmt_iterator *gsi)
 {
   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
+  tree_code code = vec_cmp_to_cmp_code (gimple_assign_rhs_code (stmt));
   tree type = gimple_expr_type (stmt);
-  tree a = gimple_assign_rhs1 (stmt);
-  tree a1 = a;
-  tree a2 = NULL_TREE;
-  bool a_is_comparison = false;
+  tree a = NULL_TREE;
+  tree a1 = gimple_assign_rhs1 (stmt);
+  tree a2 = gimple_assign_rhs2 (stmt);
+  bool a_is_comparison = true;
   bool a_is_scalar_bitmask = false;
-  tree b = gimple_assign_rhs2 (stmt);
-  tree c = gimple_assign_rhs3 (stmt);
+  tree b = gimple_assign_rhs3 (stmt);
+  tree c = gimple_assign_rhs4 (stmt);
   vec<constructor_elt, va_gc> *v;
   tree constr;
   tree inner_type = TREE_TYPE (type);
-  tree cond_type = TREE_TYPE (TREE_TYPE (a));
-  tree comp_inner_type = cond_type;
+  tree cond_type = TREE_TYPE (TREE_TYPE (a1));
+  tree comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
   tree width = TYPE_SIZE (inner_type);
   tree index = bitsize_int (0);
-  tree comp_width = width;
+  tree comp_width = TYPE_SIZE (comp_inner_type);
   tree comp_index = index;
   int i;
   location_t loc = gimple_location (gsi_stmt (*gsi));
 
-  if (!is_gimple_val (a))
+  if (code == EQ_EXPR
+      && TREE_CODE (a2) == VECTOR_CST
+      && integer_all_onesp (a2))
     {
-      gcc_assert (COMPARISON_CLASS_P (a));
-      a_is_comparison = true;
-      a1 = TREE_OPERAND (a, 0);
-      a2 = TREE_OPERAND (a, 1);
-      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
-      comp_width = TYPE_SIZE (comp_inner_type);
+      a_is_comparison = false;
+      a = a1;
+
+      comp_inner_type = cond_type;
+      comp_width = width;
     }
+  else
+    cond_type = truth_type_for (cond_type);
 
-  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
+  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
     return;
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
@@ -997,6 +1001,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   int nunits = nunits_for_known_piecewise_op (type);
+
   vec_alloc (v, nunits);
   for (i = 0; i < nunits; i++)
     {
@@ -1009,7 +1014,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 				       comp_width, comp_index);
 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
 				       comp_width, comp_index);
-	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
+	  aa = fold_build2 (code, cond_type, aa1, aa2);
 	}
       else if (a_is_scalar_bitmask)
 	{
@@ -1964,7 +1969,7 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
       return;
     }
 
-  if (code == VEC_COND_EXPR)
+  if (vec_cond_expr_p (code))
     {
       expand_vector_condition (gsi);
       return;
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 9e399cdacee..b884089e086 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -4463,7 +4463,7 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
       poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
 
       gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt;
-      gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
+      gcc_assert (vec_cond_expr_p (gimple_assign_rhs_code (vec_stmt)));
 
       int scalar_precision
 	= GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
@@ -4511,15 +4511,13 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	 Finally, we update the phi (NEW_PHI_TREE) to take the value of
 	 the new cond_expr (INDEX_COND_EXPR).  */
 
-      /* Duplicate the condition from vec_stmt.  */
-      tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt));
-
       /* Create a conditional, where the condition is taken from vec_stmt
 	 (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and
 	 else is the phi (NEW_PHI_TREE).  */
-      tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type,
-				     ccompare, indx_before_incr,
-				     new_phi_tree);
+      tree index_cond_expr
+	= build4 (gimple_assign_rhs_code (vec_stmt), cr_index_vector_type,
+		  gimple_assign_rhs1 (vec_stmt), gimple_assign_rhs2 (vec_stmt),
+		  indx_before_incr, new_phi_tree);
       induction_index = make_ssa_name (cr_index_vector_type);
       gimple *index_condition = gimple_build_assign (induction_index,
 						     index_cond_expr);
@@ -4741,8 +4739,6 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
       tree index_vec_type = TREE_TYPE (induction_index);
       gcc_checking_assert (TYPE_UNSIGNED (index_vec_type));
       tree index_scalar_type = TREE_TYPE (index_vec_type);
-      tree index_vec_cmp_type = build_same_sized_truth_vector_type
-	(index_vec_type);
 
       /* Get an unsigned integer version of the type of the data vector.  */
       int scalar_precision
@@ -4785,22 +4781,16 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	 (VEC_COND) with one data value and the rest zeros.
 	 In the case where the loop never made any matches, every index will
 	 match, resulting in a vector with all data values (which will all be
-	 the default value).  */
-
-      /* Compare the max index vector to the vector of found indexes to find
+	 the default value).
+	 Compare the max index vector to the vector of found indexes to find
 	 the position of the max value.  */
-      tree vec_compare = make_ssa_name (index_vec_cmp_type);
-      gimple *vec_compare_stmt = gimple_build_assign (vec_compare, EQ_EXPR,
-						      induction_index,
-						      max_index_vec);
-      gsi_insert_before (&exit_gsi, vec_compare_stmt, GSI_SAME_STMT);
 
       /* Use the compare to choose either values from the data vector or
 	 zero.  */
       tree vec_cond = make_ssa_name (vectype);
-      gimple *vec_cond_stmt = gimple_build_assign (vec_cond, VEC_COND_EXPR,
-						   vec_compare, new_phi_result,
-						   zero_vec);
+      gimple *vec_cond_stmt
+	= gimple_build_assign (vec_cond, VEC_COND_EQ_EXPR, induction_index,
+			       max_index_vec, new_phi_result, zero_vec);
       gsi_insert_before (&exit_gsi, vec_cond_stmt, GSI_SAME_STMT);
 
       /* Finally we need to extract the data value from the vector (VEC_COND)
@@ -5033,8 +5023,11 @@ vect_create_epilog_for_reduction (vec<tree> vect_defs,
 	     vec = seq ? new_phi_result : vector_identity;
 
 	     VEC is now suitable for a full vector reduction.  */
-	  tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
-				   sel, new_phi_result, vector_identity);
+	  tree vec = make_ssa_name (new_phi_result);
+	  gimple *cond_expr
+	    = gimple_build_vec_cond_expr (vec, sel, new_phi_result,
+					  vector_identity);
+	  gimple_seq_add_stmt (&seq, cond_expr);
 
 	  /* Do the reduction and convert it to the appropriate type.  */
 	  tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
@@ -5634,8 +5627,7 @@ merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
 		     tree vec, tree identity)
 {
   tree cond = make_temp_ssa_name (vectype, NULL, "cond");
-  gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR,
-					  mask, vec, identity);
+  gimple *new_stmt = gimple_build_vec_cond_expr (cond, mask, vec, identity);
   gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
   return cond;
 }
@@ -5936,8 +5928,8 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
 	tree vectype = TREE_TYPE (vop[1]);
 	tree zero = build_zero_cst (vectype);
 	tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
-	gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
-					       mask, vop[1], zero);
+	gassign *select
+	  = gimple_build_vec_cond_expr (masked_op1, mask, vop[1], zero);
 	gsi_insert_before (gsi, select, GSI_SAME_STMT);
 	vop[1] = masked_op1;
 	break;
@@ -5947,8 +5939,8 @@ build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask,
       {
 	tree vectype = TREE_TYPE (vop[1]);
 	tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1");
-	gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR,
-					       mask, vop[1], vop[0]);
+	gassign *select
+	  = gimple_build_vec_cond_expr (masked_op1, mask, vop[1], vop[0]);
 	gsi_insert_before (gsi, select, GSI_SAME_STMT);
 	vop[1] = masked_op1;
 	break;
@@ -6376,7 +6368,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 
 	 we're interested in the last element in x_3 for which a_2 || a_3
 	 is true, whereas the current reduction chain handling would
-	 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
+	 vectorize x_2 as a normal VEC_COND_*_EXPR and only treat x_3
 	 as a reduction operation.  */
       if (reduc_index == -1)
 	{
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index baa9a4cb8fa..402879c8635 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -3460,7 +3460,7 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set<gimple *> &stmts)
 	  tree vecitype, comp_vectype;
 
 	  /* If the comparison can throw, then is_gimple_condexpr will be
-	     false and we can't make a COND_EXPR/VEC_COND_EXPR out of it.  */
+	     false and we can't make a COND_EXPR/VEC_COND_*_EXPR out of it.  */
 	  if (stmt_could_throw_p (cfun, def_stmt))
 	    return false;
 
@@ -3582,7 +3582,7 @@ adjust_bool_pattern (tree var, tree out_type,
 	   S3'  c_T = x2 CMP2 y2 ? a_T : 0;
 	   S4'  f_T = c_T;
 
-	 At least when VEC_COND_EXPR is implemented using masks
+	 At least when VEC_COND_*_EXPR is implemented using masks
 	 cond ? 1 : 0 is as expensive as cond ? var : 0, in both cases it
 	 computes the comparison masks and ands it, in one case with
 	 all ones vector, in the other case with a vector register.
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index b1e97f85d96..9d291da705c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-fold.h"
 #include "regs.h"
 #include "attribs.h"
+#include "print-tree.h"
 
 /* For lang_hooks.types.type_for_mode.  */
 #include "langhooks.h"
@@ -6419,15 +6420,15 @@ scan_operand_equal_p (tree ref1, tree ref2)
   return true;
 }
 
-
-enum scan_store_kind {
+enum scan_store_kind
+{
   /* Normal permutation.  */
   scan_store_kind_perm,
 
   /* Whole vector left shift permutation with zero init.  */
   scan_store_kind_lshift_zero,
 
-  /* Whole vector left shift permutation and VEC_COND_EXPR.  */
+  /* Whole vector left shift permutation and VEC_COND_*_EXPR.  */
   scan_store_kind_lshift_cond
 };
 
@@ -7095,8 +7096,8 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 			       ? boolean_false_node : boolean_true_node);
 
 	      tree new_temp2 = make_ssa_name (vectype);
-	      g = gimple_build_assign (new_temp2, VEC_COND_EXPR, vb.build (),
-				       new_temp, vec_oprnd1);
+	      g = gimple_build_vec_cond_expr (new_temp2, vb.build (), new_temp,
+					      vec_oprnd1);
 	      new_stmt_info = vect_finish_stmt_generation (stmt_info, g, gsi);
 	      STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
 	      prev_stmt_info = new_stmt_info;
@@ -9769,7 +9770,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo,
 
    Check if STMT_INFO is conditional modify expression that can be vectorized.
    If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
-   stmt using VEC_COND_EXPR  to replace it, put it in VEC_STMT, and insert it
+   stmt using VEC_COND_*_EXPR  to replace it, put it in VEC_STMT, and insert it
    at GSI.
 
    When STMT_INFO is vectorized as a nested cycle, for_reduction is true.
@@ -10158,8 +10159,8 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
 	    {
 	      new_temp = make_ssa_name (vec_dest);
 	      gassign *new_stmt
-		= gimple_build_assign (new_temp, VEC_COND_EXPR, vec_compare,
-				       vec_then_clause, vec_else_clause);
+		= gimple_build_vec_cond_expr (new_temp, vec_compare,
+					      vec_then_clause, vec_else_clause);
 	      new_stmt_info
 		= vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
 	    }
diff --git a/gcc/tree.def b/gcc/tree.def
index fb6e7344fa6..da9d28359fd 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -551,6 +551,13 @@ DEFTREECODE (VEC_SERIES_EXPR, "vec_series_expr", tcc_binary, 2)
 */
 DEFTREECODE (VEC_COND_EXPR, "vec_cond_expr", tcc_expression, 3)
 
+DEFTREECODE (VEC_COND_LT_EXPR, "vec_cond_lt_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_LE_EXPR, "vec_cond_le_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_GT_EXPR, "vec_cond_gt_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_GE_EXPR, "vec_cond_ge_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_EQ_EXPR, "vec_cond_eq_expr", tcc_expression, 4)
+DEFTREECODE (VEC_COND_NE_EXPR, "vec_cond_ne_expr", tcc_expression, 4)
+
 /* Vector permutation expression.  A = VEC_PERM_EXPR<v0, v1, mask> means
 
    N = length(mask)
diff --git a/gcc/tree.h b/gcc/tree.h
index c825109b5f7..5512e356b9b 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -5132,6 +5132,80 @@ complete_or_array_type_p (const_tree type)
 	     && COMPLETE_TYPE_P (TREE_TYPE (type)));
 }
 
+/* Map the scalar comparison code CODE to the corresponding
+   VEC_COND_*_EXPR tree code.  */
+
+static inline tree_code
+cmp_to_vec_cmp_code (tree_code code)
+{
+  switch (code)
+    {
+    case LT_EXPR:
+      return VEC_COND_LT_EXPR;
+    case LE_EXPR:
+      return VEC_COND_LE_EXPR;
+    case GT_EXPR:
+      return VEC_COND_GT_EXPR;
+    case GE_EXPR:
+      return VEC_COND_GE_EXPR;
+    case EQ_EXPR:
+      return VEC_COND_EQ_EXPR;
+    case NE_EXPR:
+      return VEC_COND_NE_EXPR;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Map the VEC_COND_*_EXPR tree code CODE back to the scalar
+   comparison code it embeds.  */
+
+static inline tree_code
+vec_cmp_to_cmp_code (tree_code code)
+{
+  switch (code)
+    {
+    case VEC_COND_LT_EXPR:
+      return LT_EXPR;
+    case VEC_COND_LE_EXPR:
+      return LE_EXPR;
+    case VEC_COND_GT_EXPR:
+      return GT_EXPR;
+    case VEC_COND_GE_EXPR:
+      return GE_EXPR;
+    case VEC_COND_EQ_EXPR:
+      return EQ_EXPR;
+    case VEC_COND_NE_EXPR:
+      return NE_EXPR;
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Case labels covering all VEC_COND_*_EXPR tree codes.  */
+
+#define CASE_VEC_COND_EXPR                                                     \
+  case VEC_COND_LT_EXPR:                                                       \
+  case VEC_COND_LE_EXPR:                                                       \
+  case VEC_COND_GT_EXPR:                                                       \
+  case VEC_COND_GE_EXPR:                                                       \
+  case VEC_COND_EQ_EXPR:                                                       \
+  case VEC_COND_NE_EXPR
+
+/* Return true if CODE is one of the VEC_COND_*_EXPR tree codes.  */
+
+static inline bool
+vec_cond_expr_p (tree_code code)
+{
+  switch (code)
+    {
+    CASE_VEC_COND_EXPR:
+      return true;
+    default:
+      return false;
+    }
+}
+
 /* Return true if the value of T could be represented as a poly_widest_int.  */
 
 inline bool


[-- Attachment #3: failures.txt --]
[-- Type: text/plain, Size: 2967 bytes --]

=== FAILURES ===
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++14  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++17  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/ext/pr56790-1.C  -std=gnu++98  scan-tree-dump ccp1 "{ 5, 13 }"
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++14 (test for excess errors)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++17 (test for excess errors)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98  1 blank line(s) in output
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98 (internal compiler error)
FAIL: g++.dg/tree-ssa/cprop-vcond.C  -std=gnu++98 (test for excess errors)
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++14  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++17  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { -214748364[78]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-not forwprop1 " (?:>|>=|<|<=) { 214748364[67]"
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-times forwprop1 "(?:return| =) { -1, -1, -1, -1 }" 2
FAIL: g++.dg/tree-ssa/pr88152-2.C  -std=gnu++98  scan-tree-dump-times forwprop1 "(?:return| =) { 0, 0, 0, 0 }" 2
FAIL: gcc.c-torture/execute/ieee/pr50310.c compilation,  -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions  (internal compiler error)
FAIL: gcc.c-torture/execute/ieee/pr50310.c compilation,  -O3 -g  (internal compiler error)
FAIL: gcc.dg/pr50310-1.c (internal compiler error)
FAIL: gcc.dg/pr50310-1.c (test for excess errors)
FAIL: gcc.dg/pr50310-2.c (internal compiler error)
FAIL: gcc.dg/pr50310-2.c (test for excess errors)
FAIL: gcc.dg/tree-ssa/foldconst-6.c scan-tree-dump-not ccp1 "2, 666"
FAIL: gcc.dg/tree-ssa/operand-equal-2.c scan-tree-dump fre1 "v = . 0, 0, 0, 0 ."
FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
  2019-09-24 10:25 [PATCH][RFC] Come up with VEC_COND_OP_EXPRs Martin Liška
@ 2019-09-24 11:11 ` Richard Sandiford
  2019-09-24 11:29   ` Richard Biener
  2020-04-01 10:19 ` [stage1][PATCH] Lower VEC_COND_EXPR into internal functions Martin Liška
  1 sibling, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2019-09-24 11:11 UTC (permalink / raw)
  To: Martin Liška; +Cc: gcc-patches, Richard Biener

Martin Liška <mliska@suse.cz> writes:
> Hi.
>
> The patch introduces couple of new TREE_CODEs that will help us to have
> a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> the first argument is typically a GENERIC tcc_expression tree with 2 operands
> that are visited at various places in GIMPLE code. That said, based on the discussion
> with Richi, I'm suggesting to come up with e.g.
> VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>
> The patch is a prototype and missing bits are:
> - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>   UNLT_EXPR and others are not supported right now
> - comments are missing for various functions added
>
> Apart from that I was able to bootstrap and run tests with a quite small fallout.
> Thoughts?
> Martin

I think this is going in the wrong direction.  There are some targets
that can only handle VEC_COND_EXPRs well if we know the associated
condition, and others where a compare-and-VEC_COND_EXPR will always be
two operations.  In that situation, it seems like the native gimple
representation should be the simpler representation rather than the
more complex one.  That way the comparisons can be optimised
independently of any VEC_COND_EXPRs on targets that benefit from that.

So IMO it would be better to use three-operand VEC_COND_EXPRs with
no embedded conditions as the preferred gimple representation and
have internal functions for the fused operations that some targets
prefer.  This means that using fused operations is "just" an instruction
selection decision rather than hard-coded throughout gimple.  (And that
fits in well with the idea of doing more instruction selection in gimple.)

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
  2019-09-24 11:11 ` Richard Sandiford
@ 2019-09-24 11:29   ` Richard Biener
  2019-09-24 11:57     ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2019-09-24 11:29 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Martin Liška, GCC Patches

On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Martin Liška <mliska@suse.cz> writes:
> > Hi.
> >
> > The patch introduces couple of new TREE_CODEs that will help us to have
> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
> > that are visited at various places in GIMPLE code. That said, based on the discussion
> > with Richi, I'm suggesting to come up with e.g.
> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
> >
> > The patch is a prototype and missing bits are:
> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
> >   UNLT_EXPR and others are not supported right now
> > - comments are missing for various functions added
> >
> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
> > Thoughts?
> > Martin
>
> I think this is going in the wrong direction.  There are some targets
> that can only handle VEC_COND_EXPRs well if we know the associated
> condition, and others where a compare-and-VEC_COND_EXPR will always be
> two operations.  In that situation, it seems like the native gimple
> representation should be the simpler representation rather than the
> more complex one.  That way the comparisons can be optimised
> independently of any VEC_COND_EXPRs on targets that benefit from that.
>
> So IMO it would be better to use three-operand VEC_COND_EXPRs with
> no embedded conditions as the preferred gimple representation and
> have internal functions for the fused operations that some targets
> prefer.  This means that using fused operations is "just" an instruction
> selection decision rather than hard-coded throughout gimple.  (And that
> fits in well with the idea of doing more instruction selection in gimple.)

So I've been doing that before, but more generally also for COND_EXPR.
We cannot rely on TER and the existing RTL expansion "magic" for the
instruction selection issue you mention because TER isn't reliable.  With
IFNs for optabs we could do actual [vector] condition instruction selection
before RTL expansion, ignoring "single-use" issues - is that what you are
hinting at?  How should the vectorizer deal with this?  Should it directly
use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
vectorizer.

Note I'm specifically looking for a solution that applies to both COND_EXPR
and VEC_COND_EXPR since both suffer from the same issues.

There was also recent work in putting back possibly trapping comparisons
into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
better code.  Also you SVE people had VN issues with cond-exprs and
VN runs into the exact same issue (but would handle separate comparisons
better - with the caveat of breaking TER).

Richard.

>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
  2019-09-24 11:29   ` Richard Biener
@ 2019-09-24 11:57     ` Richard Sandiford
  2019-09-24 12:18       ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2019-09-24 11:57 UTC (permalink / raw)
  To: Richard Biener; +Cc: Martin Liška, GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Martin Liška <mliska@suse.cz> writes:
>> > Hi.
>> >
>> > The patch introduces couple of new TREE_CODEs that will help us to have
>> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
>> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
>> > that are visited at various places in GIMPLE code. That said, based on the discussion
>> > with Richi, I'm suggesting to come up with e.g.
>> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
>> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
>> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>> >
>> > The patch is a prototype and missing bits are:
>> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
>> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>> >   UNLT_EXPR and others are not supported right now
>> > - comments are missing for various functions added
>> >
>> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
>> > Thoughts?
>> > Martin
>>
>> I think this is going in the wrong direction.  There are some targets
>> that can only handle VEC_COND_EXPRs well if we know the associated
>> condition, and others where a compare-and-VEC_COND_EXPR will always be
>> two operations.  In that situation, it seems like the native gimple
>> representation should be the simpler representation rather than the
>> more complex one.  That way the comparisons can be optimised
>> independently of any VEC_COND_EXPRs on targets that benefit from that.
>>
>> So IMO it would be better to use three-operand VEC_COND_EXPRs with
>> no embedded conditions as the preferred gimple representation and
>> have internal functions for the fused operations that some targets
>> prefer.  This means that using fused operations is "just" an instruction
>> selection decision rather than hard-coded throughout gimple.  (And that
>> fits in well with the idea of doing more instruction selection in gimple.)
>
> So I've been doing that before, but more generally also for COND_EXPR.
> We cannot rely on TER and the existing RTL expansion "magic" for the
> instruction selection issue you mention because TER isn't reliable.  With
> IFNs for optabs we could do actual [vector] condition instruction selection
> before RTL expansion, ignoring "single-use" issues - is that what you are
> hinting at?

Yeah.  It'd be similar to how most FMA selection happens after
vectorisation but before expand.

> How should the vectorizer deal with this?  Should it directly
> use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
> most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
> vectorizer.

I guess that would be down to how well the vector costings work if we
just stick to VEC_COND_EXPR and cost the comparison separately.  Using
optabs directly in the vectoriser definitely sounds OK if that ends up
being necessary for good code.  But if (like you say) the COND_EXPR is
also split apart, we'd be costing the scalar comparison and selection
separately as well.

> Note I'm specifically looking for a solution that applies to both COND_EXPR
> and VEC_COND_EXPR since both suffer from the same issues.

Yeah, think the same approach would work for COND_EXPR if it's needed.
(And I think the same trade-off applies there too.  Some targets will
always need a separate comparison to implement a four-operand COND_EXPR.)

> There was also recent work in putting back possibly trapping comparisons
> into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
> better code.

OK, that's a good counter-reason :-)  But it seems quite special-purpose.
I assume this works even for targets that do split the VEC_COND_EXPR
because the result is undefined on entry to the EH receiver if the
operation didn't complete.  But that should be true of any non-trapping
work done after the comparison, with the same proviso.

So this still seems like an instruction-selection issue.  We're just
saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
from the non-trapping path.  The same would be true for any other
instruction selection that combines trapping and non-trapping
operations, provided that the speculated parts can never trap.

> Also you SVE people had VN issues with cond-exprs and
> VN runs into the exact same issue (but would handle separate comparisons
> better - with the caveat of breaking TER).

The VN thing turned out to be a red herring there, sorry.  I think
I was remembering the state before ifcvt did its own value numbering.
The remaining issue for the vectoriser is that we don't avoid duplicate
cast conversions in vect_recog_mask_conversion_pattern, but that's
mostly a cost thing.  The redundancies do get removed by later passes.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
  2019-09-24 11:57     ` Richard Sandiford
@ 2019-09-24 12:18       ` Richard Biener
  2019-09-24 14:51         ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2019-09-24 12:18 UTC (permalink / raw)
  To: Richard Sandiford; +Cc: Martin Liška, GCC Patches

On Tue, Sep 24, 2019 at 1:57 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> >>
> >> Martin Liška <mliska@suse.cz> writes:
> >> > Hi.
> >> >
> >> > The patch introduces couple of new TREE_CODEs that will help us to have
> >> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
> >> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
> >> > that are visited at various places in GIMPLE code. That said, based on the discussion
> >> > with Richi, I'm suggesting to come up with e.g.
> >> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
> >> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
> >> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
> >> >
> >> > The patch is a prototype and missing bits are:
> >> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
> >> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
> >> >   UNLT_EXPR and others are not supported right now
> >> > - comments are missing for various functions added
> >> >
> >> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
> >> > Thoughts?
> >> > Martin
> >>
> >> I think this is going in the wrong direction.  There are some targets
> >> that can only handle VEC_COND_EXPRs well if we know the associated
> >> condition, and others where a compare-and-VEC_COND_EXPR will always be
> >> two operations.  In that situation, it seems like the native gimple
> >> representation should be the simpler representation rather than the
> >> more complex one.  That way the comparisons can be optimised
> >> independently of any VEC_COND_EXPRs on targets that benefit from that.
> >>
> >> So IMO it would be better to use three-operand VEC_COND_EXPRs with
> >> no embedded conditions as the preferred gimple representation and
> >> have internal functions for the fused operations that some targets
> >> prefer.  This means that using fused operations is "just" an instruction
> >> selection decision rather than hard-coded throughout gimple.  (And that
> >> fits in well with the idea of doing more instruction selection in gimple.)
> >
> > So I've been doing that before, but more generally also for COND_EXPR.
> > We cannot rely on TER and the existing RTL expansion "magic" for the
> > instruction selection issue you mention because TER isn't reliable.  With
> > IFNs for optabs we could do actual [vector] condition instruction selection
> > before RTL expansion, ignoring "single-use" issues - is that what you are
> > hinting at?
>
> Yeah.  It'd be similar to how most FMA selection happens after
> vectorisation but before expand.
>
> > How should the vectorizer deal with this?  Should it directly
> > use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
> > most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
> > vectorizer.
>
> I guess that would be down to how well the vector costings work if we
> just stick to VEC_COND_EXPR and cost the comparison separately.  Using
> optabs directly in the vectoriser definitely sounds OK if that ends up
> being necessary for good code.  But if (like you say) the COND_EXPR is
> also split apart, we'd be costing the scalar comparison and selection
> separately as well.
>
> > Note I'm specifically looking for a solution that applies to both COND_EXPR
> > and VEC_COND_EXPR since both suffer from the same issues.
>
> Yeah, think the same approach would work for COND_EXPR if it's needed.
> (And I think the same trade-off applies there too.  Some targets will
> always need a separate comparison to implement a four-operand COND_EXPR.)
>
> > There was also recent work in putting back possibly trapping comparisons
> > into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
> > better code.
>
> OK, that's a good counter-reason :-)  But it seems quite special-purpose.
> I assume this works even for targets that do split the VEC_COND_EXPR
> because the result is undefined on entry to the EH receiver if the
> operation didn't complete.  But that should be true of any non-trapping
> work done after the comparison, with the same proviso.
>
> So this still seems like an instruction-selection issue.  We're just
> saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
> from the non-trapping path.  The same would be true for any other
> instruction selection that combines trapping and non-trapping
> operations, provided that the speculated parts can never trap.

Sure, but that case would necessarily be combining the compare and the
select to the compare place which is "backwards" (and would speculate
the select).  Certainly something we don't do anywhere.  This case btw
made me consider going the four-operand way (I've pondered with all available
ops multiple times...).

> > Also you SVE people had VN issues with cond-exprs and
> > VN runs into the exact same issue (but would handle separate comparisons
> > better - with the caveat of breaking TER).
>
> The VN thing turned out to be a red herring there, sorry.  I think
> I was remembering the state before ifcvt did its own value numbering.
> The remaining issue for the vectoriser is that we don't avoid duplicate
> cast conversions in vect_recog_mask_conversion_pattern, but that's
> mostly a cost thing.  The redundancies do get removed by later passes.

Well, I checked and value-numbering doesn't really handle non-trivial
"equalities" of the condition operand (if one of the operands of the
condition need to be valueized to be detected equal).

So to go forward and to make sure we don't regress the appropriate
way would probably to tackle the expansion part first.  I guess we'll
not notice for scalar COND_EXPRs (because those don't happen
very often) so we could "lower" VEC_COND_EXPRs to the desired
form (and key IL verification on PROP_gimple_lvec), which then
means late FRE/DOM have the chance to break things by doing
CSE.  At the same time we'd remove the forwprop pieces that put
the condition back in.  Then we can see to implement the
instruction selection somehow somewhere... (does it need to happen
at -O0?  I think that might be desirable - looking at vectorizer
intrinsic code might help to decide).

Does that sound sensible?  I've searched my patch archives and
could share several incomplete attempts on tackling this, dating
back to as far as 2010...)

Richard.

> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [PATCH][RFC] Come up with VEC_COND_OP_EXPRs.
  2019-09-24 12:18       ` Richard Biener
@ 2019-09-24 14:51         ` Richard Sandiford
  0 siblings, 0 replies; 65+ messages in thread
From: Richard Sandiford @ 2019-09-24 14:51 UTC (permalink / raw)
  To: Richard Biener; +Cc: Martin Liška, GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Sep 24, 2019 at 1:57 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
>>
>> Richard Biener <richard.guenther@gmail.com> writes:
>> > On Tue, Sep 24, 2019 at 1:11 PM Richard Sandiford
>> > <richard.sandiford@arm.com> wrote:
>> >>
>> >> Martin Liška <mliska@suse.cz> writes:
>> >> > Hi.
>> >> >
>> >> > The patch introduces couple of new TREE_CODEs that will help us to have
>> >> > a proper GIMPLE representation of current VECT_COND_EXPR. Right now,
>> >> > the first argument is typically a GENERIC tcc_expression tree with 2 operands
>> >> > that are visited at various places in GIMPLE code. That said, based on the discussion
>> >> > with Richi, I'm suggesting to come up with e.g.
>> >> > VECT_COND_LT_EXPR<COND_LHS, COND_RHS, IF_CLAUSE, ELSE_CLAUSE>. Such a change logically
>> >> > introduces new GIMPLE_QUATERNARY_RHS gassignments. For now, the VEC_COND_EXPR remains
>> >> > and is only valid in GENERIC and gimplifier will take care of the corresponding transition.
>> >> >
>> >> > The patch is a prototype and missing bits are:
>> >> > - folding support addition for GIMPLE_QUATERNARY_RHS is missing
>> >> > - fancy tcc_comparison expressions like LTGT_EXPR, UNORDERED_EXPR, ORDERED_EXPR,
>> >> >   UNLT_EXPR and others are not supported right now
>> >> > - comments are missing for various functions added
>> >> >
>> >> > Apart from that I was able to bootstrap and run tests with a quite small fallout.
>> >> > Thoughts?
>> >> > Martin
>> >>
>> >> I think this is going in the wrong direction.  There are some targets
>> >> that can only handle VEC_COND_EXPRs well if we know the associated
>> >> condition, and others where a compare-and-VEC_COND_EXPR will always be
>> >> two operations.  In that situation, it seems like the native gimple
>> >> representation should be the simpler representation rather than the
>> >> more complex one.  That way the comparisons can be optimised
>> >> independently of any VEC_COND_EXPRs on targets that benefit from that.
>> >>
>> >> So IMO it would be better to use three-operand VEC_COND_EXPRs with
>> >> no embedded conditions as the preferred gimple representation and
>> >> have internal functions for the fused operations that some targets
>> >> prefer.  This means that using fused operations is "just" an instruction
>> >> selection decision rather than hard-coded throughout gimple.  (And that
>> >> fits in well with the idea of doing more instruction selection in gimple.)
>> >
>> > So I've been doing that before, but more generally also for COND_EXPR.
>> > We cannot rely on TER and the existing RTL expansion "magic" for the
>> > instruction selection issue you mention because TER isn't reliable.  With
>> > IFNs for optabs we could do actual [vector] condition instruction selection
>> > before RTL expansion, ignoring "single-use" issues - is that what you are
>> > hinting at?
>>
>> Yeah.  It'd be similar to how most FMA selection happens after
>> vectorisation but before expand.
>>
>> > How should the vectorizer deal with this?  Should it directly
>> > use the optab IFNs then when facing "split" COND_EXPRs?  IIRC the
>> > most fallout of a simple patch (adjusting is_gimple_condexpr) is in the
>> > vectorizer.
>>
>> I guess that would be down to how well the vector costings work if we
>> just stick to VEC_COND_EXPR and cost the comparison separately.  Using
>> optabs directly in the vectoriser definitely sounds OK if that ends up
>> being necessary for good code.  But if (like you say) the COND_EXPR is
>> also split apart, we'd be costing the scalar comparison and selection
>> separately as well.
>>
>> > Note I'm specifically looking for a solution that applies to both COND_EXPR
>> > and VEC_COND_EXPR since both suffer from the same issues.
>>
>> Yeah, think the same approach would work for COND_EXPR if it's needed.
>> (And I think the same trade-off applies there too.  Some targets will
>> always need a separate comparison to implement a four-operand COND_EXPR.)
>>
>> > There was also recent work in putting back possibly trapping comparisons
>> > into [VEC_]COND_EXPR because it doesn't interfere with EH and allows
>> > better code.
>>
>> OK, that's a good counter-reason :-)  But it seems quite special-purpose.
>> I assume this works even for targets that do split the VEC_COND_EXPR
>> because the result is undefined on entry to the EH receiver if the
>> operation didn't complete.  But that should be true of any non-trapping
>> work done after the comparison, with the same proviso.
>>
>> So this still seems like an instruction-selection issue.  We're just
>> saying that it's OK to combine a trapping comparison and a VEC_COND_EXPR
>> from the non-trapping path.  The same would be true for any other
>> instruction selection that combines trapping and non-trapping
>> operations, provided that the speculated parts can never trap.
>
> Sure, but that case would necessarily be combining the compare and the
> select to the compare place which is "backwards" (and would speculate
> the select).  Certainly something we don't do anywhere.  This case btw
> made me consider going the four-operand way (I've pondered with all available
> ops multiple times...).

Yeah, but that was my point: speculating/moving back operations that
are dependent on the result of the comparison is valid for any non-trapping
operation, not just selects.  E.g. maybe some future target will want to
have a version of IFN_COND_ADD with an embedded condition, and the same
thing would then be useful for integer additions based on FP comparison
results.

So I don't think VEC_COND_EXPR is such a special case that we need
the four-operand form all the way through gimple.

>> > Also you SVE people had VN issues with cond-exprs and
>> > VN runs into the exact same issue (but would handle separate comparisons
>> > better - with the caveat of breaking TER).
>>
>> The VN thing turned out to be a red herring there, sorry.  I think
>> I was remembering the state before ifcvt did its own value numbering.
>> The remaining issue for the vectoriser is that we don't avoid duplicate
>> cast conversions in vect_recog_mask_conversion_pattern, but that's
>> mostly a cost thing.  The redundancies do get removed by later passes.
>
> Well, I checked and value-numbering doesn't really handle non-trivial
> "equalities" of the condition operand (if one of the operands of the
> condition need to be valueized to be detected equal).
>
> So to go forward and to make sure we don't regress the appropriate
> way would probably to tackle the expansion part first.  I guess we'll
> not notice for scalar COND_EXPRs (because those don't happen
> very often) so we could "lower" VEC_COND_EXPRs to the desired
> form (and key IL verification on PROP_gimple_lvec), which then
> means late FRE/DOM have the chance to break things by doing
> CSE.  At the same time we'd remove the forwprop pieces that put
> the condition back in.  Then we can see to implement the
> instruction selection somehow somewhere... (does it need to happen
> at -O0?  I think that might be desirable - looking at vectorizer
> intrinsic code might help to decide).

Not sure why we'd need it for correctness at -O0.  Can't VEC_COND_EXPR
always be emulated (albeit inefficiently)?  Even if you only have FP
compare-and-select, you can emulate VEC_COND_EXPRs on a 0/-1 mask.

If the code produced really is too poor even for -O0, then keeping
intrinsics as intrinsics during gimple would probably be better.

> Does that sound sensible?  I've searched my patch archives and
> could share several incomplete attempts on tackling this, dating
> back to as far as 2010...)

Sounds good to me FWIW.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2019-09-24 10:25 [PATCH][RFC] Come up with VEC_COND_OP_EXPRs Martin Liška
  2019-09-24 11:11 ` Richard Sandiford
@ 2020-04-01 10:19 ` Martin Liška
  2020-04-06  9:17   ` Richard Sandiford
  1 sibling, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-04-01 10:19 UTC (permalink / raw)
  To: gcc-patches; +Cc: Richard Biener, Richard Sandiford

[-- Attachment #1: Type: text/plain, Size: 1693 bytes --]

Hello.

This is a second attempt to get rid of tcc_comparison GENERIC trees
being used as the first argument of VEC_COND_EXPR.

The patch achieves that in the following steps:
1) the veclower pass expands all tcc_comparison expressions into an SSA_NAME
2) after that, a tcc_comparison can't be used as the first argument of VEC_COND_EXPR
    (enforced in the GIMPLE verifier)
3) I exposed new internal functions with:
DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)

4) logic of expand_vec_cond_expr is moved into the new pass_gimple_isel pass
5) the pass expands VEC_COND_EXPR into one of the internal functions defined in 3)
6) moreover, I've added new logic that prefers expand_vec_cmp_expr_p when
    an SSA_NAME is being used in multiple (2+) VEC_COND_EXPR statements

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
Moreover, I run SPEC2006 and SPEC2017 benchmarks on znver1, znver2 and skylake
target and I don't see any reasonable change.

Achieved benefits of the patch:
- removal of a GENERIC expression being used in GIMPLE statements
- extraction into SSA_NAMEs can enable proper tree optimizer (FRE, DOM, PRE)
- possibility to expand smarter based on number of uses (expand_vec_cmp_expr_p)

Future plans:
- tcc_comparison removal just during gimplification
- removal of a code where these expressions are handled for VEC_COND_EXPR
- do the similar thing for COND_EXPR?

The task was guided by Richi (Biener) and I bet he can help with both further questions
and reasoning.

Thanks,
Martin


[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 25732 bytes --]

From 4a6f4aa3cdef7a032a4ad442e6cd5ec2e706144d Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable, we should not
	reach this path.
	(do_store_flag): Likewise here.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn): Move from optabs.c.
	(expand_vec_cond_optab_fn): New alias.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Moved from optabs.c.
	(expand_vec_cond_mask_optab_fn): New alias.
	(direct_vec_cond_mask_optab_supported_p): New.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (VCOND): New internal optab
	function.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	to already lowered VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	(expand_vector_condition): Expand tcc_comparison of a VEC_COND_EXPR
	into a SSA_NAME.
	(gimple_expand_vec_cond_expr): New.
	(gimple_expand_vec_cond_exprs): New.
	(class pass_gimple_isel): New.
	(make_pass_gimple_isel): New.
---
 gcc/expr.c              |  25 +----
 gcc/internal-fn.c       |  89 ++++++++++++++++
 gcc/internal-fn.def     |   5 +
 gcc/optabs.c            | 124 +---------------------
 gcc/optabs.h            |   7 +-
 gcc/passes.def          |   1 +
 gcc/tree-cfg.c          |   8 ++
 gcc/tree-pass.h         |   1 +
 gcc/tree-ssa-forwprop.c |   6 ++
 gcc/tree-vect-generic.c | 226 ++++++++++++++++++++++++++++++++++++++--
 10 files changed, 338 insertions(+), 154 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index b97c217e86d..d6cecd0f251 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9200,17 +9200,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9799,10 +9790,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12133,8 +12120,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12142,12 +12128,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 52d1638917a..827bd5aa0d2 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2544,6 +2549,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT is based on the associated OPTAB table.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT is based on the associated OPTAB table.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3125,6 +3210,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 8dd351286cd..c66c08e7d55 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5439,7 +5439,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5804,128 +5804,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 2bf2cb78fc5..d654e5ee9fe 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -397,6 +397,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index f7b817d94e6..7154f436bb8 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4197,6 +4197,14 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (cfun->curr_properties & PROP_gimple_lvec
+	       && TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+		 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index a1207a20a3c..490bc9702be 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -625,6 +625,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 234c1f7dd7d..ce8537a58a7 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3057,6 +3057,12 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs after the
+			   vector lowering pass.  */
+			if (code == VEC_COND_EXPR
+			    && (fun->curr_properties & PROP_gimple_lvec))
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 2f6fd5e980c..587faf7eb6e 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -691,12 +691,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -704,8 +706,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -944,7 +946,17 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      if (a_is_comparison)
+	{
+	  a = gimplify_build2 (gsi, TREE_CODE (a), TREE_TYPE (a), a1, a2);
+	  gimple_assign_set_rhs1 (stmt, a);
+	  update_stmt (stmt);
+	  return;
+	}
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -2224,6 +2236,165 @@ expand_vector_operations (void)
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }
 
+/* Expand all VEC_COND_EXPR gimple assignments into calls to an internal
+   function based on the type of the selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      gimple *use_stmt;
+      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	{
+	  gassign *assign = dyn_cast<gassign *> (use_stmt);
+	  if (assign != NULL && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+	      && gimple_assign_rhs1 (assign) == op0)
+	    used_vec_cond_exprs++;
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	  /* ???  If we do not cleanup EH then we will ICE in
+	     verification.  But in reality we have created wrong-code
+	     as we did not properly transition EH info and edges to
+	     the piecewise computations.  */
+	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
+	      && gimple_purge_dead_eh_edges (bb))
+	    cfg_changed = true;
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
 namespace {
 
 const pass_data pass_data_lower_vector =
@@ -2307,4 +2478,47 @@ make_pass_lower_vector_ssa (gcc::context *ctxt)
   return new pass_lower_vector_ssa (ctxt);
 }
 
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
 #include "gt-tree-vect-generic.h"
-- 
2.26.0


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-04-01 10:19 ` [stage1][PATCH] Lower VEC_COND_EXPR into internal functions Martin Liška
@ 2020-04-06  9:17   ` Richard Sandiford
  2020-04-06 12:30     ` Richard Biener
  2020-04-06 12:33     ` Richard Biener
  0 siblings, 2 replies; 65+ messages in thread
From: Richard Sandiford @ 2020-04-06  9:17 UTC (permalink / raw)
  To: Martin Liška; +Cc: gcc-patches

Martin Liška <mliska@suse.cz> writes:
> Hello.
>
> This is second attempt to get rid of tcc_comparison GENERIC trees
> to be used as the first argument of VEC_COND_EXPR.
>
> > The patch achieves that in the following steps:
> > 1) veclower pass expands all tcc_comparison expressions into an SSA_NAME
> > 2) since that tcc_comparison can't be used as the first argument of VEC_COND_EXPR
>     (done in GIMPLE verifier)
> 3) I exposed new internal functions with:
> DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
>
> 4) logic of expand_vec_cond_expr is moved into the new pass_gimple_isel pass
> 5) the pass expands VEC_COND_EXPR into one of the internal functions defined in 3)
> 6) moreover, I've added a new logic that prefers expand_vec_cmp_expr_p when
>     a SSA_NAME is being used in multiple (2+) VEC_COND_EXPR statements
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> Moreover, I run SPEC2006 and SPEC2017 benchmarks on znver1, znver2 and skylake
> target and I don't see any reasonable change.
>
> Achieved benefits of the patch:
> - removal of a GENERIC expression being used in GIMPLE statements
> - extraction into SSA_NAMEs can enable proper tree optimizer (FRE, DOM, PRE)
> - possibility to expand smarter based on number of uses (expand_vec_cmp_expr_p)
>
> Future plans:
> - tcc_comparison removal just during gimplification
> - removal of a code where these expressions are handled for VEC_COND_EXPR
> - do the similar thing for COND_EXPR?
>
> The task was guided by Richi (Biener) and I bet he can help with both further questions
> and reasoning.

Thanks for doing this.  It definitely seems more friendly than the
four-operand version to targets where separate comparisons are the norm.

Just a couple of comments about the implementation:

> diff --git a/gcc/passes.def b/gcc/passes.def
> index 2bf2cb78fc5..d654e5ee9fe 100644
> --- a/gcc/passes.def
> +++ b/gcc/passes.def
> @@ -397,6 +397,7 @@ along with GCC; see the file COPYING3.  If not see
>    NEXT_PASS (pass_cleanup_eh);
>    NEXT_PASS (pass_lower_resx);
>    NEXT_PASS (pass_nrv);
> +  NEXT_PASS (pass_gimple_isel);
>    NEXT_PASS (pass_cleanup_cfg_post_optimizing);
>    NEXT_PASS (pass_warn_function_noreturn);
>    NEXT_PASS (pass_gen_hsail);

What was the reason for making this a separate pass, rather than doing
it as part of veclower?  If we do them separately, then it's harder for
veclower to know which VEC_COND_EXPRs it needs to open-code.  (OK, so
that's a general problem between veclower and expand already, but it
seems like the new approach could help to move away from that by
doing the instruction selection directly in veclower.)

> +/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
> +   function based on type of selected expansion.  */
> +
> +static gimple *
> +gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi)
> +{
> +  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
> +  enum tree_code code;
> +  enum tree_code tcode;
> +  machine_mode cmp_op_mode;
> +  bool unsignedp;
> +  enum insn_code icode;
> +  imm_use_iterator imm_iter;
> +
> +  /* Only consider code == GIMPLE_ASSIGN.  */
> +  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
> +  if (!stmt)
> +    return NULL;
> +
> +  code = gimple_assign_rhs_code (stmt);
> +  if (code != VEC_COND_EXPR)
> +    return NULL;
> +
> +  tree op0 = gimple_assign_rhs1 (stmt);
> +  tree op1 = gimple_assign_rhs2 (stmt);
> +  tree op2 = gimple_assign_rhs3 (stmt);
> +  lhs = gimple_assign_lhs (stmt);
> +  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> +
> +  gcc_assert (!COMPARISON_CLASS_P (op0));
> +  if (TREE_CODE (op0) == SSA_NAME)
> +    {
> +      unsigned int used_vec_cond_exprs = 0;
> +      gimple *use_stmt;
> +      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
> +	{
> +	  gassign *assign = dyn_cast<gassign *> (use_stmt);
> +	  if (assign != NULL && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
> +	      && gimple_assign_rhs1 (assign) == op0)
> +	    used_vec_cond_exprs++;
> +	}

This looks like it's quadratic in the worst case.  Could we check
this in a different way?

> +
> +      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
> +      if (def_stmt)
> +	{
> +	  tcode = gimple_assign_rhs_code (def_stmt);
> +	  op0a = gimple_assign_rhs1 (def_stmt);
> +	  op0b = gimple_assign_rhs2 (def_stmt);
> +
> +	  tree op0a_type = TREE_TYPE (op0a);
> +	  if (used_vec_cond_exprs >= 2

It would be good if targets were able to provide only vcond_mask.
In that case I guess we should go this path if the later one would fail.

> +	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
> +		  != CODE_FOR_nothing)
> +	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
> +	    {
> +	      /* Keep the SSA name and use vcond_mask.  */
> +	      tcode = TREE_CODE (op0);
> +	    }
> +	}
> +      else
> +	tcode = TREE_CODE (op0);
> +    }
> +  else
> +    tcode = TREE_CODE (op0);

Might be easier to follow if tcode is TREE_CODE (op0) by default and
only gets changed when we want to fold in the comparison.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-04-06  9:17   ` Richard Sandiford
@ 2020-04-06 12:30     ` Richard Biener
  2020-05-21 12:51       ` Martin Liška
  2020-04-06 12:33     ` Richard Biener
  1 sibling, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-04-06 12:30 UTC (permalink / raw)
  To: Martin Liška, GCC Patches, Richard Sandiford

On Mon, Apr 6, 2020 at 11:18 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Martin Liška <mliska@suse.cz> writes:
> > Hello.
> >
> > This is the second attempt to get rid of tcc_comparison GENERIC trees
> > being used as the first argument of VEC_COND_EXPR.
> >
> > The patch attempts to achieve that in the following steps:
> > 1) veclower pass expands all tcc_comparison expressions into an SSA_NAME
> > 2) after that, a tcc_comparison can't be used as the first argument of VEC_COND_EXPR
> >     (done in GIMPLE verifier)
> > 3) I exposed new internal functions with:
> > DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> > DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> > DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> > DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
> >
> > 4) logic of expand_vec_cond_expr is moved into the new pass_gimple_isel pass
> > 5) the pass expands VEC_COND_EXPR into one of the internal functions defined in 3)
> > 6) moreover, I've added a new logic that prefers expand_vec_cmp_expr_p when
> >     a SSA_NAME is being used in multiple (2+) VEC_COND_EXPR statements
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> > Moreover, I run SPEC2006 and SPEC2017 benchmarks on znver1, znver2 and skylake
> > target and I don't see any reasonable change.
> >
> > Achieved benefits of the patch:
> > - removal of a GENERIC expression being used in GIMPLE statements
> > - extraction into SSA_NAMEs can enable proper tree optimizer (FRE, DOM, PRE)
> > - possibility to expand smarter based on number of uses (expand_vec_cmp_expr_p)
> >
> > Future plans:
> > - tcc_comparison removal just during gimplification
> > - removal of a code where these expressions are handled for VEC_COND_EXPR
> > - do the similar thing for COND_EXPR?
> >
> > The task was guided by Richi (Biener) and I bet he can help with both further questions
> > and reasoning.
>
> Thanks for doing this.  It definitely seems more friendly than the
> four-operand version to targets where separate comparisons are the norm.
>
> Just a couple of comments about the implementation:
>
> > diff --git a/gcc/passes.def b/gcc/passes.def
> > index 2bf2cb78fc5..d654e5ee9fe 100644
> > --- a/gcc/passes.def
> > +++ b/gcc/passes.def
> > @@ -397,6 +397,7 @@ along with GCC; see the file COPYING3.  If not see
> >    NEXT_PASS (pass_cleanup_eh);
> >    NEXT_PASS (pass_lower_resx);
> >    NEXT_PASS (pass_nrv);
> > +  NEXT_PASS (pass_gimple_isel);
> >    NEXT_PASS (pass_cleanup_cfg_post_optimizing);
> >    NEXT_PASS (pass_warn_function_noreturn);
> >    NEXT_PASS (pass_gen_hsail);
>
> What was the reason for making this a separate pass, rather than doing
> it as part of veclower?  If we do them separately, then it's harder for
> veclower to know which VEC_COND_EXPRs it needs to open-code.  (OK, so
> that's a general problem between veclower and expand already, but it
> seems like the new approach could help to move away from that by
> doing the instruction selection directly in veclower.)

As the name of the pass suggests it was supposed to be the starting point
of doing all the "complex" (multi-GIMPLE-stmt matching) RTL expansion tricks.

But most importantly veclower is too early to catch CSE opportunities from
loop opts on the conditions and if veclower lowers things then we also want
CSE to cleanup its mess.  I guess we also do not want veclower to be done
before vectorization since it should be easier to re-vectorize from unsupported
vector code than from what veclower makes out of it ... catch-22.

So I consider pass placement a secondary issue for now.

> > +/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
> > +   function based on type of selected expansion.  */
> > +
> > +static gimple *
> > +gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi)
> > +{
> > +  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
> > +  enum tree_code code;
> > +  enum tree_code tcode;
> > +  machine_mode cmp_op_mode;
> > +  bool unsignedp;
> > +  enum insn_code icode;
> > +  imm_use_iterator imm_iter;
> > +
> > +  /* Only consider code == GIMPLE_ASSIGN.  */
> > +  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
> > +  if (!stmt)
> > +    return NULL;
> > +
> > +  code = gimple_assign_rhs_code (stmt);
> > +  if (code != VEC_COND_EXPR)
> > +    return NULL;
> > +
> > +  tree op0 = gimple_assign_rhs1 (stmt);
> > +  tree op1 = gimple_assign_rhs2 (stmt);
> > +  tree op2 = gimple_assign_rhs3 (stmt);
> > +  lhs = gimple_assign_lhs (stmt);
> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> > +
> > +  gcc_assert (!COMPARISON_CLASS_P (op0));
> > +  if (TREE_CODE (op0) == SSA_NAME)
> > +    {
> > +      unsigned int used_vec_cond_exprs = 0;
> > +      gimple *use_stmt;
> > +      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
> > +     {
> > +       gassign *assign = dyn_cast<gassign *> (use_stmt);
> > +       if (assign != NULL && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
> > +           && gimple_assign_rhs1 (assign) == op0)
> > +         used_vec_cond_exprs++;
> > +     }
>
> This looks like it's quadratic in the worst case.  Could we check
> this in a different way?
>
> > +
> > +      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
> > +      if (def_stmt)
> > +     {
> > +       tcode = gimple_assign_rhs_code (def_stmt);
> > +       op0a = gimple_assign_rhs1 (def_stmt);
> > +       op0b = gimple_assign_rhs2 (def_stmt);
> > +
> > +       tree op0a_type = TREE_TYPE (op0a);
> > +       if (used_vec_cond_exprs >= 2
>
> It would be good if targets were able to provide only vcond_mask.
> In that case I guess we should go this path if the later one would fail.
>
> > +           && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
> > +               != CODE_FOR_nothing)
> > +           && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
> > +         {
> > +           /* Keep the SSA name and use vcond_mask.  */
> > +           tcode = TREE_CODE (op0);
> > +         }
> > +     }
> > +      else
> > +     tcode = TREE_CODE (op0);
> > +    }
> > +  else
> > +    tcode = TREE_CODE (op0);
>
> Might be easier to follow if tcode is TREE_CODE (op0) by default and
> only gets changed when we want to fold in the comparison.
>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-04-06  9:17   ` Richard Sandiford
  2020-04-06 12:30     ` Richard Biener
@ 2020-04-06 12:33     ` Richard Biener
  1 sibling, 0 replies; 65+ messages in thread
From: Richard Biener @ 2020-04-06 12:33 UTC (permalink / raw)
  To: Martin Liška, GCC Patches, Richard Sandiford

On Mon, Apr 6, 2020 at 11:18 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Martin Liška <mliska@suse.cz> writes:
> > Hello.
> >
> > This is second attempt to get rid of tcc_comparison GENERIC trees
> > to be used as the first argument of VEC_COND_EXPR.
> >
> > The patch attempts to achieve that in the following steps:
> > 1) veclower pass expands all tcc_comparison expressions into an SSA_NAME
> > 2) after that, a tcc_comparison can't be used as the first argument of VEC_COND_EXPR
> >     (done in GIMPLE verifier)
> > 3) I exposed new internal functions with:
> > DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> > DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> > DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> > DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
> >
> > 4) logic of expand_vec_cond_expr is moved into the new pass_gimple_isel pass
> > 5) the pass expands VEC_COND_EXPR into one of the internal functions defined in 3)
> > 6) moreover, I've added a new logic that prefers expand_vec_cmp_expr_p when
> >     a SSA_NAME is being used in multiple (2+) VEC_COND_EXPR statements
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> > Moreover, I run SPEC2006 and SPEC2017 benchmarks on znver1, znver2 and skylake
> > target and I don't see any reasonable change.
> >
> > Achieved benefits of the patch:
> > - removal of a GENERIC expression being used in GIMPLE statements
> > - extraction into SSA_NAMEs can enable proper tree optimizer (FRE, DOM, PRE)
> > - possibility to expand smarter based on number of uses (expand_vec_cmp_expr_p)
> >
> > Future plans:
> > - tcc_comparison removal just during gimplification
> > - removal of a code where these expressions are handled for VEC_COND_EXPR
> > - do the similar thing for COND_EXPR?
> >
> > The task was guided by Richi (Biener) and I bet he can help with both further questions
> > and reasoning.
>
> Thanks for doing this.  It definitely seems more friendly than the
> four-operand version to targets where separate comparisons are the norm.
>
> Just a couple of comments about the implementation:
>
> > diff --git a/gcc/passes.def b/gcc/passes.def
> > index 2bf2cb78fc5..d654e5ee9fe 100644
> > --- a/gcc/passes.def
> > +++ b/gcc/passes.def
> > @@ -397,6 +397,7 @@ along with GCC; see the file COPYING3.  If not see
> >    NEXT_PASS (pass_cleanup_eh);
> >    NEXT_PASS (pass_lower_resx);
> >    NEXT_PASS (pass_nrv);
> > +  NEXT_PASS (pass_gimple_isel);
> >    NEXT_PASS (pass_cleanup_cfg_post_optimizing);
> >    NEXT_PASS (pass_warn_function_noreturn);
> >    NEXT_PASS (pass_gen_hsail);
>
> What was the reason for making this a separate pass, rather than doing
> it as part of veclower?  If we do them separately, then it's harder for
> veclower to know which VEC_COND_EXPRs it needs to open-code.  (OK, so
> that's a general problem between veclower and expand already, but it
> seems like the new approach could help to move away from that by
> doing the instruction selection directly in veclower.)
>
> > +/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
> > +   function based on type of selected expansion.  */
> > +
> > +static gimple *
> > +gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi)
> > +{
> > +  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
> > +  enum tree_code code;
> > +  enum tree_code tcode;
> > +  machine_mode cmp_op_mode;
> > +  bool unsignedp;
> > +  enum insn_code icode;
> > +  imm_use_iterator imm_iter;
> > +
> > +  /* Only consider code == GIMPLE_ASSIGN.  */
> > +  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
> > +  if (!stmt)
> > +    return NULL;
> > +
> > +  code = gimple_assign_rhs_code (stmt);
> > +  if (code != VEC_COND_EXPR)
> > +    return NULL;
> > +
> > +  tree op0 = gimple_assign_rhs1 (stmt);
> > +  tree op1 = gimple_assign_rhs2 (stmt);
> > +  tree op2 = gimple_assign_rhs3 (stmt);
> > +  lhs = gimple_assign_lhs (stmt);
> > +  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
> > +
> > +  gcc_assert (!COMPARISON_CLASS_P (op0));
> > +  if (TREE_CODE (op0) == SSA_NAME)
> > +    {
> > +      unsigned int used_vec_cond_exprs = 0;
> > +      gimple *use_stmt;
> > +      FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
> > +     {
> > +       gassign *assign = dyn_cast<gassign *> (use_stmt);
> > +       if (assign != NULL && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
> > +           && gimple_assign_rhs1 (assign) == op0)
> > +         used_vec_cond_exprs++;
> > +     }
>
> This looks like it's quadratic in the worst case.  Could we check
> this in a different way?

We could remember an SSA name's cond-expr uses and thus only compute it
once.

> > +
> > +      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
> > +      if (def_stmt)
> > +     {
> > +       tcode = gimple_assign_rhs_code (def_stmt);
> > +       op0a = gimple_assign_rhs1 (def_stmt);
> > +       op0b = gimple_assign_rhs2 (def_stmt);
> > +
> > +       tree op0a_type = TREE_TYPE (op0a);
> > +       if (used_vec_cond_exprs >= 2
>
> It would be good if targets were able to provide only vcond_mask.
> In that case I guess we should go this path if the later one would fail.
>
> > +           && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
> > +               != CODE_FOR_nothing)
> > +           && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
> > +         {
> > +           /* Keep the SSA name and use vcond_mask.  */
> > +           tcode = TREE_CODE (op0);
> > +         }
> > +     }
> > +      else
> > +     tcode = TREE_CODE (op0);
> > +    }
> > +  else
> > +    tcode = TREE_CODE (op0);
>
> Might be easier to follow if tcode is TREE_CODE (op0) by default and
> only gets changed when we want to fold in the comparison.
>
> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-04-06 12:30     ` Richard Biener
@ 2020-05-21 12:51       ` Martin Liška
  2020-05-21 13:29         ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-05-21 12:51 UTC (permalink / raw)
  To: Richard Biener, GCC Patches, Richard Sandiford

Hi.

Back to this I noticed that ppc64le target build is broken due to:

g++  -fno-PIE -c   -g   -DIN_GCC  -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I/home/marxin/Programming/gcc/gcc -I/home/marxin/Programming/gcc/gcc/. -I/home/marxin/Programming/gcc/gcc/../include -I/home/marxin/Programming/gcc/gcc/../libcpp/include  -I/home/marxin/Programming/gcc/gcc/../libdecnumber -I/home/marxin/Programming/gcc/gcc/../libdecnumber/dpd -I../libdecnumber -I/home/marxin/Programming/gcc/gcc/../libbacktrace   -o insn-emit.o -MT insn-emit.o -MMD -MP -MF ./.deps/insn-emit.TPo insn-emit.c
/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error: vcondv4sfv4sf cannot FAIL
   357 |     FAIL;
       |           ^
/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error: vcondv2dfv2df cannot FAIL
   357 |     FAIL;
       |           ^
/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:374:11: error: vcondv16qiv16qi cannot FAIL
   374 |     FAIL;
       |           ^
/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:374:11: error: vcondv8hiv8hi cannot FAIL
   374 |     FAIL;
       |           ^
...


which is caused by the 4 added optabs:

+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)

looking at the generator:

Breakpoint 6, gen_expand (info=0x7fffffffe160) at /home/marxin/Programming/gcc/gcc/genemit.c:516
516	      if (find_optab (&p, XSTR (expand, 0)))
(gdb) bt
#0  emit_c_code (code=0x7fa0f0 "{\n  if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],\n\t\t\t\t    operands[3], operands[4], operands[5]))\n    DONE;\n  else\n    FAIL;\n}", can_fail_p=false, name=0x7fa190 "vcondv4sfv4sf")
     at /home/marxin/Programming/gcc/gcc/genemit.c:306
#1  0x00000000004039b5 in gen_expand (info=0x7fffffffe160) at /home/marxin/Programming/gcc/gcc/genemit.c:522
#2  0x0000000000404912 in main (argc=4, argv=0x7fffffffe288) at /home/marxin/Programming/gcc/gcc/genemit.c:916

I get there due to:

B- │516               if (find_optab (&p, XSTR (expand, 0)))│
    │517                 {                                   │
    │518                   gcc_assert (p.op < NUM_OPTABS);   │
    │519                   if (nofail_optabs[p.op])          │
    │520                     can_fail_p = false;             │
    │521                 }                                   │


#define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
   nofail_optabs[OPTAB##_optab] = true;
#include "internal-fn.def"

Any hint what's bad? Note that x86_64-linux-gnu is fine.
Do I miss a target hook?

Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-21 12:51       ` Martin Liška
@ 2020-05-21 13:29         ` Martin Liška
  2020-05-21 20:16           ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-05-21 13:29 UTC (permalink / raw)
  To: Richard Biener, GCC Patches, Richard Sandiford, Segher Boessenkool

Adding Segher to CC, he can help us.

Martin

On 5/21/20 2:51 PM, Martin Liška wrote:
> Hi.
> 
> Back to this I noticed that ppc64le target build is broken due to:
> 
> g++  -fno-PIE -c   -g   -DIN_GCC  -DCROSS_DIRECTORY_STRUCTURE   -fno-exceptions -fno-rtti -fasynchronous-unwind-tables -W -Wall -Wno-narrowing -Wwrite-strings -Wcast-qual -Wmissing-format-attribute -Woverloaded-virtual -pedantic -Wno-long-long -Wno-variadic-macros -Wno-overlength-strings -fno-common  -DHAVE_CONFIG_H -I. -I. -I/home/marxin/Programming/gcc/gcc -I/home/marxin/Programming/gcc/gcc/. -I/home/marxin/Programming/gcc/gcc/../include -I/home/marxin/Programming/gcc/gcc/../libcpp/include  -I/home/marxin/Programming/gcc/gcc/../libdecnumber -I/home/marxin/Programming/gcc/gcc/../libdecnumber/dpd -I../libdecnumber -I/home/marxin/Programming/gcc/gcc/../libbacktrace   -o insn-emit.o -MT insn-emit.o -MMD -MP -MF ./.deps/insn-emit.TPo insn-emit.c
> /home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error: vcondv4sfv4sf cannot FAIL
>    357 |     FAIL;
>        |           ^
> /home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error: vcondv2dfv2df cannot FAIL
>    357 |     FAIL;
>        |           ^
> /home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:374:11: error: vcondv16qiv16qi cannot FAIL
>    374 |     FAIL;
>        |           ^
> /home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:374:11: error: vcondv8hiv8hi cannot FAIL
>    374 |     FAIL;
>        |           ^
> ...
> 
> 
> which is caused by the 4 added optabs:
> 
> +DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> +DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> +DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> +DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
> 
> looking at the generator:
> 
> Breakpoint 6, gen_expand (info=0x7fffffffe160) at /home/marxin/Programming/gcc/gcc/genemit.c:516
> 516          if (find_optab (&p, XSTR (expand, 0)))
> (gdb) bt
> #0  emit_c_code (code=0x7fa0f0 "{\n  if (rs6000_emit_vector_cond_expr (operands[0], operands[1], operands[2],\n\t\t\t\t    operands[3], operands[4], operands[5]))\n    DONE;\n  else\n    FAIL;\n}", can_fail_p=false, name=0x7fa190 "vcondv4sfv4sf")
>      at /home/marxin/Programming/gcc/gcc/genemit.c:306
> #1  0x00000000004039b5 in gen_expand (info=0x7fffffffe160) at /home/marxin/Programming/gcc/gcc/genemit.c:522
> #2  0x0000000000404912 in main (argc=4, argv=0x7fffffffe288) at /home/marxin/Programming/gcc/gcc/genemit.c:916
> 
> I get there due to:
> 
> B- │516               if (find_optab (&p, XSTR (expand, 0)))│
>     │517                 {                                   │
>     │518                   gcc_assert (p.op < NUM_OPTABS);   │
>     │519                   if (nofail_optabs[p.op])          │
>     │520                     can_fail_p = false;             │
>     │521                 }                                   │
> 
> 
> #define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
>    nofail_optabs[OPTAB##_optab] = true;
> #include "internal-fn.def"
> 
> Any hint what's bad? Note that x86_64-linux-gnu is fine.
> Do I miss a target hook?
> 
> Martin


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-21 13:29         ` Martin Liška
@ 2020-05-21 20:16           ` Segher Boessenkool
  2020-05-22 11:14             ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-21 20:16 UTC (permalink / raw)
  To: Martin Liška; +Cc: Richard Biener, GCC Patches, Richard Sandiford

Hi!

On Thu, May 21, 2020 at 03:29:49PM +0200, Martin Liška wrote:
> Adding Segher to CC, he can help us.

Oh dear.  Are you sure?

> On 5/21/20 2:51 PM, Martin Liška wrote:
> >Back to this I noticed that ppc64le target build is broken due to:

> >insn-emit.o -MMD -MP -MF ./.deps/insn-emit.TPo insn-emit.c
> >/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error: 
> >vcondv4sfv4sf cannot FAIL
> >   357 |     FAIL;

Is it new that vcond cannot FAIL?  Because we have done that for years.

Since this breaks bootstrap on a primary target, please revert the patch
until it is sorted.

> >which is caused by the 4 added optabs:
> >
> >+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> >+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> >+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> >+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)

> >looking at the generator:

> >I get there due to:
> >
> >B- │516               if (find_optab (&p, XSTR (expand, 
> >0)))│
> >    │517                 
> > {                                   │
> >    │518                   gcc_assert (p.op < 
> > NUM_OPTABS);   │
> >    │519                   if 
> > (nofail_optabs[p.op])          │
> >    │520                     can_fail_p = 
> > false;             │
> >    │521                 
> > }                                   │
> >
> >
> >#define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
> >   nofail_optabs[OPTAB##_optab] = true;

So yes it is new.  Please fix :-(

> >Any hint what's bad? Note that x86_64-linux-gnu is fine.
> >Do I miss a target hook?

There is a new IFN that requires the existing optabs to never fail.  But
they *do* sometimes fail.  That is what I understand from this anyway,
please correct if needed :-)

We can make the rs6000 patterns never FAIL if that is a good idea (I am
not convinced however), but this should be documented, and all existing
targets need to be checked.

In general it is not pleasant at all to have patterns that cannot FAIL,
it makes writing a (new) port much harder, and there can be cases where
there is no sane code at all that can be generated for some cases, etc.


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-21 20:16           ` Segher Boessenkool
@ 2020-05-22 11:14             ` Richard Biener
  2020-05-26 10:15               ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-05-22 11:14 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Martin Liška, GCC Patches, Richard Sandiford

On Thu, May 21, 2020 at 10:17 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> Hi!
>
> On Thu, May 21, 2020 at 03:29:49PM +0200, Martin Liška wrote:
> > Adding Segher to CC, he can help us.
>
> Oh dear.  Are you sure?
>
> > On 5/21/20 2:51 PM, Martin Liška wrote:
> > >Back to this I noticed that ppc64le target build is broken due to:
>
> > >insn-emit.o -MMD -MP -MF ./.deps/insn-emit.TPo insn-emit.c
> > >/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error:
> > >vcondv4sfv4sf cannot FAIL
> > >   357 |     FAIL;
>
> Is it new that vcond cannot FAIL?  Because we have done that for years.
>
> Since this breaks bootstrap on a primary target, please revert the patch
> until it is sorted.
>
> > >which is caused by the 4 added optabs:
> > >
> > >+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
> > >+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
> > >+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
> > >+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
>
> > >looking at the generator:
>
> > >I get there due to:
> > >
> > >B- │516               if (find_optab (&p, XSTR (expand,
> > >0)))│
> > >    │517
> > > {                                   │
> > >    │518                   gcc_assert (p.op <
> > > NUM_OPTABS);   │
> > >    │519                   if
> > > (nofail_optabs[p.op])          │
> > >    │520                     can_fail_p =
> > > false;             │
> > >    │521
> > > }                                   │

OK, so this is an "artifact" of direct internal functions.  We do check that
expansion does not actually FAIL before emitting calls to those IFNs.

I guess this simply makes direct internal functions not a 100% match for
our use and the way out is to add regular internal functions mapping to
the optabs.  That is, I guess, for direct-internal functions it should be
enough to check direct_internal_function_supported_p which it is not
for the case of vcond*.

Richard, do you agree?

Thanks,
Richard.

> > >
> > >#define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
> > >   nofail_optabs[OPTAB##_optab] = true;
>
> So yes it is new.  Please fix :-(
>
> > >Any hint what's bad? Note that x86_64-linux-gnu is fine.
> > >Do I miss a target hook?
>
> There is a new IFN that requires the existing optabs to never fail.  But
> they *do* sometimes fail.  That is what I understand from this anyway,
> please correct if needed :-)
>
> We can make the rs6000 patterns never FAIL if that is a good idea (I am
> not convinced however), but this should be documented, and all existing
> targets need to be checked.
>
> In general it is not pleasant at all to have patterns that cannot FAIL,
> it makes writing a (new) port much harder, and there can be cases where
> there is no sane code at all that can be generated for some cases, etc.
>
>
> Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-22 11:14             ` Richard Biener
@ 2020-05-26 10:15               ` Richard Sandiford
  2020-05-27 14:04                 ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-26 10:15 UTC (permalink / raw)
  To: Richard Biener; +Cc: Segher Boessenkool, Martin Liška, GCC Patches

Richard Biener <richard.guenther@gmail.com> writes:
> On Thu, May 21, 2020 at 10:17 PM Segher Boessenkool
> <segher@kernel.crashing.org> wrote:
>>
>> Hi!
>>
>> On Thu, May 21, 2020 at 03:29:49PM +0200, Martin Liška wrote:
>> > Adding Segher to CC, he can help us.
>>
>> Oh dear.  Are you sure?
>>
>> > On 5/21/20 2:51 PM, Martin Liška wrote:
>> > >Back to this I noticed that ppc64le target build is broken due to:
>>
>> > >insn-emit.o -MMD -MP -MF ./.deps/insn-emit.TPo insn-emit.c
>> > >/home/marxin/Programming/gcc/gcc/config/rs6000/vector.md:357:11: error:
>> > >vcondv4sfv4sf cannot FAIL
>> > >   357 |     FAIL;
>>
>> Is it new that vcond cannot FAIL?  Because we have done that for years.
>>
>> Since this breaks bootstrap on a primary target, please revert the patch
>> until it is sorted.
>>
>> > >which is caused by the 4 added optabs:
>> > >
>> > >+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
>> > >+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
>> > >+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
>> > >+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
>>
>> > >looking at the generator:
>>
>> > >I get there due to:
>> > >
>> > >B- │516               if (find_optab (&p, XSTR (expand,
>> > >0)))│
>> > >    │517
>> > > {                                   │
>> > >    │518                   gcc_assert (p.op <
>> > > NUM_OPTABS);   │
>> > >    │519                   if
>> > > (nofail_optabs[p.op])          │
>> > >    │520                     can_fail_p =
>> > > false;             │
>> > >    │521
>> > > }                                   │
>
> OK, so this is an "artifact" of direct internal functions.  We do check that
> expansion does not actually FAIL before emitting calls to those IFNs.
>
> I guess this simply makes direct internal functions not a 100% match for
> our use and the way out is to add regular internal functions mapping to
> the optabs.  That is, I guess, for direct-internal functions it should be
> enough to check direct_internal_function_supported_p which it is not
> for the case of vcond*.
>
> Richard, do you agree?

Sorry for the late reply, been off for a few days.

I guess that would be OK for VCOND(U) as an intermediate step,
but long term, I think we should try to make all VCOND* directly-mapped.
If we're doing instruction selection on gimple (a good thing IMO)
we need to know before expand whether an operation is supported.

So longer-term, I think we should replace VCOND(U) with individual ifns,
like for VCONDEQ.  We could reduce the number of optabs needed by
canonicalising greater-based tests to lesser-based tests.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-26 10:15               ` Richard Sandiford
@ 2020-05-27 14:04                 ` Martin Liška
  2020-05-27 16:13                   ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-05-27 14:04 UTC (permalink / raw)
  To: Richard Biener, Segher Boessenkool, GCC Patches, richard.sandiford

[-- Attachment #1: Type: text/plain, Size: 511 bytes --]

On 5/26/20 12:15 PM, Richard Sandiford wrote:
> So longer-term, I think we should replace VCOND(U) with individual ifns,
> like for VCONDEQ.  We could reduce the number of optabs needed by
> canonicalising greater-based tests to lesser-based tests.

Hello.

Thanks for the feedback. So would it be possible to go with something
like DEF_INTERNAL_OPTAB_CAN_FAIL (see the attachment)?

I'm sending the complete patch that survives bootstrap and regression
tests on x86_64-linux-gnu and ppc64le-linux-gnu.

Martin

[-- Attachment #2: optab.patch --]
[-- Type: text/x-patch, Size: 2708 bytes --]

diff --git a/gcc/genemit.c b/gcc/genemit.c
index 84d07d388ee..23c89dbf4e9 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -857,6 +857,9 @@ main (int argc, const char **argv)
 
 #define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
   nofail_optabs[OPTAB##_optab] = true;
+
+#define DEF_INTERNAL_OPTAB_CAN_FAIL(OPTAB) \
+  nofail_optabs[OPTAB##_optab] = false;
 #include "internal-fn.def"
 
   /* Assign sequential codes to all entries in the machine description
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 0c6fc371190..373273de2c2 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 				   UNSIGNED_OPTAB, TYPE)
      DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+     DEF_INTERNAL_OPTAB_CAN_FAIL (OPTAB)
 
    where NAME is the name of the function, FLAGS is a set of
    ECF_* flags and FNSPEC is a string describing functions fnspec.
@@ -86,7 +87,10 @@ along with GCC; see the file COPYING3.  If not see
 
    where STMT is the statement that performs the call.  These are generated
    automatically for optab functions and call out to a function or macro
-   called expand_<TYPE>_optab_fn.  */
+   called expand_<TYPE>_optab_fn.
+
+   DEF_INTERNAL_OPTAB_CAN_FAIL defines tables that are used for GIMPLE
+   instruction selection and do not map directly to instructions.  */
 
 #ifndef DEF_INTERNAL_FN
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC)
@@ -118,6 +122,10 @@ along with GCC; see the file COPYING3.  If not see
   DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
 #endif
 
+#ifndef DEF_INTERNAL_OPTAB_CAN_FAIL
+#define DEF_INTERNAL_OPTAB_CAN_FAIL(OPTAB)
+#endif
+
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD, ECF_PURE, maskload, mask_load)
 DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
@@ -141,6 +149,11 @@ DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
 DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
 DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
 
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcond)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcondu)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcondeq)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
@@ -385,4 +398,5 @@ DEF_INTERNAL_FN (NOP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 #undef DEF_INTERNAL_FLT_FLOATN_FN
 #undef DEF_INTERNAL_SIGNED_OPTAB_FN
 #undef DEF_INTERNAL_OPTAB_FN
+#undef DEF_INTERNAL_OPTAB_CAN_FAIL
 #undef DEF_INTERNAL_FN

[-- Attachment #3: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 28409 bytes --]

From 981952917dea9aaef3be13375fbf452566d926b3 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable, we should not
	reach this path.
	(do_store_flag): Likewise here.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn): Move from optabs.c.
	(expand_vec_cond_optab_fn): New alias.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Moved from optabs.c.
	(expand_vec_cond_mask_optab_fn): New alias.
	(direct_vec_cond_mask_optab_supported_p): New.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (DEF_INTERNAL_OPTAB_CAN_FAIL): New macro.
	(VCOND): New internal optab
	function.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	to already lowered VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	(expand_vector_condition): Expand tcc_comparison of a VEC_COND_EXPR
	into a SSA_NAME.
	(gimple_expand_vec_cond_expr): New.
	(gimple_expand_vec_cond_exprs): New.
	(class pass_gimple_isel): New.
	(make_pass_gimple_isel): New.
	* genemit.c (DEF_INTERNAL_OPTAB_CAN_FAIL): Support optabs that
	can fail.
---
 gcc/expr.c              |  25 +----
 gcc/genemit.c           |   3 +
 gcc/internal-fn.c       |  89 +++++++++++++++
 gcc/internal-fn.def     |  21 +++-
 gcc/optabs.c            | 124 +--------------------
 gcc/optabs.h            |   7 +-
 gcc/passes.def          |   1 +
 gcc/tree-cfg.c          |   8 ++
 gcc/tree-pass.h         |   1 +
 gcc/tree-ssa-forwprop.c |   6 +
 gcc/tree-vect-generic.c | 237 +++++++++++++++++++++++++++++++++++++++-
 11 files changed, 367 insertions(+), 155 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index dfbeae71518..a757394f436 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9205,17 +9205,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9804,10 +9795,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12138,8 +12125,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12147,12 +12133,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/genemit.c b/gcc/genemit.c
index 84d07d388ee..23c89dbf4e9 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -857,6 +857,9 @@ main (int argc, const char **argv)
 
 #define DEF_INTERNAL_OPTAB_FN(NAME, FLAGS, OPTAB, TYPE) \
   nofail_optabs[OPTAB##_optab] = true;
+
+#define DEF_INTERNAL_OPTAB_CAN_FAIL(OPTAB) \
+  nofail_optabs[OPTAB##_optab] = false;
 #include "internal-fn.def"
 
   /* Assign sequential codes to all entries in the machine description
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on the associated OPTAB.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on the associated OPTAB.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..373273de2c2 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3.  If not see
 				   UNSIGNED_OPTAB, TYPE)
      DEF_INTERNAL_FLT_FN (NAME, FLAGS, OPTAB, TYPE)
      DEF_INTERNAL_INT_FN (NAME, FLAGS, OPTAB, TYPE)
+     DEF_INTERNAL_OPTAB_CAN_FAIL (OPTAB)
 
    where NAME is the name of the function, FLAGS is a set of
    ECF_* flags and FNSPEC is a string describing functions fnspec.
@@ -86,7 +87,10 @@ along with GCC; see the file COPYING3.  If not see
 
    where STMT is the statement that performs the call.  These are generated
    automatically for optab functions and call out to a function or macro
-   called expand_<TYPE>_optab_fn.  */
+   called expand_<TYPE>_optab_fn.
+
+   DEF_INTERNAL_OPTAB_CAN_FAIL defines tables that are used for GIMPLE
+   instruction selection and do not map directly to instructions.  */
 
 #ifndef DEF_INTERNAL_FN
 #define DEF_INTERNAL_FN(CODE, FLAGS, FNSPEC)
@@ -118,6 +122,10 @@ along with GCC; see the file COPYING3.  If not see
   DEF_INTERNAL_OPTAB_FN (NAME, FLAGS, OPTAB, TYPE)
 #endif
 
+#ifndef DEF_INTERNAL_OPTAB_CAN_FAIL
+#define DEF_INTERNAL_OPTAB_CAN_FAIL(OPTAB)
+#endif
+
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD, ECF_PURE, maskload, mask_load)
 DEF_INTERNAL_OPTAB_FN (LOAD_LANES, ECF_CONST, vec_load_lanes, load_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_LOAD_LANES, ECF_PURE,
@@ -136,6 +144,16 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcond)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcondu)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcondeq)
+DEF_INTERNAL_OPTAB_CAN_FAIL (vcond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
@@ -380,4 +398,5 @@ DEF_INTERNAL_FN (NOP, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 #undef DEF_INTERNAL_FLT_FLOATN_FN
 #undef DEF_INTERNAL_SIGNED_OPTAB_FN
 #undef DEF_INTERNAL_OPTAB_FN
+#undef DEF_INTERNAL_OPTAB_CAN_FAIL
 #undef DEF_INTERNAL_FN
diff --git a/gcc/optabs.c b/gcc/optabs.c
index d85ce47f762..5c19e4271e7 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5439,7 +5439,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5806,128 +5806,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 92cbe587a8a..e9f59d756c9 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -398,6 +398,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..16ff06fbf88 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4199,6 +4199,14 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (cfun->curr_properties & PROP_gimple_lvec
+	       && TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+		 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 576b3f67434..4efece1b35b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -626,6 +626,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 759baf56897..fce392e204c 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3125,6 +3125,12 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs after they
+			   have been lowered by the vector lowering pass.  */
+			if (code == VEC_COND_EXPR
+			    && (fun->curr_properties & PROP_gimple_lvec))
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..8f6d63f01c5 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -964,7 +966,17 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      if (a_is_comparison)
+	{
+	  a = gimplify_build2 (gsi, TREE_CODE (a), TREE_TYPE (a), a1, a2);
+	  gimple_assign_set_rhs1 (stmt, a);
+	  update_stmt (stmt);
+	  return;
+	}
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -2241,6 +2253,176 @@ expand_vector_operations (void)
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }
 
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   functions based on the type of the selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	  /* ???  If we do not cleanup EH then we will ICE in
+	     verification.  But in reality we have created wrong-code
+	     as we did not properly transition EH info and edges to
+	     the piecewise computations.  */
+	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
+	      && gimple_purge_dead_eh_edges (bb))
+	    cfg_changed = true;
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
 namespace {
 
 const pass_data pass_data_lower_vector =
@@ -2324,4 +2506,47 @@ make_pass_lower_vector_ssa (gcc::context *ctxt)
   return new pass_lower_vector_ssa (ctxt);
 }
 
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
 #include "gt-tree-vect-generic.h"
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-27 14:04                 ` Martin Liška
@ 2020-05-27 16:13                   ` Richard Sandiford
  2020-05-27 16:32                     ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-27 16:13 UTC (permalink / raw)
  To: Martin Liška; +Cc: Richard Biener, Segher Boessenkool, GCC Patches

Martin Liška <mliska@suse.cz> writes:
> On 5/26/20 12:15 PM, Richard Sandiford wrote:
>> So longer-term, I think we should replace VCOND(U) with individual ifns,
>> like for VCONDEQ.  We could reduce the number of optabs needed by
>> canonicalising greater-based tests to lesser-based tests.
>
> Hello.
>
> Thanks for the feedback. So would it be possible to go with something
> like DEF_INTERNAL_OPTAB_CAN_FAIL (see the attachment)?

It doesn't look like this will solve the problem.  The reason that we
don't allow optabs for directly-mapped IFNs to FAIL is that:

  expand_insn (icode, 6, ops);

will (deliberately) ICE when the pattern FAILs.  Code that copes with
FAILing optabs instead needs to do:

  rtx_insn *watermark = get_last_insn (); <-- position where it should go.
  ...
  if (maybe_expand_insn (icode, 6, ops))
    {
      ...Success...;
    }

  delete_insns_since (watermark);
  ...fallback code that implements the IFN without optab support...

At this point the IFN isn't really directly-mapped in the intended sense:
the optab is “just” a way of optimising the IFN.

So I think the effect of the patch will be to suppress the build failure,
but instead ICE for PowerPC when the FAIL condition is hit.  It might
be quite difficult to trigger though.  (That's why the static checking
is there. :-))

I think instead we should treat VCOND(U) as not directly-mapped,
as Richard suggested (IIRC).  The internal-fn.c code should then handle
the case in which we have an IFN_VCOND(U) call and the associated
optab fails.  Of course, this is only going to be exercised on targets
like powerpc* that having failing patterns, so it'll need testing there.

What I meant by the quote above is that I think this shows the flaw in
using IFN_VCOND(U) rather than splitting it up further.  Longer term,
we should have a separate IFN_VCOND* and optab for each necessary
condition.  There would then be no need (IMO) to allow the patterns
to FAIL, and we could use directly-mapped IFNs with no fallback.
There'd also be no need for the tree comparison operand to the IFN.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-27 16:13                   ` Richard Sandiford
@ 2020-05-27 16:32                     ` Richard Biener
  2020-05-28 14:46                       ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-05-27 16:32 UTC (permalink / raw)
  To: Richard Sandiford, Martin Liška; +Cc: Segher Boessenkool, GCC Patches

On May 27, 2020 6:13:24 PM GMT+02:00, Richard Sandiford <richard.sandiford@arm.com> wrote:
>Martin Liška <mliska@suse.cz> writes:
>> On 5/26/20 12:15 PM, Richard Sandiford wrote:
>>> So longer-term, I think we should replace VCOND(U) with individual
>ifns,
>>> like for VCONDEQ.  We could reduce the number of optabs needed by
>>> canonicalising greater-based tests to lesser-based tests.
>>
>> Hello.
>>
>> Thanks for the feedback. So would it be possible to go with something
>> like DEF_INTERNAL_OPTAB_CAN_FAIL (see the attachment)?
>
>It doesn't look like this will solve the problem.  The reason that we
>don't allow optabs for directly-mapped IFNs to FAIL is that:
>
>  expand_insn (icode, 6, ops);
>
>will (deliberately) ICE when the pattern FAILs.  Code that copes with
>FAILing optabs instead needs to do:
>
>rtx_insn *watermark = get_last_insn (); <-- position where the fallback
>code should go.
>  ...
>  if (maybe_expand_insn (icode, 6, ops))
>    {
>      ...Success...;
>    }
>
>  delete_insns_since (watermark);
>  ...fallback code that implements the IFN without optab support...
>
>At this point the IFN isn't really directly-mapped in the intended
>sense:
>the optab is “just” a way of optimising the IFN.
>
>So I think the effect of the patch will be to suppress the build
>failure,
>but instead ICE for PowerPC when the FAIL condition is hit.  It might
>be quite difficult to trigger though.  (That's why the static checking
>is there. :-))
>
>I think instead we should treat VCOND(U) as not directly-mapped,
>as Richard suggested (IIRC).  The internal-fn.c code should then handle
>the case in which we have an IFN_VCOND(U) call and the associated
>optab fails.  Of course, this is only going to be exercised on targets
>like powerpc* that have failing patterns, so it'll need testing
>there.
>
>What I meant by the quote above is that I think this shows the flaw in
>using IFN_VCOND(U) rather than splitting it up further.  Longer term,
>we should have a separate IFN_VCOND* and optab for each necessary
>condition.  There would then be no need (IMO) to allow the patterns
>to FAIL, and we could use directly-mapped IFNs with no fallback.
>There'd also be no need for the tree comparison operand to the IFN.

That might be indeed a good idea. 

Richard. 

>Thanks,
>Richard


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-27 16:32                     ` Richard Biener
@ 2020-05-28 14:46                       ` Martin Liška
  2020-05-28 15:28                         ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-05-28 14:46 UTC (permalink / raw)
  To: Richard Biener, Richard Sandiford; +Cc: Segher Boessenkool, GCC Patches

[-- Attachment #1: Type: text/plain, Size: 206 bytes --]

Hi.

There's a new patch that adds normal internal functions for the 4
VCOND* functions.

The patch survives bootstrap and regression
tests on x86_64-linux-gnu and ppc64le-linux-gnu.

Thoughts?
Martin

[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 24612 bytes --]

From 9a8880a601c7820eb2d0c9104367ea454571681e Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable, we should not
	reach this path.
	(do_store_flag): Likewise here.
	* internal-fn.c (expand_vect_cond_optab_fn): New.
	(expand_VCOND): Likewise.
	(expand_VCONDU): Likewise.
	(expand_VCONDEQ): Likewise.
	(expand_vect_cond_mask_optab_fn): Likewise.
	(expand_VCOND_MASK): Likewise.
	* internal-fn.def (VCOND): New.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	to already lowered VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	(expand_vector_condition): Expand tcc_comparison of a VEC_COND_EXPR
	into a SSA_NAME.
	(gimple_expand_vec_cond_expr): New.
	(gimple_expand_vec_cond_exprs): New.
	(class pass_gimple_isel): New.
	(make_pass_gimple_isel): New.
---
 gcc/expr.c              |  25 +----
 gcc/internal-fn.c       |  98 +++++++++++++++++
 gcc/internal-fn.def     |   5 +
 gcc/optabs.c            | 124 +--------------------
 gcc/optabs.h            |   7 +-
 gcc/passes.def          |   1 +
 gcc/tree-cfg.c          |   8 ++
 gcc/tree-pass.h         |   1 +
 gcc/tree-ssa-forwprop.c |   6 +
 gcc/tree-vect-generic.c | 237 +++++++++++++++++++++++++++++++++++++++-
 10 files changed, 358 insertions(+), 154 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index dfbeae71518..a757394f436 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9205,17 +9205,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9804,10 +9795,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12138,8 +12125,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12147,12 +12133,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..aa41b4f6870 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -2548,6 +2549,103 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ internal functions.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn ifn, gcall *stmt)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  enum optab_tag optab;
+  switch (ifn)
+    {
+    case IFN_VCOND:
+      optab = vcond_optab;
+      break;
+    case IFN_VCONDU:
+      optab = vcondu_optab;
+      break;
+    case IFN_VCONDEQ:
+      optab = vcondeq_optab;
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_VCOND expand_vect_cond_optab_fn
+#define expand_VCONDU expand_vect_cond_optab_fn
+#define expand_VCONDEQ expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK internal function.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+
+  enum insn_code icode = convert_optab_handler (vcond_mask_optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_VCOND_MASK expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..5602619fd2a 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -319,6 +319,11 @@ DEF_INTERNAL_FN (TSAN_FUNC_EXIT, ECF_NOVOPS | ECF_LEAF | ECF_NOTHROW, NULL)
 DEF_INTERNAL_FN (VA_ARG, ECF_NOTHROW | ECF_LEAF, NULL)
 DEF_INTERNAL_FN (VEC_CONVERT, ECF_CONST | ECF_LEAF | ECF_NOTHROW, NULL)
 
+DEF_INTERNAL_FN(VCOND, ECF_NOTHROW | ECF_LEAF, NULL)
+DEF_INTERNAL_FN(VCONDU, ECF_NOTHROW | ECF_LEAF, NULL)
+DEF_INTERNAL_FN(VCONDEQ, ECF_NOTHROW | ECF_LEAF, NULL)
+DEF_INTERNAL_FN(VCOND_MASK, ECF_NOTHROW | ECF_LEAF, NULL)
+
 /* An unduplicable, uncombinable function.  Generally used to preserve
    a CFG property in the face of jump threading, tail merging or
    other such optimizations.  The first argument distinguishes
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 7a4ec1ec01c..6621a1462b9 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 92cbe587a8a..e9f59d756c9 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -398,6 +398,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..16ff06fbf88 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4199,6 +4199,14 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (cfun->curr_properties & PROP_gimple_lvec
+	       && TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+		 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 576b3f67434..4efece1b35b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -626,6 +626,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 759baf56897..fce392e204c 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3125,6 +3125,12 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs after the
+			   vector lowering pass.  */
+			if (code == VEC_COND_EXPR
+			    && (fun->curr_properties & PROP_gimple_lvec))
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..8f6d63f01c5 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -964,7 +966,17 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      if (a_is_comparison)
+	{
+	  a = gimplify_build2 (gsi, TREE_CODE (a), TREE_TYPE (a), a1, a2);
+	  gimple_assign_set_rhs1 (stmt, a);
+	  update_stmt (stmt);
+	  return;
+	}
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -2241,6 +2253,176 @@ expand_vector_operations (void)
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }
 
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	  /* ???  If we do not cleanup EH then we will ICE in
+	     verification.  But in reality we have created wrong-code
+	     as we did not properly transition EH info and edges to
+	     the piecewise computations.  */
+	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
+	      && gimple_purge_dead_eh_edges (bb))
+	    cfg_changed = true;
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
 namespace {
 
 const pass_data pass_data_lower_vector =
@@ -2324,4 +2506,47 @@ make_pass_lower_vector_ssa (gcc::context *ctxt)
   return new pass_lower_vector_ssa (ctxt);
 }
 
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
 #include "gt-tree-vect-generic.h"
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-28 14:46                       ` Martin Liška
@ 2020-05-28 15:28                         ` Richard Sandiford
  2020-05-29 12:17                           ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-28 15:28 UTC (permalink / raw)
  To: Martin Liška; +Cc: Richard Biener, Segher Boessenkool, GCC Patches

Martin Liška <mliska@suse.cz> writes:
> Hi.
>
> There's a new patch that adds normal internal functions for the 4
> VCOND* functions.
>
> The patch that survives bootstrap and regression
> tests on x86_64-linux-gnu and ppc64le-linux-gnu.

I think this has the same problem as the previous one.  What I meant
in yesterday's message is that:

  expand_insn (icode, 6, ops);

is simply not valid when icode is allowed to FAIL.  That's true in
any context, not just internal functions.  If icode does FAIL,
the expand_insn call will ICE:

  if (!maybe_expand_insn (icode, nops, ops))
    gcc_unreachable ();

When using optabs you either:

(a) declare that the md patterns aren't allowed to FAIL.  expand_insn
    is for this case.

(b) allow the md patterns to FAIL and provide a fallback when they do.
    maybe_expand_insn is for this case.

So if we keep IFN_VCOND, we need to use maybe_expand_insn and find some
way of implementing the IFN_VCOND when the pattern FAILs.

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-28 15:28                         ` Richard Sandiford
@ 2020-05-29 12:17                           ` Richard Biener
  2020-05-29 12:43                             ` Richard Biener
  2020-05-29 15:39                             ` Segher Boessenkool
  0 siblings, 2 replies; 65+ messages in thread
From: Richard Biener @ 2020-05-29 12:17 UTC (permalink / raw)
  To: Martin Liška, Richard Biener, Segher Boessenkool,
	GCC Patches, Richard Sandiford

On Thu, May 28, 2020 at 5:28 PM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Martin Liška <mliska@suse.cz> writes:
> > Hi.
> >
> > There's a new patch that adds normal internal functions for the 4
> > VCOND* functions.
> >
> > The patch that survives bootstrap and regression
> > tests on x86_64-linux-gnu and ppc64le-linux-gnu.
>
> I think this has the same problem as the previous one.  What I meant
> in yesterday's message is that:
>
>   expand_insn (icode, 6, ops);
>
> is simply not valid when icode is allowed to FAIL.  That's true in
> any context, not just internal functions.  If icode does FAIL,
> the expand_insn call will ICE:
>
>   if (!maybe_expand_insn (icode, nops, ops))
>     gcc_unreachable ();
>
> When using optabs you either:
>
> (a) declare that the md patterns aren't allowed to FAIL.  expand_insn
>     is for this case.
>
> (b) allow the md patterns to FAIL and provide a fallback when they do.
>     maybe_expand_insn is for this case.
>
> So if we keep IFN_VCOND, we need to use maybe_expand_insn and find some
> way of implementing the IFN_VCOND when the pattern FAILs.

But we should not have generated the pattern in that case - we actually verify
we can expand at the time we do this "instruction selection".  This is in-line
with other vectorizations where we also do not expect things to FAIL.

See also the expanders that are removed in the patch.

But adding a comment in the internal function expander to reflect this
is probably good, also pointing to the verification routines (the
preexisting expand_vec_cond_expr_p and expand_vec_cmp_expr_p
routines).  Because of this pre-verification I suggested the direct
internal function first, not being aware of the static cannot-FAIL logic.

Now it looks like those verifications also simply check optab
availability only, but then this is just a preexisting issue (and we can
possibly build a testcase that FAILs RTL expansion for power...).

So given that this means the latent bug in the powerpc backend
should be fixed and we should use a direct internal function instead?

Thanks,
Richard.

> Thanks,
> Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 12:17                           ` Richard Biener
@ 2020-05-29 12:43                             ` Richard Biener
  2020-05-29 16:47                               ` Segher Boessenkool
  2020-05-29 15:39                             ` Segher Boessenkool
  1 sibling, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-05-29 12:43 UTC (permalink / raw)
  To: Martin Liška, Richard Biener, Segher Boessenkool,
	GCC Patches, Richard Sandiford

On Fri, May 29, 2020 at 2:17 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Thu, May 28, 2020 at 5:28 PM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Martin Liška <mliska@suse.cz> writes:
> > > Hi.
> > >
> > > There's a new patch that adds normal internal functions for the 4
> > > VCOND* functions.
> > >
> > > The patch that survives bootstrap and regression
> > > tests on x86_64-linux-gnu and ppc64le-linux-gnu.
> >
> > I think this has the same problem as the previous one.  What I meant
> > in yesterday's message is that:
> >
> >   expand_insn (icode, 6, ops);
> >
> > is simply not valid when icode is allowed to FAIL.  That's true in
> > any context, not just internal functions.  If icode does FAIL,
> > the expand_insn call will ICE:
> >
> >   if (!maybe_expand_insn (icode, nops, ops))
> >     gcc_unreachable ();
> >
> > When using optabs you either:
> >
> > (a) declare that the md patterns aren't allowed to FAIL.  expand_insn
> >     is for this case.
> >
> > (b) allow the md patterns to FAIL and provide a fallback when they do.
> >     maybe_expand_insn is for this case.
> >
> > So if we keep IFN_VCOND, we need to use maybe_expand_insn and find some
> > way of implementing the IFN_VCOND when the pattern FAILs.
>
> But we should not have generated the pattern in that case - we actually verify
> we can expand at the time we do this "instruction selection".  This is in-line
> with other vectorizations where we also do not expect things to FAIL.
>
> See also the expanders that are removed in the patch.
>
> But adding a comment in the internal function expander to reflect this
> is probably good, also pointing to the verification routines (the
> preexisting expand_vec_cond_expr_p and expand_vec_cmp_expr_p
> routines).  Because of this pre-verification I suggested the direct
> internal function first, not being aware of the static cannot-FAIL logic.
>
> Now it looks like that those verification also simply checks optab
> availability only but then this is just a preexisting issue (and we can
> possibly build a testcase that FAILs RTL expansion for power...).
>
> So given that this means the latent bug in the powerpc backend
> should be fixed and we should use a direct internal function instead?

So I tried to understand the circumstances the rs6000 patterns FAIL
but FAILed ;)  It looks like some outs of rs6000_emit_vector_cond_expr
are unwarranted and the following should work:

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 8435bc15d72..5503215a00a 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -14638,8 +14638,7 @@ rs6000_emit_vector_compare (enum rtx_code rcode,
        rtx mask2;

        rev_code = reverse_condition_maybe_unordered (rcode);
-       if (rev_code == UNKNOWN)
-         return NULL_RTX;
+       gcc_assert (rev_code != UNKNOWN);

        nor_code = optab_handler (one_cmpl_optab, dmode);
        if (nor_code == CODE_FOR_nothing)
@@ -14737,8 +14736,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx
op_true, rtx op_false,
   rtx cond2;
   bool invert_move = false;

-  if (VECTOR_UNIT_NONE_P (dest_mode))
-    return 0;
+  gcc_assert (VECTOR_UNIT_NONE_P (dest_mode));

   gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
              && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
@@ -14756,8 +14754,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx
op_true, rtx op_false,
         e.g., A  = (B != C) ? D : E becomes A = (B == C) ? E : D.  */
       invert_move = true;
       rcode = reverse_condition_maybe_unordered (rcode);
-      if (rcode == UNKNOWN)
-       return 0;
+      gcc_assert (rcode != UNKNOWN);
       break;

     case GE:

which leaves the

  /* Get the vector mask for the given relational operations.  */
  mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);

  if (!mask)
    return 0;

fail but that function recurses heavily - from reading
rs6000_emit_vector_compare_inner
it looks like power can do a lot of compares but floating-point LT which
reverse_condition_maybe_unordered would turn into UNGE which is not
handled either.
But then rs6000_emit_vector_compare just tries GT for that anyway (not UNGE), so
it is actually handled (but should it not be?).

So I bet the expansion of the patterns cannot fail at the moment.  Thus I'd
replace the FAIL with a gcc_unreachable () and see if we have test
coverage for those
FAILs.

Segher - do you actually know this code to guess why the patterns are defensive?

Thanks,
Richard.

> Thanks,
> Richard.
>
> > Thanks,
> > Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 12:17                           ` Richard Biener
  2020-05-29 12:43                             ` Richard Biener
@ 2020-05-29 15:39                             ` Segher Boessenkool
  2020-05-29 16:57                               ` Richard Sandiford
  1 sibling, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-29 15:39 UTC (permalink / raw)
  To: Richard Biener; +Cc: Martin Liška, GCC Patches, Richard Sandiford

On Fri, May 29, 2020 at 02:17:00PM +0200, Richard Biener wrote:
> Now it looks like that those verification also simply checks optab
> availability only but then this is just a preexisting issue (and we can
> possibly build a testcase that FAILs RTL expansion for power...).
> 
> So given that this means the latent bug in the powerpc backend
> should be fixed and we should use a direct internal function instead?

I don't see what you consider a bug in the backend here?  The expansion
FAILs, and it is explicitly allowed to do that.

Not allowed to FAIL are:
-- The "lanes" things;
-- vec_duplicate, vec_series;
-- maskload, maskstore;
-- fmin, fmax;
-- madd and friends;
-- sqrt, rsqrt;
-- fmod, remainder;
-- scalb, ldexp;
-- sin, cos, tan, asin, acos, atan;
-- exp, expm1, exp10, exp2, log, log1p, log10, log2, logb;
-- significand, pow, atan2, floor, btrunc, round, ceil, nearbyint, rint;
-- copysign, xorsign;
-- ffs, clrsb, clz, ctz, popcount, parity.

All vcond* patterns are allowed to fail.

Maybe ours don't *need* to, but that doesn't change a thing.

In general, it is a Very Good Thing if patterns are allowed to fail: if
they are not allowed to fail, they have to duplicate all the code that
the generic expander should have, into ever target that needs it.  It
also makes writing a (new) backend easier.


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 12:43                             ` Richard Biener
@ 2020-05-29 16:47                               ` Segher Boessenkool
  2020-05-29 17:05                                 ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-29 16:47 UTC (permalink / raw)
  To: Richard Biener; +Cc: Martin Liška, GCC Patches, Richard Sandiford

On Fri, May 29, 2020 at 02:43:12PM +0200, Richard Biener wrote:
> So I tried to understand the circumstances the rs6000 patterns FAIL
> but FAILed ;)  It looks like some outs of rs6000_emit_vector_cond_expr
> are unwarranted and the following should work:
> 
> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
> index 8435bc15d72..5503215a00a 100644
> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -14638,8 +14638,7 @@ rs6000_emit_vector_compare (enum rtx_code rcode,

(Different function, btw)

>         rtx mask2;
> 
>         rev_code = reverse_condition_maybe_unordered (rcode);
> -       if (rev_code == UNKNOWN)
> -         return NULL_RTX;
> +       gcc_assert (rev_code != UNKNOWN);

reverse_condition_maybe_unordered is documented as possibly returning
UNKNOWN.  The current implementation doesn't, sure.  But fix that first?

rs6000_emit_vector_compare can fail for several other reasons, too --
including when rs6000_emit_vector_compare_inner fails.

> @@ -14737,8 +14736,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx
> op_true, rtx op_false,
>    rtx cond2;
>    bool invert_move = false;
> 
> -  if (VECTOR_UNIT_NONE_P (dest_mode))
> -    return 0;
> +  gcc_assert (VECTOR_UNIT_NONE_P (dest_mode));

Why can this condition never be true?  (Missing a ! btw)

It needs a big comment if you want to make wide assumptions like that,
in any case.  Pretty much *all* (non-trivial) asserts need an explanation.

(And perhaps VECTOR_UNIT_ALTIVEC_OR_VSX_P is better).

>   /* Get the vector mask for the given relational operations.  */
>   mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
> 
>   if (!mask)
>     return 0;
> 
> fail but that function recurses heavily - from reading
> rs6000_emit_vector_compare_inner
> it looks like power can do a lot of compares but floating-point LT which
> reverse_condition_maybe_unordered would turn into UNGE which is not
> handled either.
> But then rs6000_emit_vector_compare just tries GT for that anyway (not UNGE) so
> it is actually be handled (but should not?).
> 
> So I bet the expansion of the patterns cannot fail at the moment.  Thus I'd
> replace the FAIL with a gcc_unreachable () and see if we have test
> coverage for those
> FAILs.

I am not comfortable with that at all.

> Segher - do you actually know this code to guess why the patterns are defensive?

Yes.


If you want to change the documented semantics of widely used functions,
please propose that?


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 15:39                             ` Segher Boessenkool
@ 2020-05-29 16:57                               ` Richard Sandiford
  2020-05-29 17:09                                 ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-29 16:57 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Richard Biener, Martin Liška, GCC Patches

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Fri, May 29, 2020 at 02:17:00PM +0200, Richard Biener wrote:
>> Now it looks like that those verification also simply checks optab
>> availability only but then this is just a preexisting issue (and we can
>> possibly build a testcase that FAILs RTL expansion for power...).
>> 
>> So given that this means the latent bug in the powerpc backend
>> should be fixed and we should use a direct internal function instead?
>
> I don't see what you consider a bug in the backend here?  The expansion
> FAILs, and it is explicitly allowed to do that.

Well, the docs say:

  …  For **certain** named patterns, it may invoke @code{FAIL} to tell the
  compiler to use an alternate way of performing that task.  …

(my emphasis).  Later on they say:

  @findex FAIL
  @item FAIL
  …

  Failure is currently supported only for binary (addition, multiplication,
  shifting, etc.) and bit-field (@code{extv}, @code{extzv}, and @code{insv})
  operations.

which explicitly says that vcond* isn't allowed to fail.

OK, so that list looks out of date.  But still. :-)

We now explicitly say that some patterns aren't allowed to FAIL,
which I guess gives the (implicit) impression that all the others can.
But that wasn't the intention.  The lines were just added for emphasis.
(AFAIK 7f9844caf1ebd513 was the first patch to do this.)

Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 16:47                               ` Segher Boessenkool
@ 2020-05-29 17:05                                 ` Richard Sandiford
  2020-05-29 17:30                                   ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-29 17:05 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Richard Biener, Martin Liška, GCC Patches

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Fri, May 29, 2020 at 02:43:12PM +0200, Richard Biener wrote:
>> So I tried to understand the circumstances the rs6000 patterns FAIL
>> but FAILed ;)  It looks like some outs of rs6000_emit_vector_cond_expr
>> are unwarranted and the following should work:
>> 
>> diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
>> index 8435bc15d72..5503215a00a 100644
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -14638,8 +14638,7 @@ rs6000_emit_vector_compare (enum rtx_code rcode,
>
> (Different function, btw)
>
>>         rtx mask2;
>> 
>>         rev_code = reverse_condition_maybe_unordered (rcode);
>> -       if (rev_code == UNKNOWN)
>> -         return NULL_RTX;
>> +       gcc_assert (rev_code != UNKNOWN);
>
> reverse_condition_maybe_unordered is documented as possibly returning
> UNKNOWN.  The current implementation doesn't, sure.  But fix that first?
>
> rs6000_emit_vector_compare can fail for several other reasons, too --
> including when rs6000_emit_vector_compare_inner fails.
>
>> @@ -14737,8 +14736,7 @@ rs6000_emit_vector_cond_expr (rtx dest, rtx
>> op_true, rtx op_false,
>>    rtx cond2;
>>    bool invert_move = false;
>> 
>> -  if (VECTOR_UNIT_NONE_P (dest_mode))
>> -    return 0;
>> +  gcc_assert (VECTOR_UNIT_NONE_P (dest_mode));
>
> Why can this condition never be true?  (Missing a ! btw)
>
> It needs a big comment if you want to make wide assumptions like that,
> in any case.  Pretty much *all* (non-trivial) asserts need an explanation.
>
> (And perhaps VECTOR_UNIT_ALTIVEC_OR_VSX_P is better).
>
>>   /* Get the vector mask for the given relational operations.  */
>>   mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
>> 
>>   if (!mask)
>>     return 0;
>> 
>> fail but that function recurses heavily - from reading
>> rs6000_emit_vector_compare_inner
>> it looks like power can do a lot of compares but floating-point LT which
>> reverse_condition_maybe_unordered would turn into UNGE which is not
>> handled either.
>> But then rs6000_emit_vector_compare just tries GT for that anyway (not UNGE) so
>> it is actually be handled (but should not?).
>> 
>> So I bet the expansion of the patterns cannot fail at the moment.  Thus I'd
>> replace the FAIL with a gcc_unreachable () and see if we have test
>> coverage for those
>> FAILs.
>
> I am not comfortable with that at all.
>
>> Segher - do you actually know this code to guess why the patterns are defensive?
>
> Yes.

In that case, can you give a specific example in which the patterns do
actually fail?

I think Richard's point is that even the current compiler will ICE if
the vcond* patterns fail.  All Martin's patch did was expose that via
the extra static checking we get for directly-mapped internal fns.
If you want us to fix that by providing a fallback, we need to know what
the fallback should do.  E.g. the obvious thing would be to emit the
embedded comparison separately and then emit bitwise operations to
implement the select.  But in the powerpc case, it's actually the
comparison that's the potential problem, so that expansion would just
kick the can further down the road.

So which vector comparisons doesn't powerpc support, and what should the
fallback vcond* expansion for them be?

Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 16:57                               ` Richard Sandiford
@ 2020-05-29 17:09                                 ` Segher Boessenkool
  2020-05-29 17:26                                   ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-29 17:09 UTC (permalink / raw)
  To: Richard Biener, Martin Liška, GCC Patches, richard.sandiford

On Fri, May 29, 2020 at 05:57:13PM +0100, Richard Sandiford wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> > On Fri, May 29, 2020 at 02:17:00PM +0200, Richard Biener wrote:
> >> Now it looks like that those verification also simply checks optab
> >> availability only but then this is just a preexisting issue (and we can
> >> possibly build a testcase that FAILs RTL expansion for power...).
> >> 
> >> So given that this means the latent bug in the powerpc backend
> >> should be fixed and we should use a direct internal function instead?
> >
> > I don't see what you consider a bug in the backend here?  The expansion
> > FAILs, and it is explicitly allowed to do that.
> 
> Well, the docs say:
> 
>   …  For **certain** named patterns, it may invoke @code{FAIL} to tell the
>   compiler to use an alternate way of performing that task.  …
> 
> (my emphasis).  Later on they say:
> 
>   @findex FAIL
>   @item FAIL
>   …
> 
>   Failure is currently supported only for binary (addition, multiplication,
>   shifting, etc.) and bit-field (@code{extv}, @code{extzv}, and @code{insv})
>   operations.
> 
> which explicitly says that vcond* isn't allowed to fail.
> 
> OK, so that list looks out of date.  But still. :-)
> 
> We now explicitly say that some patterns aren't allowed to FAIL,
> which I guess gives the (implicit) impression that all the others can.
> But that wasn't the intention.  The lines were just added for emphasis.
> (AFAIK 7f9844caf1ebd513 was the first patch to do this.)

Most patterns *do* FAIL on some target.  We cannot rewind time.


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 17:09                                 ` Segher Boessenkool
@ 2020-05-29 17:26                                   ` Richard Sandiford
  2020-05-29 17:37                                     ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-29 17:26 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Richard Biener, Martin Liška, GCC Patches

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Fri, May 29, 2020 at 05:57:13PM +0100, Richard Sandiford wrote:
>> Segher Boessenkool <segher@kernel.crashing.org> writes:
>> > On Fri, May 29, 2020 at 02:17:00PM +0200, Richard Biener wrote:
>> >> Now it looks like that those verification also simply checks optab
>> >> availability only but then this is just a preexisting issue (and we can
>> >> possibly build a testcase that FAILs RTL expansion for power...).
>> >> 
>> >> So given that this means the latent bug in the powerpc backend
>> >> should be fixed and we should use a direct internal function instead?
>> >
>> > I don't see what you consider a bug in the backend here?  The expansion
>> > FAILs, and it is explicitly allowed to do that.
>> 
>> Well, the docs say:
>> 
>>   …  For **certain** named patterns, it may invoke @code{FAIL} to tell the
>>   compiler to use an alternate way of performing that task.  …
>> 
>> (my emphasis).  Later on they say:
>> 
>>   @findex FAIL
>>   @item FAIL
>>   …
>> 
>>   Failure is currently supported only for binary (addition, multiplication,
>>   shifting, etc.) and bit-field (@code{extv}, @code{extzv}, and @code{insv})
>>   operations.
>> 
>> which explicitly says that vcond* isn't allowed to fail.
>> 
>> OK, so that list looks out of date.  But still. :-)
>> 
>> We now explicitly say that some patterns aren't allowed to FAIL,
>> which I guess gives the (implicit) impression that all the others can.
>> But that wasn't the intention.  The lines were just added for emphasis.
>> (AFAIK 7f9844caf1ebd513 was the first patch to do this.)
>
> Most patterns *do* FAIL on some target.  We cannot rewind time.

Sure.  But the point is that FAILing isn't “explicitly allowed” for vcond*.
In fact it's the opposite.

If we ignore the docs and look at what the status quo actually is --
which I agree seems safest for GCC :-) -- then patterns are allowed to
FAIL if target-independent code provides an expand-time fallback for
the FAILing case.  But that isn't true for vcond either.
expand_vec_cond_expr does:

  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
  if (icode == CODE_FOR_nothing)
    ...

  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
				   icode, 4);
  rtx_op1 = expand_normal (op1);
  rtx_op2 = expand_normal (op2);

  create_output_operand (&ops[0], target, mode);
  create_input_operand (&ops[1], rtx_op1, mode);
  create_input_operand (&ops[2], rtx_op2, mode);
  create_fixed_operand (&ops[3], comparison);
  create_fixed_operand (&ops[4], XEXP (comparison, 0));
  create_fixed_operand (&ops[5], XEXP (comparison, 1));
  expand_insn (icode, 6, ops);
  return ops[0].value;

which ICEs if the expander FAILs.

So whether you go from the docs or from what's actually implemented,
vcond* isn't currently allowed to FAIL.  All Richard's gcc_unreachable
suggestion would do is change where the ICE happens.

Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 17:05                                 ` Richard Sandiford
@ 2020-05-29 17:30                                   ` Segher Boessenkool
  0 siblings, 0 replies; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-29 17:30 UTC (permalink / raw)
  To: Richard Biener, Martin Liška, GCC Patches, richard.sandiford

On Fri, May 29, 2020 at 06:05:14PM +0100, Richard Sandiford wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> > On Fri, May 29, 2020 at 02:43:12PM +0200, Richard Biener wrote:
> >> Segher - do you actually know this code to guess why the patterns are defensive?
> >
> > Yes.
> 
> In that case, can you give a specific example in which the patterns do
> actually fail?

That is a very different question.  (And this is shifting the burden of
proof again.)

> I think Richard's point is that even the current compiler will ICE if
> the vcond* patterns fail.  All Martin's patch did was expose that via
> the extra static checking we get for directly-mapped internal fns.

How will they ICE?

> If you want us to fix that by providing a fallback, we need to know what
> the fallback should do.

Just whatever vcond* is documented to do, of course ;-)

> E.g. the obvious thing would be to emit the
> embedded comparison separately and then emit bitwise operations to
> implement the select.  But in the powerpc case, it's actually the
> comparison that's the potential problem, so that expansion would just
> kick the can further down the road.
> 
> So which vector comparisons doesn't powerpc support, and what should the
> fallback vcond* expansion for them be?

It depends on which set of vector registers is in use, and on the ISA
version as well, what the hardware can do.  What the backend can do --
well, it is allowed to FAIL these patterns, and it sometimes does.
That's the whole point isn't it?

vec_cmp* won't FAIL.  I don't know if there is a portable variant of
this?


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 17:26                                   ` Richard Sandiford
@ 2020-05-29 17:37                                     ` Segher Boessenkool
  2020-05-30  7:15                                       ` Richard Sandiford
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-29 17:37 UTC (permalink / raw)
  To: Richard Biener, Martin Liška, GCC Patches, richard.sandiford

On Fri, May 29, 2020 at 06:26:55PM +0100, Richard Sandiford wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> > Most patterns *do* FAIL on some target.  We cannot rewind time.
> 
> Sure.  But the point is that FAILing isn't “explicitly allowed” for vcond*.
> In fact it's the opposite.

It has FAILed on rs6000 since 2004.

> If we ignore the docs and look at what the status quo actually is --
> which I agree seems safest for GCC :-) -- then patterns are allowed to
> FAIL if target-independent code provides an expand-time fallback for
> the FAILing case.  But that isn't true for vcond either.

That is a bug in the callers then :-)

> expand_vec_cond_expr does:
> 
>   icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
>   if (icode == CODE_FOR_nothing)
>     ...
> 
>   comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
> 				   icode, 4);
>   rtx_op1 = expand_normal (op1);
>   rtx_op2 = expand_normal (op2);
> 
>   create_output_operand (&ops[0], target, mode);
>   create_input_operand (&ops[1], rtx_op1, mode);
>   create_input_operand (&ops[2], rtx_op2, mode);
>   create_fixed_operand (&ops[3], comparison);
>   create_fixed_operand (&ops[4], XEXP (comparison, 0));
>   create_fixed_operand (&ops[5], XEXP (comparison, 1));
>   expand_insn (icode, 6, ops);
>   return ops[0].value;
> 
> which ICEs if the expander FAILs.
> 
> So whether you go from the docs or from what's actually implemented,
> vcond* isn't currently allowed to FAIL.  All Richard's gcc_unreachable
> suggestion would do is change where the ICE happens.


>   icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
>   if (icode == CODE_FOR_nothing)
>     ...

Of course it is allowed to FAIL, based on this code.  That is: the RTL
pattern is allowed to FAIL.  Whatever optabs do, I never understood :-)

Is this vec_cmp that is used by the fallback?  That will never FAIL
for us (if it is enabled at all, natch, same as for any other target).


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-29 17:37                                     ` Segher Boessenkool
@ 2020-05-30  7:15                                       ` Richard Sandiford
  2020-05-30 13:08                                         ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-05-30  7:15 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Richard Biener, Martin Liška, GCC Patches

Segher Boessenkool <segher@kernel.crashing.org> writes:
> On Fri, May 29, 2020 at 06:26:55PM +0100, Richard Sandiford wrote:
>> Segher Boessenkool <segher@kernel.crashing.org> writes:
>> > Most patterns *do* FAIL on some target.  We cannot rewind time.
>> 
>> Sure.  But the point is that FAILing isn't “explicitly allowed” for vcond*.
>> In fact it's the opposite.
>
> It has FAILed on rs6000 since 2004.

But that just means that the powerpc bug has been there since 2004,
assuming these FAILs can actually trigger in practice.  At that time,
the corresponding expand code was:

/* Generate insns for VEC_COND_EXPR.  */

rtx
expand_vec_cond_expr (tree vec_cond_expr, rtx target)
{
  enum insn_code icode;
  rtx comparison, rtx_op1, rtx_op2, cc_op0, cc_op1;
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (vec_cond_expr));
  bool unsignedp = TYPE_UNSIGNED (TREE_TYPE (vec_cond_expr));

  icode = get_vcond_icode (vec_cond_expr, mode);
  if (icode == CODE_FOR_nothing)
    return 0;

  if (!target)
    target = gen_reg_rtx (mode);

  /* Get comparison rtx.  First expand both cond expr operands.  */
  comparison = vector_compare_rtx (TREE_OPERAND (vec_cond_expr, 0), 
                                   unsignedp, icode);
  cc_op0 = XEXP (comparison, 0);
  cc_op1 = XEXP (comparison, 1);
  /* Expand both operands and force them in reg, if required.  */
  rtx_op1 = expand_expr (TREE_OPERAND (vec_cond_expr, 1),
                         NULL_RTX, VOIDmode, 1);
  if (!(*insn_data[icode].operand[1].predicate) (rtx_op1, mode)
      && mode != VOIDmode)
    rtx_op1 = force_reg (mode, rtx_op1);

  rtx_op2 = expand_expr (TREE_OPERAND (vec_cond_expr, 2),
                         NULL_RTX, VOIDmode, 1);
  if (!(*insn_data[icode].operand[2].predicate) (rtx_op2, mode)
      && mode != VOIDmode)
    rtx_op2 = force_reg (mode, rtx_op2);

  /* Emit instruction! */
  emit_insn (GEN_FCN (icode) (target, rtx_op1, rtx_op2, 
                              comparison, cc_op0,  cc_op1));

  return target;
}

i.e. no fallbacks, and no checking whether the expansion even
succeeded.  Since FAIL just causes the generator to return null,
and since emit_insn is a no-op for null insns, the effect for
FAILs was to emit no instructions and return an uninitialised
target register.

The silent use of an uninitialised register was changed in 2011
to an ICE, via the introduction of expand_insn.

>> If we ignore the docs and look at what the status quo actually is --
>> which I agree seems safest for GCC :-) -- then patterns are allowed to
>> FAIL if target-independent code provides an expand-time fallback for
>> the FAILing case.  But that isn't true for vcond either.
>
> That is a bug in the callers then :-)

It was a bug in the powerpc patch you cited that added the FAILs
without changing target-independent code to cope with them.

The fact that we've had no code to handle the FAILs for 15+ years
without apparent problems makes it even more likely that the FAILs
never happen in practice.

If you think the FAILs do trigger in practice, please provide an example.

Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-30  7:15                                       ` Richard Sandiford
@ 2020-05-30 13:08                                         ` Segher Boessenkool
  2020-06-02 11:09                                           ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-05-30 13:08 UTC (permalink / raw)
  To: Richard Biener, Martin Liška, GCC Patches, richard.sandiford

Hi!

On Sat, May 30, 2020 at 08:15:55AM +0100, Richard Sandiford wrote:
> Segher Boessenkool <segher@kernel.crashing.org> writes:
> >> Sure.  But the point is that FAILing isn't “explicitly allowed” for vcond*.
> >> In fact it's the opposite.

I disagree btw, and no one else has noticed for 16 years either.

In general, almost all patterns can FAIL, and those that can not are
simply because no one wrote fallback code.  Which means that all
targets that need a fallback need to implement the same thing for
themselves, which is just a waste and causes extra errors.

So, "cannot FAIL" should be a temporary thing, and should change to
"can FAIL" as soon as someone implements that, and never be changed
back -- and it should be how almost everything is in the first place
(and it still is, thankfully).

> > It has FAILed on rs6000 since 2004.
> 
> But that just means that the powerpc bug has been there since 2004,
> assuming these FAILs can actually trigger in practice.  At that time,
> the corresponding expand code was:

I, and I think most other people, thought it was allowed to FAIL (and
I still do).

> rtx
> expand_vec_cond_expr (tree vec_cond_expr, rtx target)

[ snip ]

So this was buggy.

> i.e. no fallbacks, and no checking whether the expansion even
> succeeded.  Since FAIL just causes the generator to return null,
> and since emit_insn is a no-op for null insns, the effect for
> FAILs was to emit no instructions and return an uninitialised
> target register.
> 
> The silent use of an uninitialised register was changed in 2011
> to an ICE, via the introduction of expand_insn.

Yeah, I ran into some of that in 2015, at least then not all of that
was fixed.  That was some very basic insn I think, that really should
never fail, a simple branch or something...  Was surprising though, a
good reminder to always check return values :-)

> The fact that we've had no code to handle the FAILs for 15+ years
> without apparent problems makes it even more likely that the FAILs
> never happen in practice.

AltiVec can do a lot less than VSX (and VSX on p7 can do less than on
p8, and that can do less than p9, etc.), so I am pretty certain it
could fail for some cases.  Only up to not so very long ago these
patterns were mainly (or only?) used via builtins, and the code for
those handles all those cases already.

> If you think the FAILs do trigger in practice, please provide an example.

As I said before, that is completely beside the point.

vcond is allowed to FAIL.  No pattern that can FAIL should ever be
changed to not allow that anymore.  This would make no sense at all.


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-05-30 13:08                                         ` Segher Boessenkool
@ 2020-06-02 11:09                                           ` Richard Biener
  2020-06-02 15:00                                             ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-02 11:09 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Martin Liška, GCC Patches, Richard Sandiford, David Edelsohn

On Sat, May 30, 2020 at 3:08 PM Segher Boessenkool
<segher@kernel.crashing.org> wrote:
>
> Hi!
>
> On Sat, May 30, 2020 at 08:15:55AM +0100, Richard Sandiford wrote:
> > Segher Boessenkool <segher@kernel.crashing.org> writes:
> > >> Sure.  But the point is that FAILing isn't “explicitly allowed” for vcond*.
> > >> In fact it's the opposite.
>
> I disagree btw, and no one else has noticed for 16 years either.
>
> In general, almost all patterns can FAIL, and those that can not are
> simply because no one wrote fallback code.  Which means that all
> targets that need a fallback need to implement the same thing for
> themselves, which is just a waste and causes extra errors.
>
> So, "cannot FAIL" should be a temporary thing, and should change to
> "can FAIL" as soon as someone implements that, and never be changed
> back -- and it should be how almost everything is in the first place
> (and it still is, thankfully).
>
> > > It has FAILed on rs6000 since 2004.
> >
> > But that just means that the powerpc bug has been there since 2004,
> > assuming these FAILs can actually trigger in practice.  At that time,
> > the corresponding expand code was:
>
> I, and I think most other people, thought it was allowed to FAIL (and
> I still do).
>
> > rtx
> > expand_vec_cond_expr (tree vec_cond_expr, rtx target)
>
> [ snip ]
>
> So this was buggy.
>
> > i.e. no fallbacks, and no checking whether the expansion even
> > succeeded.  Since FAIL just causes the generator to return null,
> > and since emit_insn is a no-op for null insns, the effect for
> > FAILs was to emit no instructions and return an uninitialised
> > target register.
> >
> > The silent use of an uninitialised register was changed in 2011
> > to an ICE, via the introduction of expand_insn.
>
> Yeah, I ran into some of that in 2015, at least then not all of that
> was fixed.  That was some very basic insn I think, that really should
> never fail, a simple branch or something...  Was surprising though, a
> good reminder to always check return values :-)
>
> > The fact that we've had no code to handle the FAILs for 15+ years
> > without apparent problems makes it even more likely that the FAILs
> > never happen in practice.
>
> AltiVec can do a lot less than VSX (and VSX on p7 can do less than on
> p8, and that can do less than p9, etc.), so I am pretty certain it
> could fail for some cases.  Only up to not so very long ago these
> patterns were mainly (or only?) used via builtins, and the code for
> those handles all those cases already.
>
> > If you think the FAILs do trigger in practice, please provide an example.
>
> As I said before, that is completely beside the point.
>
> vcond is allowed to FAIL.  No pattern that can FAIL should ever be
> changed to not allow that anymore.  This would make no sense at all.

Fact is if the FAIL happens we ICE _currently_.

The patterns may not FAIL since otherwise the vectorizer has no way
to check whether the backend can code-generate and fail vectorization
if not.  FAIL is a _very_ unspecific fail and I guess the middle-end would
need to cope with a pattern (considered as may-FAIL) doing

  if (random() == 5)
    FAIL;

specifically not FAIL when invoked during vectorization but FAIL when
re-invoked during RTL expansion just because some internal state
at the point of RTL expansion cannot be simulated at vectorization time.

Now the real issue here is of course that if vcond expansion may
FAIL then of course, based on your reasoning, vec_cmp expansion
may so as well.  In that case there's _no_ way to code generate
either of the two.  Well, spill, do elementwise compare (are cmp*
allowed to FAIL?), store, load would do the trick - but that defeats
the attempt to cost during vectorization.

So please be constructive.  Like, provide a testcase that ICEs
with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
to do this replacement and bootstrap/test?  I think it would be nice
to have testsuite coverage for the FAILs, and maybe we have that
already.

To get out of the deadlock here I'll approve a patch variant that
uses a separate internal function (thus without the static non-FAIL
checking) that preserves current behavior - thus ICE if the pattern
FAILs.  This change is then not a regression.  (please re-post for
appropriate approval)

Unless we come to a consensus in this discussion which seems
to dance around the latent vectorizer <-> rs6000 interface issue.

CCing David as other rs6000 maintainer (the subject isn't
specifically pointing to rs6000 so not sure if you followed the
discussion).

Thanks,
Richard.

>
> Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-02 11:09                                           ` Richard Biener
@ 2020-06-02 15:00                                             ` Martin Liška
  2020-06-03  7:38                                               ` Richard Biener
  2020-06-03 18:27                                               ` Segher Boessenkool
  0 siblings, 2 replies; 65+ messages in thread
From: Martin Liška @ 2020-06-02 15:00 UTC (permalink / raw)
  To: Richard Biener, Segher Boessenkool
  Cc: GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 440 bytes --]

On 6/2/20 1:09 PM, Richard Biener wrote:
> So please be constructive.  Like, provide a testcase that ICEs
> with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
> to do this replacement and bootstrap/test?  I think it would be nice
> to have testsuite coverage for the FAILs, and maybe we have that
> already.

Hello.

There's the suggested patch that survives bootstrap on ppc64le-linux-gnu
and passes test-suite.

Martin

[-- Attachment #2: 0001-rs6000-replace-FAIL-with-gcc_unreachable.patch --]
[-- Type: text/x-patch, Size: 2413 bytes --]

From 22db04d058c9bbd140041e7aa2caf1613767095a Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Tue, 2 Jun 2020 15:29:37 +0200
Subject: [PATCH] rs6000: replace FAIL with gcc_unreachable.

gcc/ChangeLog:

	* config/rs6000/vector.md: Replace FAIL with gcc_unreachable
	in all vcond* patterns.
---
 gcc/config/rs6000/vector.md | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 662521e74fe..796345c80d3 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -354,7 +354,7 @@ (define_expand "vcond<mode><mode>"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcond<mode><mode>"
@@ -371,7 +371,7 @@ (define_expand "vcond<mode><mode>"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcondv4sfv4si"
@@ -389,7 +389,7 @@ (define_expand "vcondv4sfv4si"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcondv4siv4sf"
@@ -407,7 +407,7 @@ (define_expand "vcondv4siv4sf"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcondv2dfv2di"
@@ -425,7 +425,7 @@ (define_expand "vcondv2dfv2di"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcondv2div2df"
@@ -443,7 +443,7 @@ (define_expand "vcondv2div2df"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vcondu<mode><mode>"
@@ -460,7 +460,7 @@ (define_expand "vcondu<mode><mode>"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vconduv4sfv4si"
@@ -478,7 +478,7 @@ (define_expand "vconduv4sfv4si"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 (define_expand "vconduv2dfv2di"
@@ -496,7 +496,7 @@ (define_expand "vconduv2dfv2di"
 				    operands[3], operands[4], operands[5]))
     DONE;
   else
-    FAIL;
+    gcc_unreachable ();
 })
 
 ;; To support vector condition vectorization, define vcond_mask and vec_cmp.
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-02 15:00                                             ` Martin Liška
@ 2020-06-03  7:38                                               ` Richard Biener
  2020-06-03 13:41                                                 ` Richard Sandiford
  2020-06-03 18:27                                               ` Segher Boessenkool
  1 sibling, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-03  7:38 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Tue, Jun 2, 2020 at 5:00 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/2/20 1:09 PM, Richard Biener wrote:
> > So please be constructive.  Like, provide a testcase that ICEs
> > with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
> > to do this replacement and bootstrap/test?  I think it would be nice
> > to have testsuite coverage for the FAILs, and maybe we have that
> > already.
>
> Hello.
>
> There's the suggested patch that survives bootstrap on ppc64le-linux-gnu
> and passes test-suite.

OK, so can you please re-post the version of the VEC_COND_EXPR
patch that uses a regular IFN (without the static non-FAIL checking)
in a new thread?  If there's no OK from rs6000 maintainers to remove
the FAILs then we'll go ahead with that version, unless Richard objects
here.

Thanks,
Richard.

> Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03  7:38                                               ` Richard Biener
@ 2020-06-03 13:41                                                 ` Richard Sandiford
  2020-06-03 14:17                                                   ` David Edelsohn
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Sandiford @ 2020-06-03 13:41 UTC (permalink / raw)
  To: Richard Biener
  Cc: Martin Liška, Segher Boessenkool, GCC Patches, David Edelsohn

Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, Jun 2, 2020 at 5:00 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> On 6/2/20 1:09 PM, Richard Biener wrote:
>> > So please be constructive.  Like, provide a testcase that ICEs
>> > with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
>> > to do this replacement and bootstrap/test?  I think it would be nice
>> > to have testsuite coverage for the FAILs, and maybe we have that
>> > already.
>>
>> Hello.
>>
>> There's the suggested patch that survives bootstrap on ppc64le-linux-gnu
>> and passes test-suite.
>
> OK, so can you please re-post the version of the VEC_COND_EXPR
> patch that uses a regular IFN (without the static non-FAIL checking)
> in a new thread?  If there's no OK from rs6000 maintainers to remove
> the FAILs then we'll go ahead with that version, unless Richard objects
> here.

Well, it seems unfortunate to have to do that.

I think Martin's powerpc patch is the correct one.  But assuming that
the powerpc maintainers still object, I guess the options are:

- Find enough global reviewers who are prepared to approve that patch,
  to override the powerpc maintainers.

- Avoid conflict by going with the regular IFN patch.  To be clear,
  this will ICE in exactly the same cases that Martin's powerpc patch
  does (and current master does), so there's no real benefit to the
  powerpc port from doing this.  It just makes the code more complicated
  and means that other ports don't benefit from the static checking.

In the circumstances, I agree the second is probably the most practical
way forward.

I can't help but think this is a process failure though.  I don't think
using regular IFNs has any technical merits, and it doesn't give Segher
what he wants either (i.e. code that copes with failing vconds).

Thanks,
Richard

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 13:41                                                 ` Richard Sandiford
@ 2020-06-03 14:17                                                   ` David Edelsohn
  2020-06-03 14:46                                                     ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: David Edelsohn @ 2020-06-03 14:17 UTC (permalink / raw)
  To: Richard Biener, Martin Liška, Segher Boessenkool,
	GCC Patches, David Edelsohn, Richard Sandiford

On Wed, Jun 3, 2020 at 9:41 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Tue, Jun 2, 2020 at 5:00 PM Martin Liška <mliska@suse.cz> wrote:
> >>
> >> On 6/2/20 1:09 PM, Richard Biener wrote:
> >> > So please be constructive.  Like, provide a testcase that ICEs
> >> > with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
> >> > to do this replacement and bootstrap/test?  I think it would be nice
> >> > to have testsuite coverage for the FAILs, and maybe we have that
> >> > already.
> >>
> >> Hello.
> >>
> >> There's the suggested patch that survives bootstrap on ppc64le-linux-gnu
> >> and passes test-suite.
> >
> > OK, so can you please re-post the version of the VEC_COND_EXPR
> > patch that uses a regular IFN (without the static non-FAIL checking)
> > in a new thread?  If there's no OK from rs6000 maintainers to remove
> > the FAILs then we'll go ahead with that version, unless Richard objects
> > here.
>
> Well, it seems unfortunate to have to do that.
>
> I think Martin's powerpc patch is the correct one.  But assuming that
> the powerpc maintainers still object, I guess the options are:
>
> - Find enough global reviewers who are prepared to approve that patch,
>   to override the powerpc maintainers.

Luckily GCC Development does not operate this way.

How about (3) help to remove reliance on this incorrect behavior from
the PowerPC port?

I didn't formally check, but if this is 16 years old, then it's from
the original RHES Altivec work.

I don't believe that anyone fundamentally is objecting to "fixing this
correctly".  I don't know the entire history of this discussion, but
my objection is to a fix that breaks a long-time assumption of the
PowerPC port and leaves it as an exercise to the PowerPC maintainers
to fix it.

Thanks, David

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 14:17                                                   ` David Edelsohn
@ 2020-06-03 14:46                                                     ` Richard Biener
  2020-06-03 17:01                                                       ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-03 14:46 UTC (permalink / raw)
  To: David Edelsohn
  Cc: Martin Liška, Segher Boessenkool, GCC Patches, Richard Sandiford

On Wed, Jun 3, 2020 at 4:17 PM David Edelsohn <dje.gcc@gmail.com> wrote:
>
> On Wed, Jun 3, 2020 at 9:41 AM Richard Sandiford
> <richard.sandiford@arm.com> wrote:
> >
> > Richard Biener <richard.guenther@gmail.com> writes:
> > > On Tue, Jun 2, 2020 at 5:00 PM Martin Liška <mliska@suse.cz> wrote:
> > >>
> > >> On 6/2/20 1:09 PM, Richard Biener wrote:
> > >> > So please be constructive.  Like, provide a testcase that ICEs
> > >> > with the FAILs replaced by gcc_unreachable ().  Martin, may I suggest
> > >> > to do this replacement and bootstrap/test?  I think it would be nice
> > >> > to have testsuite coverage for the FAILs, and maybe we have that
> > >> > already.
> > >>
> > >> Hello.
> > >>
> > >> There's the suggested patch that survives bootstrap on ppc64le-linux-gnu
> > >> and passes test-suite.
> > >
> > > OK, so can you please re-post the version of the VEC_COND_EXPR
> > > patch that uses a regular IFN (without the static non-FAIL checking)
> > > in a new thread?  If there's no OK from rs6000 maintainers to remove
> > > the FAILs then we'll go ahead with that version, unless Richard objects
> > > here.
> >
> > Well, it seems unfortunate to have to do that.
> >
> > I think Martin's powerpc patch is the correct one.  But assuming that
> > the powerpc maintainers still object, I guess the options are:
> >
> > - Find enough global reviewers who are prepared to approve that patch,
> >   to override the powerpc maintainers.
>
> Luckily GCC Development does not operate this way.
>
> How about (3) help to remove reliance on this incorrect behavior from
> the PowerPC port?
>
> I didn't formally check, but if this is 16 years old, then it's from
> the original RHES Altivec work.
>
> I don't believe that anyone fundamentally is objecting to "fixing this
> correctly".  I don't know the entire history of this discussion, but
> my objection is to a fix that breaks a long-time assumption of the
> PowerPC port and leaves it as an exercise to the PowerPC maintainers
> to fix it.

I _think_ there's nothing to fix besides removing the FAIL.  And I would
have no idea how to "fix" the powerpc port here since a) we lack a testcase
that actually FAILs, b) I'm not familiar with the ISA.  So we did (3) by
replacing the FAILs with gcc_unreachable () and bootstrap/regtest this
without any regression which I think "proves" the failure modes do not
actually exist.

So I'm not sure how we can help.

There's the option to remove the vcond_* patterns entirely which makes
us fall back to the vec_cmp_* ones which do never FAIL on powerpc.
But I suspect this might regress code generation.

It's suspicious that vec_cmp_* never FAIL when vcond_* do btw. - see
the case I pointed out where it appears to have inconsistencies/wrong-code
issues regarding ordered compare support.  But see above (I do not know
the powerpc ISA).

A vcond can usually be emulated by vec_cmp plus masking.  So if
we ever get a testcase that runs into the gcc_unreachable () I'll promise
to fix it up using this strategy in the vcond expander.  But without a
testcase and powerpc ISA knowledge it's really hard.  Or do you want
us to stick the vec_cmp expansion fallback in place of the FAILs?
I'm sure the powerpc maintainers are better suited to do that even though
I'll probably manage with some cut&paste.  To recap: vcond is
equal to

  mask = vec_cmp of the comparison
  true_masked = true_op & mask;
  false_masked = false_op & ~mask;
  result = true_masked | false_masked;

but I believe this would be dead code never triggered.

Richard.

>
> Thanks, David

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 14:46                                                     ` Richard Biener
@ 2020-06-03 17:01                                                       ` Segher Boessenkool
  2020-06-03 17:23                                                         ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-06-03 17:01 UTC (permalink / raw)
  To: Richard Biener
  Cc: David Edelsohn, Martin Liška, GCC Patches, Richard Sandiford

Hi!

On Wed, Jun 03, 2020 at 04:46:12PM +0200, Richard Biener wrote:
> On Wed, Jun 3, 2020 at 4:17 PM David Edelsohn <dje.gcc@gmail.com> wrote:
> > On Wed, Jun 3, 2020 at 9:41 AM Richard Sandiford
> > <richard.sandiford@arm.com> wrote:
> > > Well, it seems unfortunate to have to do that.
> > >
> > > I think Martin's powerpc patch is the correct one.

It is papering over the issues a little -- the same assumption is made
at lower levels as well, so all *that* needs to be changed as well (not
"fixed", it is not a bug, we have a change in the vcond* interface here;
oh and that should be documented as well).

> > How about (3) help to remove reliance on this incorrect behavior from
> > the PowerPC port?

It is not a reliance on incorrect behaviour.  This is a change.  Which
pretty much everyone seems to want, so fine, but that takes time.

> > I didn't formally check, but if this is 16 years old, then it's from
> > the original RHES Altivec work.

It is, exactly.

> > I don't believe that anyone fundamentally is objecting to "fixing this
> > correctly".  I don't know the entire history of this discussion, but
> > my objection is to a fix that breaks a long-time assumption of the
> > PowerPC port and leaves it as an exercise to the PowerPC maintainers
> > to fix it.

*Exactly*.  This is changing an ancient interface, claiming "it always
was that way" (which very obviously isn't true), and leaving the rs6000
people to deal with all the fallout.  Again.

> I _think_ there's nothing to fix besides removing the FAIL.

All the lower levels need to get asserts as well.  We need a week or so
to put the whole thing through the wringer.  The documentation needs to
be changed by whoever changes the vcond* semantics.  All other ports
should be checked, too.

> And I would
> have no idea how to "fix" the powerpc port here since a) we lack a testcase
> that actually FAILs, b) I'm not familiar with the ISA.  So we did (3) by
> replacing the FAILs with gcc_unreachable () and bootstrap/regtest this
> without any regression which I think "proves" the failure modes do not
> actually exist.

Heh, assuming the testsuite is comprehensive?  Heh.  (Bootstrap doesn't
mean much for vector code).

> So I'm not sure how we can help.

You'll have to document the "vcond* is not allowed to FAIL" change.
We'll deal with the rest.  But testing needs a week or so.  (That is
an extremely short timescale already).

> A vcond can usually be emulated by vec_cmp plus masking.

That would be the generic way to implement this of course, but apparently
such code doesn't yet exist?  If there is a generic implementation it
should be trivial to deal with FAILs.

> So if
> we ever get a testcase that runs into the gcc_unreachable () I'll promise
> to fix it up using this strategy in the vcond expander.  But without a
> testcase and powerpc ISA knowledge it's really hard.  Or do you want
> us to stick the vec_cmp expansion fallback in place of the FAILs?
> I'm sure the powerpc maintainers are better suited to do that even though
> I'll probably manage with some cut&paste.  To recap: vcond is
> equal to
> 
>   mask = vec_cmp of the comparison
>   true_masked = true_op & mask;
>   false_masked = false_op & ~mask;
>   result = true_masked | false_masked;
> 
> but I believe this would be dead code never triggered.

But that would be the generic code as well?  Is that not useful to have
in any case?


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 17:01                                                       ` Segher Boessenkool
@ 2020-06-03 17:23                                                         ` Richard Biener
  2020-06-03 18:23                                                           ` Segher Boessenkool
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-03 17:23 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: David Edelsohn, Martin Liška, GCC Patches, Richard Sandiford

On June 3, 2020 7:01:39 PM GMT+02:00, Segher Boessenkool <segher@kernel.crashing.org> wrote:
>Hi!
>
>On Wed, Jun 03, 2020 at 04:46:12PM +0200, Richard Biener wrote:
>> On Wed, Jun 3, 2020 at 4:17 PM David Edelsohn <dje.gcc@gmail.com>
>wrote:
>> > On Wed, Jun 3, 2020 at 9:41 AM Richard Sandiford
>> > <richard.sandiford@arm.com> wrote:
>> > > Well, it seems unfortunate to have to do that.
>> > >
>> > > I think Martin's powerpc patch is the correct one.
>
>It is papering over the issues a little -- the same assumption is made
>at lower levels as well, so all *that* needs to be changed as well (not
>"fixed", it is not a bug, we have a change in the vcond* interface
>here;
>oh and that should be documented as well).
>
>> > How about (3) help to remove reliance on this incorrect behavior
>from
>> > the PowerPC port?
>
>It is not a reliance on incorrect behaviour.  This is a change.  Which
>pretty much everyone seems to want, so fine, but that takes time.
>
>> > I didn't formally check, but if this is 16 years old, then it's
>from
>> > the original RHES Altivec work.
>
>It is, exactly.
>
>> > I don't believe that anyone fundamentally is objecting to "fixing
>this
>> > correctly".  I don't know the entire history of this discussion,
>but
>> > my objection is to a fix that breaks a long-time assumption of the
>> > PowerPC port and leaves it as an exercise to the PowerPC
>maintainers
>> > to fix it.
>
>*Exactly*.  This is changing an ancient interface, claiming "it always
>was that way" (which very obviously isn't true), and leaving the rs6000
>people to deal with all the fallout.  Again.
>
>> I _think_ there's nothing to fix besides removing the FAIL.
>
>All the lower levels need to get asserts as well.  We need a week or so
>to put the whole thing through the wringer.  The documentation needs to
>be changed by whoever changes the vcond* semantics.  All other ports
>should be checked, too.
>
>> And I would
>> have no idea how to "fix" the powerpc port here since a) we lack a
>testcase
>> that actually FAILs, b) I'm not familiar with the ISA.  So we did (3)
>by
>> replacing the FAILs with gcc_unreachable () and bootstrap/regtest
>this
>> without any regression which I think "proves" the failure modes do
>not
>> actually exist.
>
>Heh, assuming the testsuite is comprehensive?  Heh.  (Bootstrap doesn't
>mean much for vector code).
>
>> So I'm not sure how we can help.
>
>You'll have to document the "vcond* is not allowed to FAIL" change.
>We'll deal with the rest.  But testing needs a week or so.  (That is
>an extremely short timescale already).
>
>> A vcond can usually be emulated by vec_cmp plus masking.
>
>That would be the generic way to implement this of course, but
>apparently
>such code doesn't yet exist?  If there is a generic implementation it
>should be trivial to deal with FAILs.
>
>> So if
>> we ever get a testcase that runs into the gcc_unreachable () I'll
>promise
>> to fix it up using this strategy in the vcond expander.  But without
>a
>> testcase and powerpc ISA knowledge it's really hard.  Or do you want
>> us to stick the vec_cmp expansion fallback in place of the FAILs?
>> I'm sure the powerpc maintainers are better suited to do that even
>though
>> I'll probably manage with some cut&paste.  To recap: vcond is
>> equal to
>> 
>>   mask = vec_cmp of the comparison
>>   true_masked = true_op & mask;
>>   false_masked = false_op & ~mask;
>>   result = true_masked | false_masked;
>> 
>> but I believe this would be dead code never triggered.
>
>But that would be the generic code as well?  Is that not useful to have
>in any case?

Sure. If you remove the vcond patterns from your port the vectorizer will do this transparently for you. So if you do not actually have a more clever way of representing this in the ISA there's no point in the vcond patterns. (though I think the vec_cmp ones didn't originally exist) 

The point is the vectorizer relies on an optab query for querying backend support and power claims vcond support here. If you then FAIL you have lied. (not in your interpretation of the pattern docs but in the implementations since introduction of vcond named patterns) 

So if you're happy I'll document explicitly that vector named patterns may not FAIL. 

Richard. 

>
>Segher


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 17:23                                                         ` Richard Biener
@ 2020-06-03 18:23                                                           ` Segher Boessenkool
  2020-06-03 18:38                                                             ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-06-03 18:23 UTC (permalink / raw)
  To: Richard Biener
  Cc: David Edelsohn, Martin Liška, GCC Patches, Richard Sandiford

On Wed, Jun 03, 2020 at 07:23:47PM +0200, Richard Biener wrote:
> >>   mask = vec_cmp of the comparison
> >>   true_masked = true_op & mask;
> >>   false_masked = false_op & ~mask;
> >>   result = true_masked | false_masked;
> >> 
> >> but I believe this would be dead code never triggered.
> >
> >But that would be the generic code as well?  Is that not useful to have
> >in any case?
> 
> Sure. If you remove the vcond patterns from your port the vectorizer will do this transparently for you. So if you do not actually have a more clever way of representing this in the ISA there's no point of the vcond patterns. (though I think the vec_cmp ones didn't originally exist) 

So why can the expander not just do that whenever the patterns FAIL as
well?

> The point is the vectorizer relies on an optab query for querying backend support and power claims vcond support here. If you then FAIL you have lied. (not in your interpretation of the pattern docs but in the implementations since introduction of vcond named patterns) 

Almost all RTL patterns are allowed to FAIL, and that is a very good
thing.  If the vectoriser does not allow that, *it* is buggy.

> So if you're happy I'll document explicitly that vector named patterns may not FAIL. 

That will not work in general at all, no.  Please document it for only
those RTL patterns you need it for (and it is documented per pattern
currently, anyway).


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-02 15:00                                             ` Martin Liška
  2020-06-03  7:38                                               ` Richard Biener
@ 2020-06-03 18:27                                               ` Segher Boessenkool
  2020-06-08 11:04                                                 ` Martin Liška
  1 sibling, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-06-03 18:27 UTC (permalink / raw)
  To: Martin Liška
  Cc: Richard Biener, GCC Patches, Richard Sandiford, David Edelsohn

Hi Martin,

Okay, let's try this out.  Okay for trunk.  Thanks for the work!

On Tue, Jun 02, 2020 at 05:00:56PM +0200, Martin Liška wrote:
> >From 22db04d058c9bbd140041e7aa2caf1613767095a Mon Sep 17 00:00:00 2001
> From: Martin Liska <mliska@suse.cz>
> Date: Tue, 2 Jun 2020 15:29:37 +0200
> Subject: [PATCH] rs6000: replace FAIL with gcc_unreachable.

("Replace", and no dot at the end).

> 	* config/rs6000/vector.md: Replace FAIL with gcc_unreachable
> 	in all vcond* patterns.


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 18:23                                                           ` Segher Boessenkool
@ 2020-06-03 18:38                                                             ` Richard Biener
  2020-06-03 18:46                                                               ` David Edelsohn
  2020-06-03 19:09                                                               ` Segher Boessenkool
  0 siblings, 2 replies; 65+ messages in thread
From: Richard Biener @ 2020-06-03 18:38 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: David Edelsohn, Martin Liška, GCC Patches, Richard Sandiford

On June 3, 2020 8:23:14 PM GMT+02:00, Segher Boessenkool <segher@kernel.crashing.org> wrote:
>On Wed, Jun 03, 2020 at 07:23:47PM +0200, Richard Biener wrote:
>> >>   mask = vec_cmp of the comparison
>> >>   true_masked = true_op & mask;
>> >>   false_masked = false_op & ~mask;
>> >>   result = true_masked | false_masked;
>> >> 
>> >> but I believe this would be dead code never triggered.
>> >
>> >But that would be the generic code as well?  Is that not useful to
>have
>> >in any case?
>> 
>> Sure. If you remove the vcond patterns from your port the vectorizer
>will do this transparently for you. So if you do not actually have a
>more clever way of representing this in the ISA there's no point of the
>vcond patterns. (though I think the vec_cmp ones didn't originally
>exist) 
>
>So why can the expander not just do that whenever the patterns FAIL as
>well?

It could but all the vectorizer costing assumed it goes the 'cheaper' way. So this is kind of a sanity check. And what if vec_cmp expansion fails as well? Resort to scalar soft FP support as ultimate fallback? That sounds very wrong as an auto vectorization result... 

>> The point is the vectorizer relies on a optab query for querying
>backend support and power claims vcond support here. If you then FAIL
>you have lied. (not in your interpretation of the pattern docs but in
>the implementations since introduction of vcond named patterns) 
>
>Almost all RTL patterns are allowed to FAIL, and that is a very good
>thing.  If the vectoriser does not allow that, *it* is buggy.

Your opinion. Please suggest a better way to query target vector capabilities. 

>> So if you're happy I'll document explicitly that vector named
>patterns may not FAIL. 
>
>That will not work in general at all, no.  Please document it for only
>those RTL patterns you need it for (and it is documented per pattern
>currently, anyway).

Sure, will do. But as Richard said, the documented list is the other way around. 
At least that was my interpretation. 

All vectorizer queried optabs have this constraint BTW.

Richard. 

>
>Segher


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 18:38                                                             ` Richard Biener
@ 2020-06-03 18:46                                                               ` David Edelsohn
  2020-06-03 19:09                                                               ` Segher Boessenkool
  1 sibling, 0 replies; 65+ messages in thread
From: David Edelsohn @ 2020-06-03 18:46 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, Martin Liška, GCC Patches, Richard Sandiford

On Wed, Jun 3, 2020 at 2:38 PM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On June 3, 2020 8:23:14 PM GMT+02:00, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> >On Wed, Jun 03, 2020 at 07:23:47PM +0200, Richard Biener wrote:
> >> >>   mask = vec_cmp of the comparison
> >> >>   true_masked = true_op & mask;
> >> >>   false_masked = false_op & ~mask;
> >> >>   result = true_masked | false_masked;
> >> >>
> >> >> but I believe this would be dead code never triggered.
> >> >
> >> >But that would be the generic code as well?  Is that not useful to
> >have
> >> >in any case?
> >>
> >> Sure. If you remove the vcond patterns from your port the vectorizer
> >will do this transparently for you. So if you do not actually have a
> >more clever way of representing this in the ISA there's no point of the
> >vcond patterns. (though I think the vec_cmp ones didn't originally
> >exist)
> >
> >So why can the expander not just do that whenever the patterns FAIL as
> >well?
>
> It could but all the vectorizer costing assumed it goes the 'cheaper' way. So this is kind of a sanity check. And what when vec_cmp expansion fails as well? Resort to scalar soft FP support as ultimate fallback? That sounds very wrong as a auto vectorization result...
>
> >> The point is the vectorizer relies on a optab query for querying
> >backend support and power claims vcond support here. If you then FAIL
> >you have lied. (not in your interpretation of the pattern docs but in
> >the implementations since introduction of vcond named patterns)
> >
> >Almost all RTL patterns are allowed to FAIL, and that is a very good
> >thing.  If the vectoriser does not allow that, *it* is buggy.
>
> Your opinion. Please suggest a better way to query target vector capabilities.
>
> >> So if you're happy I'll document explicitly that vector named
> >patterns may not FAIL.
> >
> >That will not work in general at all, no.  Please document it for only
> >those RTL patterns you need it for (and it is documented per pattern
> >currently, anyway).
>
> Sure, will do. But as Richard said, the documented list is the other way around.
> At least that was my interpretation.
>
> All vectorizer queried optabs have this constraint BTW.

Looking at other primary targets, like x86, ARM, AArch64, I agree that
other targets don't FAIL vector patterns in the same manner as scalar
patterns.  The design of Altivec support came with the work from RHES
and no one at the time mentioned that FAILing those named patterns was
incorrect.  As Segher said, allowing named patterns to fail is fairly
standard in GCC and the restriction for vector patterns seems to have
appeared without warning or documentation.  Maybe the vectorizer
started to assume that (because of x86 behavior) but no one announced
this.

Again, I'm not saying that it is an incorrect design or policy, but
that it seems to have been imposed retroactively.  This needs to be
documented. It needs to be applied uniformly throughout the common
parts of the vectorizer and RTL generation. It needs to allow time for
the Power port to adapt. And we need to test this thoroughly to catch
unanticipated fallout.

Thanks, David

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 18:38                                                             ` Richard Biener
  2020-06-03 18:46                                                               ` David Edelsohn
@ 2020-06-03 19:09                                                               ` Segher Boessenkool
  2020-06-03 19:13                                                                 ` Jakub Jelinek
  1 sibling, 1 reply; 65+ messages in thread
From: Segher Boessenkool @ 2020-06-03 19:09 UTC (permalink / raw)
  To: Richard Biener
  Cc: David Edelsohn, Martin Liška, GCC Patches, Richard Sandiford

On Wed, Jun 03, 2020 at 08:38:04PM +0200, Richard Biener wrote:
> On June 3, 2020 8:23:14 PM GMT+02:00, Segher Boessenkool <segher@kernel.crashing.org> wrote:
> >On Wed, Jun 03, 2020 at 07:23:47PM +0200, Richard Biener wrote:
> >> >>   mask = vec_cmp of the comparison
> >> >>   true_masked = true_op & mask;
> >> >>   false_masked = false_op & ~mask;
> >> >>   result = true_masked | false_masked;
> >> >> 
> >> >> but I believe this would be dead code never triggered.
> >> >
> >> >But that would be the generic code as well?  Is that not useful to
> >have
> >> >in any case?
> >> 
> >> Sure. If you remove the vcond patterns from your port the vectorizer
> >will do this transparently for you. So if you do not actually have a
> >more clever way of representing this in the ISA there's no point of the
> >vcond patterns. (though I think the vec_cmp ones didn't originally
> >exist) 
> >
> >So why can the expander not just do that whenever the patterns FAIL as
> >well?
> 
> It could but all the vectorizer costing assumed it goes the 'cheaper' way. So this is kind of a sanity check. And what when vec_cmp expansion fails as well? Resort to scalar soft FP support as ultimate fallback? That sounds very wrong as a auto vectorization result... 

Yeah, but that is specific to the vectoriser, not something that the
RTL expander should have to deal with.  A big impedance mismatch there.

> >> The point is the vectorizer relies on a optab query for querying
> >backend support and power claims vcond support here. If you then FAIL
> >you have lied. (not in your interpretation of the pattern docs but in
> >the implementations since introduction of vcond named patterns) 
> >
> >Almost all RTL patterns are allowed to FAIL, and that is a very good
> >thing.  If the vectoriser does not allow that, *it* is buggy.
> 
> Your opinion. Please suggest a better way to query target vector capabilities. 

Again, that is what RTL does.  If the vectoriser has extra considerations
or requirements, it perhaps should use some more conditions than just
"this pattern exists"?  Maybe this can be done via the optabs, dunno.

I have no good idea how to query things better -- I don't work in the
vectorisers much at all -- but this should not constrain the target
instruction support that much (it makes no good sense to have to have
two separate patterns for things, one for the vectorisers, one for the
builtins and everything else).

Thanks,


Segher

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 19:09                                                               ` Segher Boessenkool
@ 2020-06-03 19:13                                                                 ` Jakub Jelinek
  0 siblings, 0 replies; 65+ messages in thread
From: Jakub Jelinek @ 2020-06-03 19:13 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: Richard Biener, GCC Patches, David Edelsohn

On Wed, Jun 03, 2020 at 02:09:11PM -0500, Segher Boessenkool wrote:
> Yeah, but that is specific to the vectoriser, not something that the
> RTL expander should have to deal with.  A big impedance mismatch there.

It isn't specific to vectorizer, many named patterns aren't allowed to FAIL
and many others are.
E.g. if one looks at optabs.c, there are many calls to maybe_emit_*
functions which are allowed to FAIL, and many calls to the corresponding
emit_* functions that assert that it doesn't FAIL.
It is true that the documentation documents this in some and not in all
cases.

	Jakub


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-03 18:27                                               ` Segher Boessenkool
@ 2020-06-08 11:04                                                 ` Martin Liška
  2020-06-09 13:42                                                   ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-08 11:04 UTC (permalink / raw)
  To: Segher Boessenkool
  Cc: Richard Biener, GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 244 bytes --]

Hello.

Thank you for the approval. There's the patch that defines 4 new DEF_INTERNAL_OPTAB_FN.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
It also builds on ppc64le-linux-gnu.

Ready to be installed?
Thanks,
Martin

[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 26190 bytes --]

From ae67d31e1ea7e03bbba216a7b9404009b2dea070 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable on code paths
	that should not be reached.
	(do_store_flag): Likewise here.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn): Move from optabs.c.
	(expand_vec_cond_optab_fn): New alias.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Moved from optabs.c.
	(expand_vec_cond_mask_optab_fn): New alias.
	(direct_vec_cond_mask_optab_supported_p): New.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (DEF_INTERNAL_OPTAB_CAN_FAIL): New macro.
	(VCOND): New internal optab function.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	to already lowered VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	(expand_vector_condition): Expand tcc_comparison of a VEC_COND_EXPR
	into a SSA_NAME.
	(gimple_expand_vec_cond_expr): New.
	(gimple_expand_vec_cond_exprs): New.
	(class pass_gimple_isel): New.
	(make_pass_gimple_isel): New.
	* genemit.c (DEF_INTERNAL_OPTAB_CAN_FAIL): Support optabs that
	can fail.
---
 gcc/expr.c              |  25 +----
 gcc/internal-fn.c       |  89 +++++++++++++++
 gcc/internal-fn.def     |   5 +
 gcc/optabs.c            | 124 +--------------------
 gcc/optabs.h            |   7 +-
 gcc/passes.def          |   1 +
 gcc/tree-cfg.c          |   8 ++
 gcc/tree-pass.h         |   1 +
 gcc/tree-ssa-forwprop.c |   6 +
 gcc/tree-vect-generic.c | 237 +++++++++++++++++++++++++++++++++++++++-
 10 files changed, 349 insertions(+), 154 deletions(-)

diff --git a/gcc/expr.c b/gcc/expr.c
index ca6b1c1291e..3c68b0d754c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 6d0b76c13ba..184827fdf4e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 56322025226..2b1e09fdda3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..16ff06fbf88 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4199,6 +4199,14 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (cfun->curr_properties & PROP_gimple_lvec
+	       && TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+		 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 396428f167f..215c8f2a337 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 759baf56897..fce392e204c 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3125,6 +3125,12 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs after the
+			   vector lowering pass.  */
+			if (code == VEC_COND_EXPR
+			    && (fun->curr_properties & PROP_gimple_lvec))
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..8f6d63f01c5 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -964,7 +966,17 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      if (a_is_comparison)
+	{
+	  a = gimplify_build2 (gsi, TREE_CODE (a), TREE_TYPE (a), a1, a2);
+	  gimple_assign_set_rhs1 (stmt, a);
+	  update_stmt (stmt);
+	  return;
+	}
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -2241,6 +2253,176 @@ expand_vector_operations (void)
   return cfg_changed ? TODO_cleanup_cfg : 0;
 }
 
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	  /* ???  If we do not cleanup EH then we will ICE in
+	     verification.  But in reality we have created wrong-code
+	     as we did not properly transition EH info and edges to
+	     the piecewise computations.  */
+	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
+	      && gimple_purge_dead_eh_edges (bb))
+	    cfg_changed = true;
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
 namespace {
 
 const pass_data pass_data_lower_vector =
@@ -2324,4 +2506,47 @@ make_pass_lower_vector_ssa (gcc::context *ctxt)
   return new pass_lower_vector_ssa (ctxt);
 }
 
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
 #include "gt-tree-vect-generic.h"
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-08 11:04                                                 ` Martin Liška
@ 2020-06-09 13:42                                                   ` Richard Biener
  2020-06-10  8:51                                                     ` Martin Liška
  2020-06-11  8:52                                                     ` Martin Liška
  0 siblings, 2 replies; 65+ messages in thread
From: Richard Biener @ 2020-06-09 13:42 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Mon, Jun 8, 2020 at 1:04 PM Martin Liška <mliska@suse.cz> wrote:
>
> Hello.
>
> Thank you for the approval. There's the patch that defines 4 new DEF_INTERNAL_OPTAB_FN.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> It also builds on ppc64le-linux-gnu.
>
> Ready to be installed?

The ChangeLog refers to DEF_INTERNAL_OPTAB_CAN_FAIL which is no longer there.

Can you put the isel pass to a separate file please?

So this is a first step towards sanitizing VEC_COND_EXPR.  There were followups
mentioned, namely a) enforcing that VEC_COND_EXPR constraint everywhere,
b) isel vector comparisons at the same time since expansion has a
vec_cond fallback

There's

+         /* ???  If we do not cleanup EH then we will ICE in
+            verification.  But in reality we have created wrong-code
+            as we did not properly transition EH info and edges to
+            the piecewise computations.  */
+         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
+             && gimple_purge_dead_eh_edges (bb))
+           cfg_changed = true;

which of course is bad.  It's the comparison that can throw and I guess current
RTL expansion manages to cope by find_many_sub_bbs and friends.  But we
need to get this correct on GIMPLE here.  Note I find it odd this only triggers
during ISEL - it should trigger during the lowering step which splits
the comparison
from the VEC_COND_EXPR.  An appropriate fix at lowering time would be to
insert the VEC_COND_EXPR w/o the condition on the normal outgoing edge
and keep the comparison in place of the original VEC_COND_EXPR,
moving EH info from the VEC_COND_EXPR to the comparison.

I think we need to fix that before merging.

Thanks,
Richard.

> Thanks,
> Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-09 13:42                                                   ` Richard Biener
@ 2020-06-10  8:51                                                     ` Martin Liška
  2020-06-10 10:50                                                       ` Richard Biener
  2020-06-11  8:52                                                     ` Martin Liška
  1 sibling, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-10  8:51 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On 6/9/20 3:42 PM, Richard Biener wrote:
> On Mon, Jun 8, 2020 at 1:04 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> Hello.
>>
>> Thank you for the approval. There's the patch that defines 4 new DEF_INTERNAL_OPTAB_FN.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>> It also builds on ppc64le-linux-gnu.
>>
>> Ready to be installed?
> 
> The ChangeLog refers to DEF_INTERNAL_OPTAB_CAN_FAIL which is no longer there.


Sure.

> 
> Can you put the isel pass to a separate file please?

Yes.

> 
> So this is a first step towards sanitizing VEC_COND_EXPR.  There were followups
> mentioned, namely a) enforcing that VEC_COND_EXPR constraint everywhere,
> b) isel vector comparisons at the same time since expansion has a
> vec_cond fallback

I'm planning to work on the follow up.

> 
> There's
> 
> +         /* ???  If we do not cleanup EH then we will ICE in
> +            verification.  But in reality we have created wrong-code
> +            as we did not properly transition EH info and edges to
> +            the piecewise computations.  */
> +         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> +             && gimple_purge_dead_eh_edges (bb))
> +           cfg_changed = true;

Hm, I've tried to comment the code in both ISEL and expansion and I can't find a test-case
that would trigger a verification error (in vect.exp and i386.exp). Can you come up with
something that will trigger the code?

> 
> which of course is bad.  It's the comparison that can throw and I guess current
> RTL expansion manages to cope by find_many_sub_bbs and friends.  But we
> need to get this correct on GIMPLE here.  Note I find it odd this only triggers
> during ISEL - it should trigger during the lowering step which splits
> the comparison
> from the VEC_COND_EXPR.  An appropriate fix at lowering time would be to
> insert the VEC_COND_EXPR w/o the condition on the normal outgoing edge
> and keep the comparison in place of the original VEC_COND_EXPR,
> moving EH info from the VEC_COND_EXPR to the comparison.

Ah ok, so it's about correction of EH..

Martin

> 
> I think we need to fix that before merging.
> 
> Thanks,
> Richard.
> 
>> Thanks,
>> Martin


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-10  8:51                                                     ` Martin Liška
@ 2020-06-10 10:50                                                       ` Richard Biener
  2020-06-10 12:27                                                         ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-10 10:50 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Wed, Jun 10, 2020 at 10:51 AM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/9/20 3:42 PM, Richard Biener wrote:
> > On Mon, Jun 8, 2020 at 1:04 PM Martin Liška <mliska@suse.cz> wrote:
> >>
> >> Hello.
> >>
> >> Thank you for the approval. There's the patch that defines 4 new DEF_INTERNAL_OPTAB_FN.
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >> It also builds on ppc64le-linux-gnu.
> >>
> >> Ready to be installed?
> >
> > The ChangeLog refers to DEF_INTERNAL_OPTAB_CAN_FAIL which is no longer there.
>
>
> Sure.
>
> >
> > Can you put the isel pass to a separate file please?
>
> Yes.
>
> >
> > So this is a first step towards sanitizing VEC_COND_EXPR.  There were followups
> > mentioned, namely a) enforcing that VEC_COND_EXPR constraint everywhere,
> > b) isel vector comparisons at the same time since expansion has a
> > vec_cond fallback
>
> I'm planning to work on the follow up.
>
> >
> > There's
> >
> > +         /* ???  If we do not cleanup EH then we will ICE in
> > +            verification.  But in reality we have created wrong-code
> > +            as we did not properly transition EH info and edges to
> > +            the piecewise computations.  */
> > +         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> > +             && gimple_purge_dead_eh_edges (bb))
> > +           cfg_changed = true;
>
> Hm, I've tried to comment the code in both ISEL and expansion and I can't find a test-case
> that would trigger a verification error (in vect.exp and i386.exp). Can you come up with
> something that will trigger the code?

typedef double v2df __attribute__((vector_size(16)));

v2df foo (v2df a, v2df b, v2df c, v2df d)
{
  try
  {
    v2df res = a < b ? c : d;
    return res;
    }
    catch (...)
    {
    return (v2df){};
    }
}

with -fnon-call-exceptions should trigger it.

> >
> > which of course is bad.  It's the comparison that can throw and I guess current
> > RTL expansion manages to cope by find_many_sub_bbs and friends.  But we
> > need to get this correct on GIMPLE here.  Note I find it odd this only triggers
> > during ISEL - it should trigger during the lowering step which splits
> > the comparison
> > from the VEC_COND_EXPR.  An appropriate fix at lowering time would be to
> > insert the VEC_COND_EXPR w/o the condition on the normal outgoing edge
> > and keep the comparison in place of the original VEC_COND_EXPR,
> > moving EH info from the VEC_COND_EXPR to the comparison.
>
> Ah ok, so it's about correction of EH..
>
> Martin
>
> >
> > I think we need to fix that before merging.
> >
> > Thanks,
> > Richard.
> >
> >> Thanks,
> >> Martin
>

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-10 10:50                                                       ` Richard Biener
@ 2020-06-10 12:27                                                         ` Martin Liška
  2020-06-10 13:01                                                           ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-10 12:27 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On 6/10/20 12:50 PM, Richard Biener wrote:
> with -fnon-call-exceptions should trigger it.

Thanks, that works!

We start with:

foo (v2df a, v2df b, v2df c, v2df d)
Eh tree:
    1 try land:{1,<L1>} catch:{}
{
   void * _1;
   v2df _2;
   v2df _8;

   <bb 2> [local count: 1073741824]:
   [LP 1] _8 = VEC_COND_EXPR <a_4(D) < b_5(D), c_6(D), d_7(D)>;

   <bb 3> [local count: 1073741824]:
   # _2 = PHI <{ 0.0, 0.0 }(4), _8(2)>
   return _2;

   <bb 4> [count: 0]:
<L1>: [LP 1]
   _1 = __builtin_eh_pointer (1);
   __cxa_begin_catch (_1);
   __cxa_end_catch ();
   goto <bb 3>; [0.00%]

I tried to use:

	  maybe_clean_or_replace_eh_stmt (stmt, assign);

which does:

   <bb 2> [local count: 1073741824]:
   [LP 1] _12 = a_4(D) < b_5(D);

   <bb 3> [local count: 1073741824]:
   _8 = VEC_COND_EXPR <_12, c_6(D), d_7(D)>;

which requires to split the BB. But now I'm missing an edge:

/home/marxin/Programming/testcases/vect-low.c: In function ‘v2df foo(v2df, v2df, v2df, v2df)’:
/home/marxin/Programming/testcases/vect-low.c:3:6: error: BB 2 is missing an EH edge

Am I doing that correctly? Or do we have a better function for it?
Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-10 12:27                                                         ` Martin Liška
@ 2020-06-10 13:01                                                           ` Martin Liška
  0 siblings, 0 replies; 65+ messages in thread
From: Martin Liška @ 2020-06-10 13:01 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On 6/10/20 2:27 PM, Martin Liška wrote:
> /home/marxin/Programming/testcases/vect-low.c: In function ‘v2df foo(v2df, v2df, v2df, v2df)’:
> /home/marxin/Programming/testcases/vect-low.c:3:6: error: BB 2 is missing an EH edge

Ok, I was missing copying of the EH edges:

       FOR_EACH_EDGE (e, ei, gimple_bb (old_stmt)->succs)
	{
	  if (e->flags & EDGE_EH)
	    make_edge (gimple_bb (new_stmt), e->dest, e->flags);
	}

Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-09 13:42                                                   ` Richard Biener
  2020-06-10  8:51                                                     ` Martin Liška
@ 2020-06-11  8:52                                                     ` Martin Liška
  2020-06-12  9:43                                                       ` Richard Biener
  1 sibling, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-11  8:52 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 272 bytes --]

On 6/9/20 3:42 PM, Richard Biener wrote:
> I think we need to fix that before merging.

There's updated version of the patch that should handle the EH properly.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 31938 bytes --]

From fc5a59e8c8887c102bff06e1a537ccfc9d44e3d8 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable, we should not
	reach this path.
	(do_store_flag): Likewise here.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn): Move from optabs.c.
	(expand_vec_cond_optab_fn): New alias.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Moved from optabs.c.
	(expand_vec_cond_mask_optab_fn): New alias.
	(direct_vec_cond_mask_optab_supported_p): New.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (VCOND): New internal optab
	function.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	to already lowered VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	(expand_vector_condition): Expand tcc_comparison of a VEC_COND_EXPR
	into a SSA_NAME.
	(expand_vector_condition): Add new argument.
	(expand_vector_operations): Likewise.
	(expand_vector_operations_1): Fix up EH by moving that to vector
	comparison.
	* tree-vect-isel.c: New file.

gcc/testsuite/ChangeLog:

	* g++.dg/vect/vec-cond-expr-eh.C: New test.
---
 gcc/Makefile.in                              |   2 +
 gcc/expr.c                                   |  25 +-
 gcc/internal-fn.c                            |  89 +++++++
 gcc/internal-fn.def                          |   5 +
 gcc/optabs.c                                 | 124 +---------
 gcc/optabs.h                                 |   7 +-
 gcc/passes.def                               |   1 +
 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C |  17 ++
 gcc/tree-cfg.c                               |   8 +
 gcc/tree-pass.h                              |   1 +
 gcc/tree-ssa-forwprop.c                      |   6 +
 gcc/tree-vect-generic.c                      |  71 ++++--
 gcc/tree-vect-isel.c                         | 244 +++++++++++++++++++
 13 files changed, 431 insertions(+), 169 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
 create mode 100644 gcc/tree-vect-isel.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4f70c189b9d..4cbb9d23606 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1631,6 +1631,7 @@ OBJS = \
 	tree-streamer-out.o \
 	tree-tailcall.o \
 	tree-vect-generic.o \
+	tree-vect-isel.o \
 	tree-vect-patterns.o \
 	tree-vect-data-refs.o \
 	tree-vect-stmts.o \
@@ -2600,6 +2601,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
   $(srcdir)/tree-vect-generic.c \
+  $(srcdir)/tree-vect-isel.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
   $(srcdir)/expr.h \
diff --git a/gcc/expr.c b/gcc/expr.c
index ca6b1c1291e..3c68b0d754c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 6d0b76c13ba..184827fdf4e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 56322025226..2b1e09fdda3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
new file mode 100644
index 00000000000..00fe2422444
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fnon-call-exceptions" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df foo (v2df a, v2df b, v2df c, v2df d)
+{
+  try
+  {
+    v2df res = a < b ? c : d;
+    return res;
+    }
+    catch (...)
+    {
+    return (v2df){};
+    }
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..16ff06fbf88 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4199,6 +4199,14 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (cfun->curr_properties & PROP_gimple_lvec
+	       && TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+		 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 396428f167f..215c8f2a337 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 494c9e9c20b..cc031e103b5 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3136,6 +3136,12 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs after they are
+			   vector lowering pass.  */
+			if (code == VEC_COND_EXPR
+			    && (fun->curr_properties & PROP_gimple_lvec))
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..fa1a4fc9846 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -24,6 +24,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "rtl.h"
 #include "tree.h"
 #include "gimple.h"
+#include "cfghooks.h"
 #include "tree-pass.h"
 #include "ssa.h"
 #include "expmed.h"
@@ -42,8 +43,10 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-config.h"
 #include "recog.h"		/* FIXME: for insn_data */
 
+typedef std::pair<gimple *, gimple *> gimple_pair;
 
-static void expand_vector_operations_1 (gimple_stmt_iterator *);
+static void expand_vector_operations_1 (gimple_stmt_iterator *,
+					auto_vec<gimple_pair> *);
 
 /* Return the number of elements in a vector type TYPE that we have
    already decided needs to be expanded piecewise.  We don't support
@@ -694,12 +697,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +712,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -930,7 +935,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 /* Expand a vector condition to scalars, by using many conditions
    on the vector's elements.  */
 static void
-expand_vector_condition (gimple_stmt_iterator *gsi)
+expand_vector_condition (gimple_stmt_iterator *gsi,
+			 auto_vec<gimple_pair> *moved_eh_stmts)
 {
   gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
   tree type = gimple_expr_type (stmt);
@@ -964,7 +970,23 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      if (a_is_comparison)
+	{
+	  a = gimplify_build2 (gsi, TREE_CODE (a), TREE_TYPE (a), a1, a2);
+	  gimple_assign_set_rhs1 (stmt, a);
+	  gimple *assign = SSA_NAME_DEF_STMT (a);
+	  update_stmt (stmt);
+	  if (lookup_stmt_eh_lp (stmt) != 0)
+	    {
+	      maybe_clean_or_replace_eh_stmt (stmt, assign);
+	      moved_eh_stmts->safe_push (gimple_pair (stmt, assign));
+	    }
+	  return;
+	}
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -1946,7 +1968,8 @@ expand_vector_conversion (gimple_stmt_iterator *gsi)
 /* Process one statement.  If we identify a vector operation, expand it.  */
 
 static void
-expand_vector_operations_1 (gimple_stmt_iterator *gsi)
+expand_vector_operations_1 (gimple_stmt_iterator *gsi,
+			    auto_vec<gimple_pair> *moved_eh_stmts)
 {
   tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
   enum tree_code code;
@@ -1975,7 +1998,7 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
 
   if (code == VEC_COND_EXPR)
     {
-      expand_vector_condition (gsi);
+      expand_vector_condition (gsi, moved_eh_stmts);
       return;
     }
 
@@ -2219,23 +2242,29 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
 static unsigned int
 expand_vector_operations (void)
 {
+  edge e;
+  edge_iterator ei;
   gimple_stmt_iterator gsi;
   basic_block bb;
   bool cfg_changed = false;
+  auto_vec<gimple_pair> moved_eh_stmts;
 
   FOR_EACH_BB_FN (bb, cfun)
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      expand_vector_operations_1 (&gsi, &moved_eh_stmts);
+
+  for (unsigned i = 0; i < moved_eh_stmts.length (); i++)
     {
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	{
-	  expand_vector_operations_1 (&gsi);
-	  /* ???  If we do not cleanup EH then we will ICE in
-	     verification.  But in reality we have created wrong-code
-	     as we did not properly transition EH info and edges to
-	     the piecewise computations.  */
-	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
-	      && gimple_purge_dead_eh_edges (bb))
-	    cfg_changed = true;
-	}
+      gimple *old_stmt = moved_eh_stmts[i].first;
+      gimple *new_stmt = moved_eh_stmts[i].second;
+      split_block (gimple_bb (new_stmt), new_stmt);
+
+      FOR_EACH_EDGE (e, ei, gimple_bb (old_stmt)->succs)
+	if (e->flags & EDGE_EH)
+	  make_edge (gimple_bb (new_stmt), e->dest, e->flags);
+
+      gimple_purge_dead_eh_edges (gimple_bb (old_stmt));
+      cfg_changed = true;
     }
 
   return cfg_changed ? TODO_cleanup_cfg : 0;
diff --git a/gcc/tree-vect-isel.c b/gcc/tree-vect-isel.c
new file mode 100644
index 00000000000..97f92080503
--- /dev/null
+++ b/gcc/tree-vect-isel.c
@@ -0,0 +1,244 @@
+/* Schedule GIMPLE vector statements.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expmed.h"
+#include "optabs-tree.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimplify.h"
+#include "tree-cfg.h"
+
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-11  8:52                                                     ` Martin Liška
@ 2020-06-12  9:43                                                       ` Richard Biener
  2020-06-12 13:24                                                         ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-12  9:43 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Thu, Jun 11, 2020 at 10:52 AM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/9/20 3:42 PM, Richard Biener wrote:
> > I think we need to fix that before merging.
>
> There's updated version of the patch that should handle the EH properly.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

I think it would be better to avoid creating a new edge and removing the old
one by simply moving the condition stmt to the normal outgoing edge.  Thus in
expand_vector_condition do

   if (lookup_stmt_eh_lp (stmt) != 0)
     {
        maybe_clean_or_replace_eh_stmt (stmt, assign);
        gsi_remove (gsi, false);
        edge_iterator ei;
        edge e;
        FOR_EACH_EDGE (e, ei, gimple_bb (assign)->succs)
           if (e->flags & EDGE_EH)
             break;
        if (e)
          {
             gsi_remove (gsi, false);
             gsi_insert_on_edge_immediate (e, stmt);
          }
        else
           gsi_remove (gsi, true);
     }

a twist is probably the following which shows how we wrongly
make 'res' available in the catch block.  The above would
break (your variant as well) since SSA form is broken afterwards.
This of course solves itself when we not start with the broken
IL in the first place.  We could also try to "solve" this in the
SSA renamer, but then I'm not sure if gimplification doesn't
already break it.

typedef double v2df __attribute__((vector_size(16)));

v2df foo (v2df a, v2df b, v2df c, v2df d)
{
 v2df res;
  try
  {
     res = a < b ? c : d;
    return res; // replace with gcc_unreachable () for more twists
    }
    catch (...)
    {
    return res;
    }
}

So ... how far are you with enforcing a split VEC_COND_EXPR?
Thus can we avoid the above completely (even as intermediate
state)?

Thanks,
Richard.

> Thanks,
> Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-12  9:43                                                       ` Richard Biener
@ 2020-06-12 13:24                                                         ` Martin Liška
  2020-06-15  7:14                                                           ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-12 13:24 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 896 bytes --]

On 6/12/20 11:43 AM, Richard Biener wrote:
> So ... how far are you with enforcing a split VEC_COND_EXPR?
> Thus can we avoid the above completely (even as intermediate
> state)?

Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
failures:

FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3

The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
analyzed the second failure yet.

I'm also not sure about the gimplification change; I see superfluous assignments:
   vec_cond_cmp.5 = _1 == _2;
   vec_cond_cmp.6 = vec_cond_cmp.5;
   vec_cond_cmp.7 = vec_cond_cmp.6;
   _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
?

So with the suggested patch, the EH should be gone as you suggested. Right?

Martin

[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 30367 bytes --]

From 3589a849ba63d7289e547b44c9f7349ee23ee2ca Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

2020-03-30  Martin Liska  <mliska@suse.cz>

	* expr.c (expand_expr_real_2): Put gcc_unreachable, we should reach
	this path.
	(do_store_flag): Likewise here.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn): Move from optabs.c.
	(expand_vec_cond_optab_fn): New alias.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Moved from optabs.c.
	(expand_vec_cond_mask_optab_fn): New alias.
	(direct_vec_cond_mask_optab_supported_p): New.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (VCOND): New new internal optab
	function.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Likewise.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Likewise.
	* passes.def: Add pass_gimple_isel.
	* tree-cfg.c (verify_gimple_assign_ternary): Add new
	GIMPLE check.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Do not forward
	VEC_COND_EXPR.
	* tree-vect-generic.c (expand_vector_divmod): Expand to SSA_NAME.
	* tree-vect-isel.c: New file.
	* Makefile.in: Add the file.
	* gimplify.c (gimplify_expr): Assign first argument of
	VEC_COND_EXPR to a SSA name.
	* tree-vect-stmts.c (vectorizable_condition): Assign first
	argument of VEC_COND_EXPR to a SSA name.

gcc/testsuite/ChangeLog:

	* g++.dg/vect/vec-cond-expr-eh.C: New test.
---
 gcc/Makefile.in                              |   2 +
 gcc/expr.c                                   |  25 +-
 gcc/gimplify.c                               |   6 +-
 gcc/internal-fn.c                            |  89 +++++++
 gcc/internal-fn.def                          |   5 +
 gcc/optabs.c                                 | 124 +---------
 gcc/optabs.h                                 |   7 +-
 gcc/passes.def                               |   1 +
 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C |  17 ++
 gcc/tree-cfg.c                               |   7 +
 gcc/tree-pass.h                              |   1 +
 gcc/tree-ssa-forwprop.c                      |   4 +
 gcc/tree-vect-generic.c                      |  35 ++-
 gcc/tree-vect-isel.c                         | 244 +++++++++++++++++++
 gcc/tree-vect-stmts.c                        |   4 +-
 15 files changed, 399 insertions(+), 172 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
 create mode 100644 gcc/tree-vect-isel.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4f70c189b9d..4cbb9d23606 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1631,6 +1631,7 @@ OBJS = \
 	tree-streamer-out.o \
 	tree-tailcall.o \
 	tree-vect-generic.o \
+	tree-vect-isel.o \
 	tree-vect-patterns.o \
 	tree-vect-data-refs.o \
 	tree-vect-stmts.o \
@@ -2600,6 +2601,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
   $(srcdir)/tree-vect-generic.c \
+  $(srcdir)/tree-vect-isel.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
   $(srcdir)/expr.h \
diff --git a/gcc/expr.c b/gcc/expr.c
index ca6b1c1291e..3c68b0d754c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index e14932fafaf..745d21f0006 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	case VEC_COND_EXPR:
 	  {
 	    enum gimplify_status r0, r1, r2;
-
 	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
 				post_p, is_gimple_condexpr, fb_rvalue);
+	    tree xop0 = TREE_OPERAND (*expr_p, 0);
+	    tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
+	    gimple_add_tmp_var (tmp);
+	    gimplify_assign (tmp, xop0, pre_p);
+	    TREE_OPERAND (*expr_p, 0) = tmp;
 	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
 				post_p, is_gimple_val, fb_rvalue);
 	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on the associated OPTAB table.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on the associated OPTAB table.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 6d0b76c13ba..184827fdf4e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 56322025226..2b1e09fdda3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
new file mode 100644
index 00000000000..00fe2422444
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fnon-call-exceptions" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df foo (v2df a, v2df b, v2df c, v2df d)
+{
+  try
+  {
+    v2df res = a < b ? c : d;
+    return res;
+    }
+    catch (...)
+    {
+    return (v2df){};
+    }
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..943f1531b89 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4199,6 +4199,13 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+	 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 396428f167f..215c8f2a337 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 494c9e9c20b..090fb52a2f1 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
 		    if (code == COND_EXPR
 			|| code == VEC_COND_EXPR)
 		      {
+			/* Do not propagate into VEC_COND_EXPRs.  */
+			if (code == VEC_COND_EXPR)
+			  break;
+
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
 			  {
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..99d650fef6d 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -964,7 +966,10 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
     }
 
   if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+    {
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -2221,24 +2226,12 @@ expand_vector_operations (void)
 {
   gimple_stmt_iterator gsi;
   basic_block bb;
-  bool cfg_changed = false;
 
   FOR_EACH_BB_FN (bb, cfun)
-    {
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-	{
-	  expand_vector_operations_1 (&gsi);
-	  /* ???  If we do not cleanup EH then we will ICE in
-	     verification.  But in reality we have created wrong-code
-	     as we did not properly transition EH info and edges to
-	     the piecewise computations.  */
-	  if (maybe_clean_eh_stmt (gsi_stmt (gsi))
-	      && gimple_purge_dead_eh_edges (bb))
-	    cfg_changed = true;
-	}
-    }
+    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+      expand_vector_operations_1 (&gsi);
 
-  return cfg_changed ? TODO_cleanup_cfg : 0;
+  return 0;
 }
 
 namespace {
diff --git a/gcc/tree-vect-isel.c b/gcc/tree-vect-isel.c
new file mode 100644
index 00000000000..97f92080503
--- /dev/null
+++ b/gcc/tree-vect-isel.c
@@ -0,0 +1,244 @@
+/* Schedule GIMPLE vector statements.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expmed.h"
+#include "optabs-tree.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimplify.h"
+#include "tree-cfg.h"
+
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index cdd6f6c5e5d..5c98edbda80 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9936,8 +9936,8 @@ vectorizable_condition (vec_info *vinfo,
 	{
 	  vec_cond_rhs = vec_oprnds1[i];
 	  if (bitop1 == NOP_EXPR)
-	    vec_compare = build2 (cond_code, vec_cmp_type,
-				  vec_cond_lhs, vec_cond_rhs);
+	    vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
+					   vec_cond_lhs, vec_cond_rhs);
 	  else
 	    {
 	      new_temp = make_ssa_name (vec_cmp_type);
-- 
2.26.2


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-12 13:24                                                         ` Martin Liška
@ 2020-06-15  7:14                                                           ` Richard Biener
  2020-06-15 11:19                                                             ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-15  7:14 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/12/20 11:43 AM, Richard Biener wrote:
> > So ... how far are you with enforcing a split VEC_COND_EXPR?
> > Thus can we avoid the above completely (even as intermediate
> > state)?
>
> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
> failures:
>
> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
>
> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
> analyzed the second failure.
>
> I'm also not sure about the gimplification change, I see superfluous assignments:
>    vec_cond_cmp.5 = _1 == _2;
>    vec_cond_cmp.6 = vec_cond_cmp.5;
>    vec_cond_cmp.7 = vec_cond_cmp.6;
>    _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
> ?
>
> So with the suggested patch, the EH should be gone as you suggested. Right?

Right, it should be on the comparison already from the start.

@@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
*pre_p, gimple_seq *post_p,
        case VEC_COND_EXPR:
          {
            enum gimplify_status r0, r1, r2;
-
            r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
                                post_p, is_gimple_condexpr, fb_rvalue);
+           tree xop0 = TREE_OPERAND (*expr_p, 0);
+           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
+           gimple_add_tmp_var (tmp);
+           gimplify_assign (tmp, xop0, pre_p);
+           TREE_OPERAND (*expr_p, 0) = tmp;
            r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
                                post_p, is_gimple_val, fb_rvalue);

all of VEC_COND_EXPR can now be a simple goto expr_3;

diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 494c9e9c20b..090fb52a2f1 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
                    if (code == COND_EXPR
                        || code == VEC_COND_EXPR)
                      {
+                       /* Do not propagate into VEC_COND_EXPRs.  */
+                       if (code == VEC_COND_EXPR)
+                         break;
+

err - remove the || code == VEC_COND_EXPR instead?

@@ -2221,24 +2226,12 @@ expand_vector_operations (void)
 {
   gimple_stmt_iterator gsi;
   basic_block bb;
-  bool cfg_changed = false;

   FOR_EACH_BB_FN (bb, cfun)
-    {
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       {
-         expand_vector_operations_1 (&gsi);
-         /* ???  If we do not cleanup EH then we will ICE in
-            verification.  But in reality we have created wrong-code
-            as we did not properly transition EH info and edges to
-            the piecewise computations.  */
-         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
-             && gimple_purge_dead_eh_edges (bb))
-           cfg_changed = true;
-       }
-    }

I'm not sure about this.  Consider the C++ testcase where
the ?: is replaced by a division.  If veclower needs to replace
that with four scalar division statements then the above
still applies - veclower does not correctly duplicate EH info
and EH edges to the individual divisions (and we do not know
which component might trap).

So please leave the above in.  You can try if using integer
division makes it break and add such a testcase if there's
no coverage for this in the testsuite.

What's missing from the patch is adjusting
verify_gimple_assign_ternary from

  if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
       ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
      || !is_gimple_val (rhs2)
      || !is_gimple_val (rhs3))
    {
      error ("invalid operands in ternary operation");
      return true;

to the same with the rhs_code == VEC_COND_EXPR case removed.

You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
with embedded comparisons.

Thanks,
Richard.


> Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-15  7:14                                                           ` Richard Biener
@ 2020-06-15 11:19                                                             ` Martin Liška
  2020-06-15 11:59                                                               ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-15 11:19 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 4665 bytes --]

On 6/15/20 9:14 AM, Richard Biener wrote:
> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> On 6/12/20 11:43 AM, Richard Biener wrote:
>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
>>> Thus can we avoid the above completely (even as intermediate
>>> state)?
>>
>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
>> failures:
>>
>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
>>
>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
>> analyzed the second failure.
>>
>> I'm also not sure about the gimplification change, I see superfluous assignments:
>>     vec_cond_cmp.5 = _1 == _2;
>>     vec_cond_cmp.6 = vec_cond_cmp.5;
>>     vec_cond_cmp.7 = vec_cond_cmp.6;
>>     _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
>> ?
>>
>> So with the suggested patch, the EH should be gone as you suggested. Right?
> 
> Right, it should be on the comparison already from the start.
> 
> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
> *pre_p, gimple_seq *post_p,
>          case VEC_COND_EXPR:
>            {
>              enum gimplify_status r0, r1, r2;
> -
>              r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
>                                  post_p, is_gimple_condexpr, fb_rvalue);
> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
> +           gimple_add_tmp_var (tmp);
> +           gimplify_assign (tmp, xop0, pre_p);
> +           TREE_OPERAND (*expr_p, 0) = tmp;
>              r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
>                                  post_p, is_gimple_val, fb_rvalue);
> 
> all of VEC_COND_EXPR can now be a simple goto expr_3;

Works for me, thanks!

> 
> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> index 494c9e9c20b..090fb52a2f1 100644
> --- a/gcc/tree-ssa-forwprop.c
> +++ b/gcc/tree-ssa-forwprop.c
> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
>                      if (code == COND_EXPR
>                          || code == VEC_COND_EXPR)
>                        {
> +                       /* Do not propagate into VEC_COND_EXPRs.  */
> +                       if (code == VEC_COND_EXPR)
> +                         break;
> +
> 
> err - remove the || code == VEC_COND_EXPR instead?

Yep.

> 
> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
>   {
>     gimple_stmt_iterator gsi;
>     basic_block bb;
> -  bool cfg_changed = false;
> 
>     FOR_EACH_BB_FN (bb, cfun)
> -    {
> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> -       {
> -         expand_vector_operations_1 (&gsi);
> -         /* ???  If we do not cleanup EH then we will ICE in
> -            verification.  But in reality we have created wrong-code
> -            as we did not properly transition EH info and edges to
> -            the piecewise computations.  */
> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> -             && gimple_purge_dead_eh_edges (bb))
> -           cfg_changed = true;
> -       }
> -    }
> 
> I'm not sure about this.  Consider the C++ testcase where
> the ?: is replaced by a division.  If veclower needs to replace
> that with four scalar division statements then the above
> still applies - veclower does not correctly duplicate EH info
> and EH edges to the individual divisions (and we do not know
> which component might trap).
> 
> So please leave the above in.  You can try if using integer
> division makes it break and add such a testcase if there's
> no coverage for this in the testsuite.

I'm leaving that above. Can you please explain how can a division test-case
be created?

> 
> What's missing from the patch is adjusting
> verify_gimple_assign_ternary from
> 
>    if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
>         ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
>        || !is_gimple_val (rhs2)
>        || !is_gimple_val (rhs3))
>      {
>        error ("invalid operands in ternary operation");
>        return true;
> 
> to the same with the rhs_code == VEC_COND_EXPR case removed.

Hmm. I'm not sure I've got this comment. Why do we want to change it,
and is it done right in the patch?

> 
> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
> with embedded comparisons.

I've fixed 2 failing test-cases I mentioned in the previous email.

Martin

> 
> Thanks,
> Richard.
> 
> 
>> Martin


[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 37074 bytes --]

From 32a2fb841cfd15d17527e44c4dc119e25d643cf1 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

	* Makefile.in: Add new file.
	* expr.c (expand_expr_real_2): Add gcc_unreachable as we should
	not meet this condition.
	(do_store_flag):
	* gimplify.c (gimplify_expr): Gimplify first argument of
	VEC_COND_EXPR to be a SSA name.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn):  New.
	(expand_vec_cond_optab_fn): Likewise.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Likewise.
	(expand_vec_cond_mask_optab_fn): Likewise.
	(direct_vec_cond_mask_optab_supported_p): Likewise.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (VCOND): New OPTAB.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (get_rtx_code): Make it global.
	(expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Removed.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Make it global.
	* passes.def: Add new pass_gimple_isel pass.
	* tree-cfg.c (verify_gimple_assign_ternary): Add check
	for VEC_COND_EXPR about first argument.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Prevent
	propagation of the first argument of a VEC_COND_EXPR.
	* tree-ssa-reassoc.c (ovce_extract_ops): Support SSA_NAME as
	first argument of a VEC_COND_EXPR.
	(optimize_vec_cond_expr): Likewise.
	* tree-vect-generic.c (expand_vector_divmod): Make SSA_NAME
	for a first argument of created VEC_COND_EXPR.
	(expand_vector_condition): Fix coding style.
	* tree-vect-stmts.c (vectorizable_condition): Gimplify
	first argument.
	* tree-vect-isel.c: New file.

gcc/testsuite/ChangeLog:

	* g++.dg/vect/vec-cond-expr-eh.C: New test.
---
 gcc/Makefile.in                              |   2 +
 gcc/expr.c                                   |  25 +-
 gcc/gimplify.c                               |  15 +-
 gcc/internal-fn.c                            |  89 +++++++
 gcc/internal-fn.def                          |   5 +
 gcc/optabs.c                                 | 124 +---------
 gcc/optabs.h                                 |   7 +-
 gcc/passes.def                               |   1 +
 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C |  17 ++
 gcc/tree-cfg.c                               |   9 +-
 gcc/tree-pass.h                              |   1 +
 gcc/tree-ssa-forwprop.c                      |   3 +-
 gcc/tree-ssa-reassoc.c                       |  69 ++++--
 gcc/tree-vect-generic.c                      |  45 ++--
 gcc/tree-vect-isel.c                         | 244 +++++++++++++++++++
 gcc/tree-vect-stmts.c                        |   4 +-
 16 files changed, 449 insertions(+), 211 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
 create mode 100644 gcc/tree-vect-isel.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4f70c189b9d..4cbb9d23606 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1631,6 +1631,7 @@ OBJS = \
 	tree-streamer-out.o \
 	tree-tailcall.o \
 	tree-vect-generic.o \
+	tree-vect-isel.o \
 	tree-vect-patterns.o \
 	tree-vect-data-refs.o \
 	tree-vect-stmts.o \
@@ -2600,6 +2601,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
   $(srcdir)/tree-vect-generic.c \
+  $(srcdir)/tree-vect-isel.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
   $(srcdir)/expr.h \
diff --git a/gcc/expr.c b/gcc/expr.c
index ca6b1c1291e..3c68b0d754c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 416fb609b94..339e8bc5089 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -14219,20 +14219,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  }
 
 	case VEC_COND_EXPR:
-	  {
-	    enum gimplify_status r0, r1, r2;
-
-	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
-				post_p, is_gimple_condexpr, fb_rvalue);
-	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-
-	    ret = MIN (MIN (r0, r1), r2);
-	    recalculate_side_effects (*expr_p);
-	  }
-	  break;
+	  goto expr_3;
 
 	case VEC_PERM_EXPR:
 	  /* Classified as tcc_expression.  */
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 6d0b76c13ba..184827fdf4e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 56322025226..2b1e09fdda3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
new file mode 100644
index 00000000000..00fe2422444
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fnon-call-exceptions" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df foo (v2df a, v2df b, v2df c, v2df d)
+{
+  try
+  {
+    v2df res = a < b ? c : d;
+    return res;
+    }
+    catch (...)
+    {
+    return (v2df){};
+    }
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..5a23b910b61 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4155,7 +4155,7 @@ verify_gimple_assign_ternary (gassign *stmt)
       return true;
     }
 
-  if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
+  if ((rhs_code == COND_EXPR
        ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
       || !is_gimple_val (rhs2)
       || !is_gimple_val (rhs3))
@@ -4199,6 +4199,13 @@ verify_gimple_assign_ternary (gassign *stmt)
 	  debug_generic_expr (rhs1_type);
 	  return true;
 	}
+      else if (TREE_CODE_CLASS (TREE_CODE (rhs1)) == tcc_comparison)
+	{
+	  error ("the first argument of %<VEC_COND_EXPR%> cannot be "
+	 "a %<GENERIC%> tree comparison expression");
+	  debug_generic_expr (rhs1);
+	  return true;
+	}
       /* Fallthrough.  */
     case COND_EXPR:
       if (!is_gimple_val (rhs1)
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 396428f167f..215c8f2a337 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 494c9e9c20b..0ab8267fdf9 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3133,8 +3133,7 @@ pass_forwprop::execute (function *fun)
 		    tree rhs1 = gimple_assign_rhs1 (stmt);
 		    enum tree_code code = gimple_assign_rhs_code (stmt);
 
-		    if (code == COND_EXPR
-			|| code == VEC_COND_EXPR)
+		    if (code == COND_EXPR)
 		      {
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index af8faf2e6ea..8d80a2c768c 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3831,7 +3831,8 @@ optimize_range_tests (enum tree_code opcode,
    to type of comparison.  */
 
 static tree_code
-ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
+ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
+		  tree *lhs, tree *rhs, gassign **vcond)
 {
   if (TREE_CODE (var) != SSA_NAME)
     return ERROR_MARK;
@@ -3839,6 +3840,8 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
   if (stmt == NULL)
     return ERROR_MARK;
+  if (*vcond)
+    *vcond = stmt;
 
   /* ??? If we start creating more COND_EXPR, we could perform
      this same optimization with them.	For now, simplify.  */
@@ -3847,9 +3850,20 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   tree cond = gimple_assign_rhs1 (stmt);
   tree_code cmp = TREE_CODE (cond);
-  if (TREE_CODE_CLASS (cmp) != tcc_comparison)
+  if (cmp != SSA_NAME)
     return ERROR_MARK;
 
+  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
+  if (stmt != NULL
+      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
+    return ERROR_MARK;
+
+  cmp = gimple_assign_rhs_code (assign);
+  if (lhs)
+    *lhs = gimple_assign_rhs1 (assign);
+  if (rhs)
+    *rhs = gimple_assign_rhs2 (assign);
+
   /* ??? For now, allow only canonical true and false result vectors.
      We could expand this to other constants should the need arise,
      but at the moment we don't create them.  */
@@ -3870,7 +3884,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   /* Success!  */
   if (rets)
-    *rets = stmt;
+    *rets = assign;
   if (reti)
     *reti = inv;
   if (type)
@@ -3894,10 +3908,11 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
     {
       tree elt0 = (*ops)[i]->op;
 
-      gassign *stmt0;
+      gassign *stmt0, *vcond0;
       bool invert;
-      tree type;
-      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
+      tree type, lhs0, rhs0;
+      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
+					 &rhs0, &vcond0);
       if (cmp0 == ERROR_MARK)
 	continue;
 
@@ -3905,26 +3920,20 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
 	{
 	  tree &elt1 = (*ops)[j]->op;
 
-	  gassign *stmt1;
-	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
+	  gassign *stmt1, *vcond1;
+	  tree lhs1, rhs1;
+	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
+					     &rhs1, &vcond1);
 	  if (cmp1 == ERROR_MARK)
 	    continue;
 
-	  tree cond0 = gimple_assign_rhs1 (stmt0);
-	  tree x0 = TREE_OPERAND (cond0, 0);
-	  tree y0 = TREE_OPERAND (cond0, 1);
-
-	  tree cond1 = gimple_assign_rhs1 (stmt1);
-	  tree x1 = TREE_OPERAND (cond1, 0);
-	  tree y1 = TREE_OPERAND (cond1, 1);
-
 	  tree comb;
 	  if (opcode == BIT_AND_EXPR)
-	    comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
-					       y1);
+	    comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
+					       cmp1, lhs1, rhs1);
 	  else if (opcode == BIT_IOR_EXPR)
-	    comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
-					      y1);
+	    comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
+					      cmp1, lhs1, rhs1);
 	  else
 	    gcc_unreachable ();
 	  if (comb == NULL)
@@ -3934,19 +3943,27 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "Transforming ");
-	      print_generic_expr (dump_file, cond0);
+	      print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
-	      print_generic_expr (dump_file, cond1);
+	      print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
 	      fprintf (dump_file, " into ");
 	      print_generic_expr (dump_file, comb);
 	      fputc ('\n', dump_file);
 	    }
 
-	  gimple_assign_set_rhs1 (stmt0, comb);
+	  gimple_seq seq;
+	  tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
+	  if (seq)
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
+	      gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
+	    }
 	  if (invert)
-	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
-		       *gimple_assign_rhs3_ptr (stmt0));
-	  update_stmt (stmt0);
+	    std::swap (*gimple_assign_rhs2_ptr (vcond0),
+		       *gimple_assign_rhs3_ptr (vcond0));
+
+	  gimple_assign_set_rhs1 (vcond0, exp);
+	  update_stmt (vcond0);
 
 	  elt1 = error_mark_node;
 	  any_changes = true;
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..fb955bbf3d2 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -950,21 +952,28 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
   tree index = bitsize_int (0);
   tree comp_width = width;
   tree comp_index = index;
-  int i;
   location_t loc = gimple_location (gsi_stmt (*gsi));
+  tree_code code = TREE_CODE (a);
 
-  if (!is_gimple_val (a))
+  if (code == SSA_NAME)
     {
-      gcc_assert (COMPARISON_CLASS_P (a));
-      a_is_comparison = true;
-      a1 = TREE_OPERAND (a, 0);
-      a2 = TREE_OPERAND (a, 1);
-      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
-      comp_width = vector_element_bits_tree (TREE_TYPE (a1));
+      gimple *assign = SSA_NAME_DEF_STMT (a);
+      if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
+	{
+	  a_is_comparison = true;
+	  a1 = gimple_assign_rhs1 (assign);
+	  a2 = gimple_assign_rhs2 (assign);
+	  code = gimple_assign_rhs_code (assign);
+	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
+	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
+	}
     }
 
-  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
+    {
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -987,7 +996,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
     {
       if (a_is_comparison)
-	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
+	a = gimplify_build2 (gsi, code, type, a1, a2);
       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
@@ -1018,7 +1027,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 
   int nunits = nunits_for_known_piecewise_op (type);
   vec_alloc (v, nunits);
-  for (i = 0; i < nunits; i++)
+  for (int i = 0; i < nunits; i++)
     {
       tree aa, result;
       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
@@ -1029,7 +1038,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 				       comp_width, comp_index);
 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
 				       comp_width, comp_index);
-	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
+	  aa = fold_build2 (code, cond_type, aa1, aa2);
 	}
       else if (a_is_scalar_bitmask)
 	{
diff --git a/gcc/tree-vect-isel.c b/gcc/tree-vect-isel.c
new file mode 100644
index 00000000000..97f92080503
--- /dev/null
+++ b/gcc/tree-vect-isel.c
@@ -0,0 +1,244 @@
+/* Schedule GIMPLE vector statements.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expmed.h"
+#include "optabs-tree.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimplify.h"
+#include "tree-cfg.h"
+
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index cf2d979fea1..710b17a7c5c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
 	{
 	  vec_cond_rhs = vec_oprnds1[i];
 	  if (bitop1 == NOP_EXPR)
-	    vec_compare = build2 (cond_code, vec_cmp_type,
-				  vec_cond_lhs, vec_cond_rhs);
+	    vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
+					   vec_cond_lhs, vec_cond_rhs);
 	  else
 	    {
 	      new_temp = make_ssa_name (vec_cmp_type);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-15 11:19                                                             ` Martin Liška
@ 2020-06-15 11:59                                                               ` Richard Biener
  2020-06-15 12:20                                                                 ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-15 11:59 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/15/20 9:14 AM, Richard Biener wrote:
> > On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
> >>
> >> On 6/12/20 11:43 AM, Richard Biener wrote:
> >>> So ... how far are you with enforcing a split VEC_COND_EXPR?
> >>> Thus can we avoid the above completely (even as intermediate
> >>> state)?
> >>
> >> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
> >> failures:
> >>
> >> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
> >> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> >>
> >> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
> >> analyzed the second failure.
> >>
> >> I'm also not sure about the gimplification change, I see superfluous assignments:
> >>     vec_cond_cmp.5 = _1 == _2;
> >>     vec_cond_cmp.6 = vec_cond_cmp.5;
> >>     vec_cond_cmp.7 = vec_cond_cmp.6;
> >>     _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
> >> ?
> >>
> >> So with the suggested patch, the EH should be gone as you suggested. Right?
> >
> > Right, it should be on the comparison already from the start.
> >
> > @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
> > *pre_p, gimple_seq *post_p,
> >          case VEC_COND_EXPR:
> >            {
> >              enum gimplify_status r0, r1, r2;
> > -
> >              r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
> >                                  post_p, is_gimple_condexpr, fb_rvalue);
> > +           tree xop0 = TREE_OPERAND (*expr_p, 0);
> > +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
> > +           gimple_add_tmp_var (tmp);
> > +           gimplify_assign (tmp, xop0, pre_p);
> > +           TREE_OPERAND (*expr_p, 0) = tmp;
> >              r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
> >                                  post_p, is_gimple_val, fb_rvalue);
> >
> > all of VEC_COND_EXPR can now be a simple goto expr_3;
>
> Works for me, thanks!
>
> >
> > diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> > index 494c9e9c20b..090fb52a2f1 100644
> > --- a/gcc/tree-ssa-forwprop.c
> > +++ b/gcc/tree-ssa-forwprop.c
> > @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
> >                      if (code == COND_EXPR
> >                          || code == VEC_COND_EXPR)
> >                        {
> > +                       /* Do not propagate into VEC_COND_EXPRs.  */
> > +                       if (code == VEC_COND_EXPR)
> > +                         break;
> > +
> >
> > err - remove the || code == VEC_COND_EXPR instead?
>
> Yep.
>
> >
> > @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
> >   {
> >     gimple_stmt_iterator gsi;
> >     basic_block bb;
> > -  bool cfg_changed = false;
> >
> >     FOR_EACH_BB_FN (bb, cfun)
> > -    {
> > -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > -       {
> > -         expand_vector_operations_1 (&gsi);
> > -         /* ???  If we do not cleanup EH then we will ICE in
> > -            verification.  But in reality we have created wrong-code
> > -            as we did not properly transition EH info and edges to
> > -            the piecewise computations.  */
> > -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> > -             && gimple_purge_dead_eh_edges (bb))
> > -           cfg_changed = true;
> > -       }
> > -    }
> >
> > I'm not sure about this.  Consider the C++ testcase where
> > the ?: is replaced by a division.  If veclower needs to replace
> > that with four scalrar division statements then the above
> > still applies - veclower does not correctly duplicate EH info
> > and EH edges to the individual divisions (and we do not know
> > which component might trap).
> >
> > So please leave the above in.  You can try if using integer
> > division makes it break and add such a testcase if there's
> > no coverage for this in the testsuite.
>
> I'm leaving that above. Can you please explain how a division test-case
> can be created?

typedef long v2di __attribute__((vector_size(16)));

v2di foo (v2di a, v2di b)
{
  try
  {
    v2di res = a / b;
    return res;
    }
    catch (...)
    {
    return (v2di){};
    }
}

with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  [LP 1] _6 = a_4(D) / b_5(D);
;;    succ:       5
;;                3

while after t.ii.226t.veclower we have

;;   basic block 2, loop depth 0
;;    pred:       ENTRY
  _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
  _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
  _15 = _13 / _14;
  _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
  _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
  _18 = _16 / _17;
  _6 = {_15, _18};
  res_7 = _6;
  _8 = res_7;
;;    succ:       3

and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.

We still generate wrong-code obviously as we'd need to duplicate the
EH info on each component division (and split blocks and generate
extra EH edges).  That's a pre-existing bug of course.  I just wanted
to avoid to create a new instance just because of the early instruction
selection for VEC_COND_EXPR.

> >
> > What's missing from the patch is adjusting
> > verify_gimple_assign_ternary from
> >
> >    if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
> >         ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
> >        || !is_gimple_val (rhs2)
> >        || !is_gimple_val (rhs3))
> >      {
> >        error ("invalid operands in ternary operation");
> >        return true;
> >
> > to the same with the rhs_code == VEC_COND_EXPR case removed.
>
> Hmm. I'm not sure I've got this comment. Why do we want to change it
> and is it done right in the patch?

Ah, I missed the hunk you added.  But the check should be an inclusive
one, not an exclusive one, and earlier accepting an is_gimple_condexpr
is superfluous when you later reject the tcc_comparison part.  Just
testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
and just adjust the above test.

> >
> > You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
> > with embedded comparisons.
>
> I've fixed 2 failing test-cases I mentioned in the previous email.
>
> Martin
>
> >
> > Thanks,
> > Richard.
> >
> >
> >> Martin
>

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-15 11:59                                                               ` Richard Biener
@ 2020-06-15 12:20                                                                 ` Martin Liška
  2020-06-17  8:50                                                                   ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-15 12:20 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

[-- Attachment #1: Type: text/plain, Size: 7750 bytes --]

On 6/15/20 1:59 PM, Richard Biener wrote:
> On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
>>
>> On 6/15/20 9:14 AM, Richard Biener wrote:
>>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
>>>>
>>>> On 6/12/20 11:43 AM, Richard Biener wrote:
>>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
>>>>> Thus can we avoid the above completely (even as intermediate
>>>>> state)?
>>>>
>>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
>>>> failures:
>>>>
>>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
>>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
>>>>
>>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
>>>> analyzed the second failure.
>>>>
>>>> I'm also not sure about the gimplification change, I see superfluous assignments:
>>>>      vec_cond_cmp.5 = _1 == _2;
>>>>      vec_cond_cmp.6 = vec_cond_cmp.5;
>>>>      vec_cond_cmp.7 = vec_cond_cmp.6;
>>>>      _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
>>>> ?
>>>>
>>>> So with the suggested patch, the EH should be gone as you suggested. Right?
>>>
>>> Right, it should be on the comparison already from the start.
>>>
>>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
>>> *pre_p, gimple_seq *post_p,
>>>           case VEC_COND_EXPR:
>>>             {
>>>               enum gimplify_status r0, r1, r2;
>>> -
>>>               r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
>>>                                   post_p, is_gimple_condexpr, fb_rvalue);
>>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
>>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
>>> +           gimple_add_tmp_var (tmp);
>>> +           gimplify_assign (tmp, xop0, pre_p);
>>> +           TREE_OPERAND (*expr_p, 0) = tmp;
>>>               r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
>>>                                   post_p, is_gimple_val, fb_rvalue);
>>>
>>> all of VEC_COND_EXPR can now be a simple goto expr_3;
>>
>> Works for me, thanks!
>>
>>>
>>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
>>> index 494c9e9c20b..090fb52a2f1 100644
>>> --- a/gcc/tree-ssa-forwprop.c
>>> +++ b/gcc/tree-ssa-forwprop.c
>>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
>>>                       if (code == COND_EXPR
>>>                           || code == VEC_COND_EXPR)
>>>                         {
>>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
>>> +                       if (code == VEC_COND_EXPR)
>>> +                         break;
>>> +
>>>
>>> err - remove the || code == VEC_COND_EXPR instead?
>>
>> Yep.
>>
>>>
>>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
>>>    {
>>>      gimple_stmt_iterator gsi;
>>>      basic_block bb;
>>> -  bool cfg_changed = false;
>>>
>>>      FOR_EACH_BB_FN (bb, cfun)
>>> -    {
>>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>> -       {
>>> -         expand_vector_operations_1 (&gsi);
>>> -         /* ???  If we do not cleanup EH then we will ICE in
>>> -            verification.  But in reality we have created wrong-code
>>> -            as we did not properly transition EH info and edges to
>>> -            the piecewise computations.  */
>>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
>>> -             && gimple_purge_dead_eh_edges (bb))
>>> -           cfg_changed = true;
>>> -       }
>>> -    }
>>>
>>> I'm not sure about this.  Consider the C++ testcase where
>>> the ?: is replaced by a division.  If veclower needs to replace
>>> that with four scalar division statements then the above
>>> still applies - veclower does not correctly duplicate EH info
>>> and EH edges to the individual divisions (and we do not know
>>> which component might trap).
>>>
>>> So please leave the above in.  You can try if using integer
>>> division makes it break and add such a testcase if there's
>>> no coverage for this in the testsuite.
>>
>> I'm leaving that above. Can you please explain how can a division test-case
>> be created?
> 
> typedef long v2di __attribute__((vector_size(16)));
> 
> v2di foo (v2di a, v2di b)
> {
>    try
>    {
>      v2di res = a / b;
>      return res;
>      }
>      catch (...)
>      {
>      return (v2di){};
>      }
> }
> 
> with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>    [LP 1] _6 = a_4(D) / b_5(D);
> ;;    succ:       5
> ;;                3
> 
> while after t.ii.226t.veclower we have
> 
> ;;   basic block 2, loop depth 0
> ;;    pred:       ENTRY
>    _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
>    _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
>    _15 = _13 / _14;
>    _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
>    _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
>    _18 = _16 / _17;
>    _6 = {_15, _18};
>    res_7 = _6;
>    _8 = res_7;
> ;;    succ:       3
> 
> and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.

Yes, it ICEs then:


./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
/home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
/home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
     3 | v2di foo (v2di a, v2di b)
       |      ^~~
_6 = {_12, _15};
during GIMPLE pass: veclower2
/home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
0x10e308a verify_gimple_in_cfg(function*, bool)
	/home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
0xfc9caf execute_function_todo
	/home/marxin/Programming/gcc/gcc/passes.c:1985
0xfcaafc do_per_function
	/home/marxin/Programming/gcc/gcc/passes.c:1640
0xfcaafc execute_todo
	/home/marxin/Programming/gcc/gcc/passes.c:2039
Please submit a full bug report,
with preprocessed source if appropriate.
Please include the complete backtrace with any bug report.
See <https://gcc.gnu.org/bugs/> for instructions.

> 
> We still generate wrong-code obviously as we'd need to duplicate the
> EH info on each component division (and split blocks and generate
> extra EH edges).  That's a pre-existing bug of course.  I just wanted
> to avoid to create a new instance just because of the early instruction
> selection for VEC_COND_EXPR.

Fine!

> 
>>>
>>> What's missing from the patch is adjusting
>>> verify_gimple_assign_ternary from
>>>
>>>     if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
>>>          ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
>>>         || !is_gimple_val (rhs2)
>>>         || !is_gimple_val (rhs3))
>>>       {
>>>         error ("invalid operands in ternary operation");
>>>         return true;
>>>
>>> to the same with the rhs_code == VEC_COND_EXPR case removed.
>>
>> Hmm. I'm not sure I've got this comment. Why do we want to change it
>> and is it done right in the patch?
> 
> Ah, I missed the hunk you added.

That explains the confusion I got.

>  But the check should be an inclusive
> one, not an exclusive one and earlier accepting a is_gimple_condexpr
> is superfluous when you later reject the tcc_comparison part.  Just
> testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
> and just adjust the above test.

I simplified that.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Thanks,
Martin

> 
>>>
>>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
>>> with embedded comparisons.
>>
>> I've fixed 2 failing test-cases I mentioned in the previous email.
>>
>> Martin
>>
>>>
>>> Thanks,
>>> Richard.
>>>
>>>
>>>> Martin
>>


[-- Attachment #2: 0001-Lower-VEC_COND_EXPR-into-internal-functions.patch --]
[-- Type: text/x-patch, Size: 36630 bytes --]

From 9ce7b3f510b46e1adba1b1d7eb005beb00d21d42 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Mon, 9 Mar 2020 13:23:03 +0100
Subject: [PATCH] Lower VEC_COND_EXPR into internal functions.

gcc/ChangeLog:

	* Makefile.in: Add new file.
	* expr.c (expand_expr_real_2): Add gcc_unreachable as we should
	not meet this condition.
	(do_store_flag): Likewise.
	* gimplify.c (gimplify_expr): Gimplify first argument of
	VEC_COND_EXPR to be a SSA name.
	* internal-fn.c (vec_cond_mask_direct): New.
	(vec_cond_direct): Likewise.
	(vec_condu_direct): Likewise.
	(vec_condeq_direct): Likewise.
	(expand_vect_cond_optab_fn):  New.
	(expand_vec_cond_optab_fn): Likewise.
	(expand_vec_condu_optab_fn): Likewise.
	(expand_vec_condeq_optab_fn): Likewise.
	(expand_vect_cond_mask_optab_fn): Likewise.
	(expand_vec_cond_mask_optab_fn): Likewise.
	(direct_vec_cond_mask_optab_supported_p): Likewise.
	(direct_vec_cond_optab_supported_p): Likewise.
	(direct_vec_condu_optab_supported_p): Likewise.
	(direct_vec_condeq_optab_supported_p): Likewise.
	* internal-fn.def (VCOND): New OPTAB.
	(VCONDU): Likewise.
	(VCONDEQ): Likewise.
	(VCOND_MASK): Likewise.
	* optabs.c (get_rtx_code): Make it global.
	(expand_vec_cond_mask_expr): Removed.
	(expand_vec_cond_expr): Removed.
	* optabs.h (expand_vec_cond_expr): Likewise.
	(vector_compare_rtx): Make it global.
	* passes.def: Add new pass_gimple_isel pass.
	* tree-cfg.c (verify_gimple_assign_ternary): Add check
	for VEC_COND_EXPR about first argument.
	* tree-pass.h (make_pass_gimple_isel): New.
	* tree-ssa-forwprop.c (pass_forwprop::execute): Prevent
	propagation of the first argument of a VEC_COND_EXPR.
	* tree-ssa-reassoc.c (ovce_extract_ops): Support SSA_NAME as
	first argument of a VEC_COND_EXPR.
	(optimize_vec_cond_expr): Likewise.
	* tree-vect-generic.c (expand_vector_divmod): Make SSA_NAME
	for a first argument of created VEC_COND_EXPR.
	(expand_vector_condition): Fix coding style.
	* tree-vect-stmts.c (vectorizable_condition): Gimplify
	first argument.
	* tree-vect-isel.c: New file.

gcc/testsuite/ChangeLog:

	* g++.dg/vect/vec-cond-expr-eh.C: New test.
---
 gcc/Makefile.in                              |   2 +
 gcc/expr.c                                   |  25 +-
 gcc/gimplify.c                               |  15 +-
 gcc/internal-fn.c                            |  89 +++++++
 gcc/internal-fn.def                          |   5 +
 gcc/optabs.c                                 | 124 +---------
 gcc/optabs.h                                 |   7 +-
 gcc/passes.def                               |   1 +
 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C |  17 ++
 gcc/tree-cfg.c                               |   2 +-
 gcc/tree-pass.h                              |   1 +
 gcc/tree-ssa-forwprop.c                      |   3 +-
 gcc/tree-ssa-reassoc.c                       |  69 ++++--
 gcc/tree-vect-generic.c                      |  45 ++--
 gcc/tree-vect-isel.c                         | 244 +++++++++++++++++++
 gcc/tree-vect-stmts.c                        |   4 +-
 16 files changed, 442 insertions(+), 211 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
 create mode 100644 gcc/tree-vect-isel.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 4f70c189b9d..4cbb9d23606 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1631,6 +1631,7 @@ OBJS = \
 	tree-streamer-out.o \
 	tree-tailcall.o \
 	tree-vect-generic.o \
+	tree-vect-isel.o \
 	tree-vect-patterns.o \
 	tree-vect-data-refs.o \
 	tree-vect-stmts.o \
@@ -2600,6 +2601,7 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h $(srcdir)/coretypes.h \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
   $(srcdir)/tree-vect-generic.c \
+  $(srcdir)/tree-vect-isel.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
   $(srcdir)/expr.h \
diff --git a/gcc/expr.c b/gcc/expr.c
index ca6b1c1291e..3c68b0d754c 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -9316,17 +9316,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
       if (temp != 0)
 	return temp;
 
-      /* For vector MIN <x, y>, expand it a VEC_COND_EXPR <x <= y, x, y>
-	 and similarly for MAX <x, y>.  */
       if (VECTOR_TYPE_P (type))
-	{
-	  tree t0 = make_tree (type, op0);
-	  tree t1 = make_tree (type, op1);
-	  tree comparison = build2 (code == MIN_EXPR ? LE_EXPR : GE_EXPR,
-				    type, t0, t1);
-	  return expand_vec_cond_expr (type, comparison, t0, t1,
-				       original_target);
-	}
+	gcc_unreachable ();
 
       /* At this point, a MEM target is no longer useful; we will get better
 	 code without it.  */
@@ -9915,10 +9906,6 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
 	return temp;
       }
 
-    case VEC_COND_EXPR:
-      target = expand_vec_cond_expr (type, treeop0, treeop1, treeop2, target);
-      return target;
-
     case VEC_DUPLICATE_EXPR:
       op0 = expand_expr (treeop0, NULL_RTX, VOIDmode, modifier);
       target = expand_vector_broadcast (mode, op0);
@@ -12249,8 +12236,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
   STRIP_NOPS (arg1);
 
   /* For vector typed comparisons emit code to generate the desired
-     all-ones or all-zeros mask.  Conveniently use the VEC_COND_EXPR
-     expander for this.  */
+     all-ones or all-zeros mask.  */
   if (TREE_CODE (ops->type) == VECTOR_TYPE)
     {
       tree ifexp = build2 (ops->code, ops->type, arg0, arg1);
@@ -12258,12 +12244,7 @@ do_store_flag (sepops ops, rtx target, machine_mode mode)
 	  && expand_vec_cmp_expr_p (TREE_TYPE (arg0), ops->type, ops->code))
 	return expand_vec_cmp_expr (ops->type, ifexp, target);
       else
-	{
-	  tree if_true = constant_boolean_node (true, ops->type);
-	  tree if_false = constant_boolean_node (false, ops->type);
-	  return expand_vec_cond_expr (ops->type, ifexp, if_true,
-				       if_false, target);
-	}
+	gcc_unreachable ();
     }
 
   /* Optimize (x % C1) == C2 or (x % C1) != C2 if it is beneficial
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 416fb609b94..339e8bc5089 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -14219,20 +14219,7 @@ gimplify_expr (tree *expr_p, gimple_seq *pre_p, gimple_seq *post_p,
 	  }
 
 	case VEC_COND_EXPR:
-	  {
-	    enum gimplify_status r0, r1, r2;
-
-	    r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
-				post_p, is_gimple_condexpr, fb_rvalue);
-	    r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-	    r2 = gimplify_expr (&TREE_OPERAND (*expr_p, 2), pre_p,
-				post_p, is_gimple_val, fb_rvalue);
-
-	    ret = MIN (MIN (r0, r1), r2);
-	    recalculate_side_effects (*expr_p);
-	  }
-	  break;
+	  goto expr_3;
 
 	case VEC_PERM_EXPR:
 	  /* Classified as tcc_expression.  */
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index 5e9aa60721e..644f234e087 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -49,6 +49,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "gimple-ssa.h"
 #include "tree-phinodes.h"
 #include "ssa-iterators.h"
+#include "explow.h"
 
 /* The names of each internal function, indexed by function number.  */
 const char *const internal_fn_name_array[] = {
@@ -107,6 +108,10 @@ init_internal_fns ()
 #define mask_store_direct { 3, 2, false }
 #define store_lanes_direct { 0, 0, false }
 #define mask_store_lanes_direct { 0, 0, false }
+#define vec_cond_mask_direct { 0, 0, false }
+#define vec_cond_direct { 0, 0, false }
+#define vec_condu_direct { 0, 0, false }
+#define vec_condeq_direct { 0, 0, false }
 #define scatter_store_direct { 3, 1, false }
 #define unary_direct { 0, 0, true }
 #define binary_direct { 0, 0, true }
@@ -2548,6 +2553,86 @@ expand_mask_store_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
 
 #define expand_mask_store_lanes_optab_fn expand_mask_store_optab_fn
 
+/* Expand VCOND, VCONDU and VCONDEQ optab internal functions.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[6];
+  insn_code icode;
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0a = gimple_call_arg (stmt, 0);
+  tree op0b = gimple_call_arg (stmt, 1);
+  tree op1 = gimple_call_arg (stmt, 2);
+  tree op2 = gimple_call_arg (stmt, 3);
+  enum tree_code tcode = (tree_code) int_cst_value (gimple_call_arg (stmt, 4));
+
+  tree vec_cond_type = TREE_TYPE (lhs);
+  tree op_mode = TREE_TYPE (op0a);
+  bool unsignedp = TYPE_UNSIGNED (op_mode);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode cmp_op_mode = TYPE_MODE (op_mode);
+
+  icode = convert_optab_handler (optab, mode, cmp_op_mode);
+  rtx comparison
+    = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp, icode, 4);
+  rtx rtx_op1 = expand_normal (op1);
+  rtx rtx_op2 = expand_normal (op2);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_fixed_operand (&ops[3], comparison);
+  create_fixed_operand (&ops[4], XEXP (comparison, 0));
+  create_fixed_operand (&ops[5], XEXP (comparison, 1));
+  expand_insn (icode, 6, ops);
+}
+
+#define expand_vec_cond_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condu_optab_fn expand_vect_cond_optab_fn
+#define expand_vec_condeq_optab_fn expand_vect_cond_optab_fn
+
+/* Expand VCOND_MASK optab internal function.
+   The expansion of STMT happens based on OPTAB table associated.  */
+
+static void
+expand_vect_cond_mask_optab_fn (internal_fn, gcall *stmt, convert_optab optab)
+{
+  class expand_operand ops[4];
+
+  tree lhs = gimple_call_lhs (stmt);
+  tree op0 = gimple_call_arg (stmt, 0);
+  tree op1 = gimple_call_arg (stmt, 1);
+  tree op2 = gimple_call_arg (stmt, 2);
+  tree vec_cond_type = TREE_TYPE (lhs);
+
+  machine_mode mode = TYPE_MODE (vec_cond_type);
+  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
+  enum insn_code icode = convert_optab_handler (optab, mode, mask_mode);
+  rtx mask, rtx_op1, rtx_op2;
+
+  gcc_assert (icode != CODE_FOR_nothing);
+
+  mask = expand_normal (op0);
+  rtx_op1 = expand_normal (op1);
+  rtx_op2 = expand_normal (op2);
+
+  mask = force_reg (mask_mode, mask);
+  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
+
+  rtx target = expand_expr (lhs, NULL_RTX, VOIDmode, EXPAND_WRITE);
+  create_output_operand (&ops[0], target, mode);
+  create_input_operand (&ops[1], rtx_op1, mode);
+  create_input_operand (&ops[2], rtx_op2, mode);
+  create_input_operand (&ops[3], mask, mask_mode);
+  expand_insn (icode, 4, ops);
+}
+
+#define expand_vec_cond_mask_optab_fn expand_vect_cond_mask_optab_fn
+
 static void
 expand_ABNORMAL_DISPATCHER (internal_fn, gcall *)
 {
@@ -3131,6 +3216,10 @@ multi_vector_optab_supported_p (convert_optab optab, tree_pair types,
 #define direct_mask_store_optab_supported_p direct_optab_supported_p
 #define direct_store_lanes_optab_supported_p multi_vector_optab_supported_p
 #define direct_mask_store_lanes_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_mask_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_cond_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condu_optab_supported_p multi_vector_optab_supported_p
+#define direct_vec_condeq_optab_supported_p multi_vector_optab_supported_p
 #define direct_scatter_store_optab_supported_p convert_optab_supported_p
 #define direct_while_optab_supported_p convert_optab_supported_p
 #define direct_fold_extract_optab_supported_p direct_optab_supported_p
diff --git a/gcc/internal-fn.def b/gcc/internal-fn.def
index 1d190d492ff..0c6fc371190 100644
--- a/gcc/internal-fn.def
+++ b/gcc/internal-fn.def
@@ -136,6 +136,11 @@ DEF_INTERNAL_OPTAB_FN (STORE_LANES, ECF_CONST, vec_store_lanes, store_lanes)
 DEF_INTERNAL_OPTAB_FN (MASK_STORE_LANES, 0,
 		       vec_mask_store_lanes, mask_store_lanes)
 
+DEF_INTERNAL_OPTAB_FN (VCOND, 0, vcond, vec_cond)
+DEF_INTERNAL_OPTAB_FN (VCONDU, 0, vcondu, vec_condu)
+DEF_INTERNAL_OPTAB_FN (VCONDEQ, 0, vcondeq, vec_condeq)
+DEF_INTERNAL_OPTAB_FN (VCOND_MASK, 0, vcond_mask, vec_cond_mask)
+
 DEF_INTERNAL_OPTAB_FN (WHILE_ULT, ECF_CONST | ECF_NOTHROW, while_ult, while)
 DEF_INTERNAL_OPTAB_FN (CHECK_RAW_PTRS, ECF_CONST | ECF_NOTHROW,
 		       check_raw_ptrs, check_ptrs)
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 6d0b76c13ba..184827fdf4e 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -5442,7 +5442,7 @@ get_rtx_code (enum tree_code tcode, bool unsignedp)
    first comparison operand for insn ICODE.  Do not generate the
    compare instruction itself.  */
 
-static rtx
+rtx
 vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
 		    tree t_op0, tree t_op1, bool unsignedp,
 		    enum insn_code icode, unsigned int opno)
@@ -5809,128 +5809,6 @@ expand_vec_perm_var (machine_mode mode, rtx v0, rtx v1, rtx sel, rtx target)
   return tmp;
 }
 
-/* Generate insns for a VEC_COND_EXPR with mask, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_mask_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-			   rtx target)
-{
-  class expand_operand ops[4];
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode mask_mode = TYPE_MODE (TREE_TYPE (op0));
-  enum insn_code icode = get_vcond_mask_icode (mode, mask_mode);
-  rtx mask, rtx_op1, rtx_op2;
-
-  if (icode == CODE_FOR_nothing)
-    return 0;
-
-  mask = expand_normal (op0);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  mask = force_reg (mask_mode, mask);
-  rtx_op1 = force_reg (GET_MODE (rtx_op1), rtx_op1);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_input_operand (&ops[3], mask, mask_mode);
-  expand_insn (icode, 4, ops);
-
-  return ops[0].value;
-}
-
-/* Generate insns for a VEC_COND_EXPR, given its TYPE and its
-   three operands.  */
-
-rtx
-expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
-		      rtx target)
-{
-  class expand_operand ops[6];
-  enum insn_code icode;
-  rtx comparison, rtx_op1, rtx_op2;
-  machine_mode mode = TYPE_MODE (vec_cond_type);
-  machine_mode cmp_op_mode;
-  bool unsignedp;
-  tree op0a, op0b;
-  enum tree_code tcode;
-
-  if (COMPARISON_CLASS_P (op0))
-    {
-      op0a = TREE_OPERAND (op0, 0);
-      op0b = TREE_OPERAND (op0, 1);
-      tcode = TREE_CODE (op0);
-    }
-  else
-    {
-      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
-      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
-	  != CODE_FOR_nothing)
-	return expand_vec_cond_mask_expr (vec_cond_type, op0, op1,
-					  op2, target);
-      /* Fake op0 < 0.  */
-      else
-	{
-	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
-		      == MODE_VECTOR_INT);
-	  op0a = op0;
-	  op0b = build_zero_cst (TREE_TYPE (op0));
-	  tcode = LT_EXPR;
-	}
-    }
-  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
-  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
-
-
-  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
-	      && known_eq (GET_MODE_NUNITS (mode),
-			   GET_MODE_NUNITS (cmp_op_mode)));
-
-  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
-  if (icode == CODE_FOR_nothing)
-    {
-      if (tcode == LT_EXPR
-	  && op0a == op0
-	  && TREE_CODE (op0) == VECTOR_CST)
-	{
-	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
-	     into a constant when only get_vcond_eq_icode is supported.
-	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
-	  unsigned HOST_WIDE_INT nelts;
-	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
-	    {
-	      if (VECTOR_CST_STEPPED_P (op0))
-		return 0;
-	      nelts = vector_cst_encoded_nelts (op0);
-	    }
-	  for (unsigned int i = 0; i < nelts; ++i)
-	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
-	      return 0;
-	  tcode = NE_EXPR;
-	}
-      if (tcode == EQ_EXPR || tcode == NE_EXPR)
-	icode = get_vcond_eq_icode (mode, cmp_op_mode);
-      if (icode == CODE_FOR_nothing)
-	return 0;
-    }
-
-  comparison = vector_compare_rtx (VOIDmode, tcode, op0a, op0b, unsignedp,
-				   icode, 4);
-  rtx_op1 = expand_normal (op1);
-  rtx_op2 = expand_normal (op2);
-
-  create_output_operand (&ops[0], target, mode);
-  create_input_operand (&ops[1], rtx_op1, mode);
-  create_input_operand (&ops[2], rtx_op2, mode);
-  create_fixed_operand (&ops[3], comparison);
-  create_fixed_operand (&ops[4], XEXP (comparison, 0));
-  create_fixed_operand (&ops[5], XEXP (comparison, 1));
-  expand_insn (icode, 6, ops);
-  return ops[0].value;
-}
-
 /* Generate VEC_SERIES_EXPR <OP0, OP1>, returning a value of mode VMODE.
    Use TARGET for the result if nonnull and convenient.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 5bd19503a0a..7c2ec257cb0 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -321,9 +321,6 @@ extern rtx expand_vec_perm_const (machine_mode, rtx, rtx,
 /* Generate code for vector comparison.  */
 extern rtx expand_vec_cmp_expr (tree, tree, rtx);
 
-/* Generate code for VEC_COND_EXPR.  */
-extern rtx expand_vec_cond_expr (tree, tree, tree, tree, rtx);
-
 /* Generate code for VEC_SERIES_EXPR.  */
 extern rtx expand_vec_series_expr (machine_mode, rtx, rtx, rtx);
 
@@ -364,5 +361,9 @@ extern void expand_jump_insn (enum insn_code icode, unsigned int nops,
 			      class expand_operand *ops);
 
 extern enum rtx_code get_rtx_code (enum tree_code tcode, bool unsignedp);
+extern rtx vector_compare_rtx (machine_mode cmp_mode, enum tree_code tcode,
+			       tree t_op0, tree t_op1, bool unsignedp,
+			       enum insn_code icode, unsigned int opno);
+
 
 #endif /* GCC_OPTABS_H */
diff --git a/gcc/passes.def b/gcc/passes.def
index 56322025226..2b1e09fdda3 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -399,6 +399,7 @@ along with GCC; see the file COPYING3.  If not see
   NEXT_PASS (pass_cleanup_eh);
   NEXT_PASS (pass_lower_resx);
   NEXT_PASS (pass_nrv);
+  NEXT_PASS (pass_gimple_isel);
   NEXT_PASS (pass_cleanup_cfg_post_optimizing);
   NEXT_PASS (pass_warn_function_noreturn);
   NEXT_PASS (pass_gen_hsail);
diff --git a/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
new file mode 100644
index 00000000000..00fe2422444
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/vec-cond-expr-eh.C
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-fnon-call-exceptions" } */
+
+typedef double v2df __attribute__((vector_size(16)));
+
+v2df foo (v2df a, v2df b, v2df c, v2df d)
+{
+  try
+  {
+      v2df res = a < b ? c : d;
+      return res;
+    }
+  catch (...)
+    {
+      return (v2df){};
+    }
+}
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index d06a479e570..7a1ac80c2ac 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -4155,7 +4155,7 @@ verify_gimple_assign_ternary (gassign *stmt)
       return true;
     }
 
-  if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
+  if ((rhs_code == COND_EXPR
        ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
       || !is_gimple_val (rhs2)
       || !is_gimple_val (rhs3))
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 396428f167f..215c8f2a337 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -627,6 +627,7 @@ extern gimple_opt_pass *make_pass_local_fn_summary (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_update_address_taken (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_convert_switch (gcc::context *ctxt);
 extern gimple_opt_pass *make_pass_lower_vaarg (gcc::context *ctxt);
+extern gimple_opt_pass *make_pass_gimple_isel (gcc::context *ctxt);
 
 /* Current optimization pass.  */
 extern opt_pass *current_pass;
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 494c9e9c20b..0ab8267fdf9 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -3133,8 +3133,7 @@ pass_forwprop::execute (function *fun)
 		    tree rhs1 = gimple_assign_rhs1 (stmt);
 		    enum tree_code code = gimple_assign_rhs_code (stmt);
 
-		    if (code == COND_EXPR
-			|| code == VEC_COND_EXPR)
+		    if (code == COND_EXPR)
 		      {
 			/* In this case the entire COND_EXPR is in rhs1. */
 			if (forward_propagate_into_cond (&gsi))
diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index af8faf2e6ea..8d80a2c768c 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -3831,7 +3831,8 @@ optimize_range_tests (enum tree_code opcode,
    to type of comparison.  */
 
 static tree_code
-ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
+ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type,
+		  tree *lhs, tree *rhs, gassign **vcond)
 {
   if (TREE_CODE (var) != SSA_NAME)
     return ERROR_MARK;
@@ -3839,6 +3840,8 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
   gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
   if (stmt == NULL)
     return ERROR_MARK;
+  if (vcond)
+    *vcond = stmt;
 
   /* ??? If we start creating more COND_EXPR, we could perform
      this same optimization with them.	For now, simplify.  */
@@ -3847,9 +3850,20 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   tree cond = gimple_assign_rhs1 (stmt);
   tree_code cmp = TREE_CODE (cond);
-  if (TREE_CODE_CLASS (cmp) != tcc_comparison)
+  if (cmp != SSA_NAME)
     return ERROR_MARK;
 
+  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
+  if (assign == NULL
+      || TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
+    return ERROR_MARK;
+
+  cmp = gimple_assign_rhs_code (assign);
+  if (lhs)
+    *lhs = gimple_assign_rhs1 (assign);
+  if (rhs)
+    *rhs = gimple_assign_rhs2 (assign);
+
   /* ??? For now, allow only canonical true and false result vectors.
      We could expand this to other constants should the need arise,
      but at the moment we don't create them.  */
@@ -3870,7 +3884,7 @@ ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
 
   /* Success!  */
   if (rets)
-    *rets = stmt;
+    *rets = assign;
   if (reti)
     *reti = inv;
   if (type)
@@ -3894,10 +3908,11 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
     {
       tree elt0 = (*ops)[i]->op;
 
-      gassign *stmt0;
+      gassign *stmt0, *vcond0;
       bool invert;
-      tree type;
-      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
+      tree type, lhs0, rhs0;
+      tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type, &lhs0,
+					 &rhs0, &vcond0);
       if (cmp0 == ERROR_MARK)
 	continue;
 
@@ -3905,26 +3920,20 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
 	{
 	  tree &elt1 = (*ops)[j]->op;
 
-	  gassign *stmt1;
-	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
+	  gassign *stmt1, *vcond1;
+	  tree lhs1, rhs1;
+	  tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL, &lhs1,
+					     &rhs1, &vcond1);
 	  if (cmp1 == ERROR_MARK)
 	    continue;
 
-	  tree cond0 = gimple_assign_rhs1 (stmt0);
-	  tree x0 = TREE_OPERAND (cond0, 0);
-	  tree y0 = TREE_OPERAND (cond0, 1);
-
-	  tree cond1 = gimple_assign_rhs1 (stmt1);
-	  tree x1 = TREE_OPERAND (cond1, 0);
-	  tree y1 = TREE_OPERAND (cond1, 1);
-
 	  tree comb;
 	  if (opcode == BIT_AND_EXPR)
-	    comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
-					       y1);
+	    comb = maybe_fold_and_comparisons (type, cmp0, lhs0, rhs0,
+					       cmp1, lhs1, rhs1);
 	  else if (opcode == BIT_IOR_EXPR)
-	    comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
-					      y1);
+	    comb = maybe_fold_or_comparisons (type, cmp0, lhs0, rhs0,
+					      cmp1, lhs1, rhs1);
 	  else
 	    gcc_unreachable ();
 	  if (comb == NULL)
@@ -3934,19 +3943,27 @@ optimize_vec_cond_expr (tree_code opcode, vec<operand_entry *> *ops)
 	  if (dump_file && (dump_flags & TDF_DETAILS))
 	    {
 	      fprintf (dump_file, "Transforming ");
-	      print_generic_expr (dump_file, cond0);
+	      print_generic_expr (dump_file, gimple_assign_lhs (stmt0));
 	      fprintf (dump_file, " %c ", opcode == BIT_AND_EXPR ? '&' : '|');
-	      print_generic_expr (dump_file, cond1);
+	      print_generic_expr (dump_file, gimple_assign_lhs (stmt1));
 	      fprintf (dump_file, " into ");
 	      print_generic_expr (dump_file, comb);
 	      fputc ('\n', dump_file);
 	    }
 
-	  gimple_assign_set_rhs1 (stmt0, comb);
+	  gimple_seq seq;
+	  tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
+	  if (seq)
+	    {
+	      gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
+	      gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
+	    }
 	  if (invert)
-	    std::swap (*gimple_assign_rhs2_ptr (stmt0),
-		       *gimple_assign_rhs3_ptr (stmt0));
-	  update_stmt (stmt0);
+	    std::swap (*gimple_assign_rhs2_ptr (vcond0),
+		       *gimple_assign_rhs3_ptr (vcond0));
+
+	  gimple_assign_set_rhs1 (vcond0, exp);
+	  update_stmt (vcond0);
 
 	  elt1 = error_mark_node;
 	  any_changes = true;
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index a7fe83da0e3..fb955bbf3d2 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -694,12 +694,14 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 	  if (addend == NULL_TREE
 	      && expand_vec_cond_expr_p (type, type, LT_EXPR))
 	    {
-	      tree zero, cst, cond, mask_type;
-	      gimple *stmt;
+	      tree zero, cst, mask_type, mask;
+	      gimple *stmt, *cond;
 
 	      mask_type = truth_type_for (type);
 	      zero = build_zero_cst (type);
-	      cond = build2 (LT_EXPR, mask_type, op0, zero);
+	      mask = make_ssa_name (mask_type);
+	      cond = gimple_build_assign (mask, LT_EXPR, op0, zero);
+	      gsi_insert_before (gsi, cond, GSI_SAME_STMT);
 	      tree_vector_builder vec (type, nunits, 1);
 	      for (i = 0; i < nunits; i++)
 		vec.quick_push (build_int_cst (TREE_TYPE (type),
@@ -707,8 +709,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
 						<< shifts[i]) - 1));
 	      cst = vec.build ();
 	      addend = make_ssa_name (type);
-	      stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
-					  cst, zero);
+	      stmt
+		= gimple_build_assign (addend, VEC_COND_EXPR, mask, cst, zero);
 	      gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
 	    }
 	}
@@ -950,21 +952,28 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
   tree index = bitsize_int (0);
   tree comp_width = width;
   tree comp_index = index;
-  int i;
   location_t loc = gimple_location (gsi_stmt (*gsi));
+  tree_code code = TREE_CODE (a);
 
-  if (!is_gimple_val (a))
+  if (code == SSA_NAME)
     {
-      gcc_assert (COMPARISON_CLASS_P (a));
-      a_is_comparison = true;
-      a1 = TREE_OPERAND (a, 0);
-      a2 = TREE_OPERAND (a, 1);
-      comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
-      comp_width = vector_element_bits_tree (TREE_TYPE (a1));
+      gimple *assign = SSA_NAME_DEF_STMT (a);
+      if (TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) == tcc_comparison)
+	{
+	  a_is_comparison = true;
+	  a1 = gimple_assign_rhs1 (assign);
+	  a2 = gimple_assign_rhs2 (assign);
+	  code = gimple_assign_rhs_code (assign);
+	  comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
+	  comp_width = vector_element_bits_tree (TREE_TYPE (a1));
+	}
     }
 
-  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
-    return;
+  if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), code))
+    {
+      gcc_assert (TREE_CODE (a) == SSA_NAME || TREE_CODE (a) == VECTOR_CST);
+      return;
+    }
 
   /* Handle vector boolean types with bitmasks.  If there is a comparison
      and we can expand the comparison into the vector boolean bitmask,
@@ -987,7 +996,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 	  : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
     {
       if (a_is_comparison)
-	a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
+	a = gimplify_build2 (gsi, code, type, a1, a2);
       a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
       a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
       a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
@@ -1018,7 +1027,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 
   int nunits = nunits_for_known_piecewise_op (type);
   vec_alloc (v, nunits);
-  for (i = 0; i < nunits; i++)
+  for (int i = 0; i < nunits; i++)
     {
       tree aa, result;
       tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
@@ -1029,7 +1038,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
 				       comp_width, comp_index);
 	  tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
 				       comp_width, comp_index);
-	  aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
+	  aa = fold_build2 (code, cond_type, aa1, aa2);
 	}
       else if (a_is_scalar_bitmask)
 	{
diff --git a/gcc/tree-vect-isel.c b/gcc/tree-vect-isel.c
new file mode 100644
index 00000000000..97f92080503
--- /dev/null
+++ b/gcc/tree-vect-isel.c
@@ -0,0 +1,244 @@
+/* Schedule GIMPLE vector statements.
+   Copyright (C) 2020 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "ssa.h"
+#include "expmed.h"
+#include "optabs-tree.h"
+#include "tree-eh.h"
+#include "gimple-iterator.h"
+#include "gimplify-me.h"
+#include "gimplify.h"
+#include "tree-cfg.h"
+
+/* Expand all VEC_COND_EXPR gimple assignments into calls to internal
+   function based on type of selected expansion.  */
+
+static gimple *
+gimple_expand_vec_cond_expr (gimple_stmt_iterator *gsi,
+			     hash_map<tree, unsigned int> *vec_cond_ssa_name_uses)
+{
+  tree lhs, op0a = NULL_TREE, op0b = NULL_TREE;
+  enum tree_code code;
+  enum tree_code tcode;
+  machine_mode cmp_op_mode;
+  bool unsignedp;
+  enum insn_code icode;
+  imm_use_iterator imm_iter;
+
+  /* Only consider code == GIMPLE_ASSIGN.  */
+  gassign *stmt = dyn_cast<gassign *> (gsi_stmt (*gsi));
+  if (!stmt)
+    return NULL;
+
+  code = gimple_assign_rhs_code (stmt);
+  if (code != VEC_COND_EXPR)
+    return NULL;
+
+  tree op0 = gimple_assign_rhs1 (stmt);
+  tree op1 = gimple_assign_rhs2 (stmt);
+  tree op2 = gimple_assign_rhs3 (stmt);
+  lhs = gimple_assign_lhs (stmt);
+  machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
+
+  gcc_assert (!COMPARISON_CLASS_P (op0));
+  if (TREE_CODE (op0) == SSA_NAME)
+    {
+      unsigned int used_vec_cond_exprs = 0;
+      unsigned int *slot = vec_cond_ssa_name_uses->get (op0);
+      if (slot)
+	used_vec_cond_exprs = *slot;
+      else
+	{
+	  gimple *use_stmt;
+	  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, op0)
+	    {
+	      gassign *assign = dyn_cast<gassign *> (use_stmt);
+	      if (assign != NULL
+		  && gimple_assign_rhs_code (assign) == VEC_COND_EXPR
+		  && gimple_assign_rhs1 (assign) == op0)
+		used_vec_cond_exprs++;
+	    }
+	  vec_cond_ssa_name_uses->put (op0, used_vec_cond_exprs);
+	}
+
+      gassign *def_stmt = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (op0));
+      if (def_stmt)
+	{
+	  tcode = gimple_assign_rhs_code (def_stmt);
+	  op0a = gimple_assign_rhs1 (def_stmt);
+	  op0b = gimple_assign_rhs2 (def_stmt);
+
+	  tree op0a_type = TREE_TYPE (op0a);
+	  if (used_vec_cond_exprs >= 2
+	      && (get_vcond_mask_icode (mode, TYPE_MODE (op0a_type))
+		  != CODE_FOR_nothing)
+	      && expand_vec_cmp_expr_p (op0a_type, TREE_TYPE (lhs), tcode))
+	    {
+	      /* Keep the SSA name and use vcond_mask.  */
+	      tcode = TREE_CODE (op0);
+	    }
+	}
+      else
+	tcode = TREE_CODE (op0);
+    }
+  else
+    tcode = TREE_CODE (op0);
+
+  if (TREE_CODE_CLASS (tcode) != tcc_comparison)
+    {
+      gcc_assert (VECTOR_BOOLEAN_TYPE_P (TREE_TYPE (op0)));
+      if (get_vcond_mask_icode (mode, TYPE_MODE (TREE_TYPE (op0)))
+	  != CODE_FOR_nothing)
+	return gimple_build_call_internal (IFN_VCOND_MASK, 3, op0, op1, op2);
+      /* Fake op0 < 0.  */
+      else
+	{
+	  gcc_assert (GET_MODE_CLASS (TYPE_MODE (TREE_TYPE (op0)))
+		      == MODE_VECTOR_INT);
+	  op0a = op0;
+	  op0b = build_zero_cst (TREE_TYPE (op0));
+	  tcode = LT_EXPR;
+	}
+    }
+  cmp_op_mode = TYPE_MODE (TREE_TYPE (op0a));
+  unsignedp = TYPE_UNSIGNED (TREE_TYPE (op0a));
+
+
+  gcc_assert (known_eq (GET_MODE_SIZE (mode), GET_MODE_SIZE (cmp_op_mode))
+	      && known_eq (GET_MODE_NUNITS (mode),
+			   GET_MODE_NUNITS (cmp_op_mode)));
+
+  icode = get_vcond_icode (mode, cmp_op_mode, unsignedp);
+  if (icode == CODE_FOR_nothing)
+    {
+      if (tcode == LT_EXPR
+	  && op0a == op0
+	  && TREE_CODE (op0) == VECTOR_CST)
+	{
+	  /* A VEC_COND_EXPR condition could be folded from EQ_EXPR/NE_EXPR
+	     into a constant when only get_vcond_eq_icode is supported.
+	     Verify < 0 and != 0 behave the same and change it to NE_EXPR.  */
+	  unsigned HOST_WIDE_INT nelts;
+	  if (!VECTOR_CST_NELTS (op0).is_constant (&nelts))
+	    {
+	      if (VECTOR_CST_STEPPED_P (op0))
+		gcc_unreachable ();
+	      nelts = vector_cst_encoded_nelts (op0);
+	    }
+	  for (unsigned int i = 0; i < nelts; ++i)
+	    if (tree_int_cst_sgn (vector_cst_elt (op0, i)) == 1)
+	      gcc_unreachable ();
+	  tcode = NE_EXPR;
+	}
+      if (tcode == EQ_EXPR || tcode == NE_EXPR)
+	{
+	  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+	  return gimple_build_call_internal (IFN_VCONDEQ, 5, op0a, op0b, op1,
+					     op2, tcode_tree);
+	}
+    }
+
+  gcc_assert (icode != CODE_FOR_nothing);
+  tree tcode_tree = build_int_cst (integer_type_node, tcode);
+  return gimple_build_call_internal (unsignedp ? IFN_VCONDU : IFN_VCOND,
+				     5, op0a, op0b, op1, op2, tcode_tree);
+}
+
+
+
+/* Iterate all gimple statements and try to expand
+   VEC_COND_EXPR assignments.  */
+
+static unsigned int
+gimple_expand_vec_cond_exprs (void)
+{
+  gimple_stmt_iterator gsi;
+  basic_block bb;
+  bool cfg_changed = false;
+  hash_map<tree, unsigned int> vec_cond_ssa_name_uses;
+
+  FOR_EACH_BB_FN (bb, cfun)
+    {
+      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
+	{
+	  gimple *g = gimple_expand_vec_cond_expr (&gsi,
+						   &vec_cond_ssa_name_uses);
+	  if (g != NULL)
+	    {
+	      tree lhs = gimple_assign_lhs (gsi_stmt (gsi));
+	      gimple_set_lhs (g, lhs);
+	      gsi_replace (&gsi, g, false);
+	    }
+	}
+    }
+
+  return cfg_changed ? TODO_cleanup_cfg : 0;
+}
+
+namespace {
+
+const pass_data pass_data_gimple_isel =
+{
+  GIMPLE_PASS, /* type */
+  "isel", /* name */
+  OPTGROUP_VEC, /* optinfo_flags */
+  TV_NONE, /* tv_id */
+  PROP_cfg, /* properties_required */
+  0, /* properties_provided */
+  0, /* properties_destroyed */
+  0, /* todo_flags_start */
+  TODO_update_ssa, /* todo_flags_finish */
+};
+
+class pass_gimple_isel : public gimple_opt_pass
+{
+public:
+  pass_gimple_isel (gcc::context *ctxt)
+    : gimple_opt_pass (pass_data_gimple_isel, ctxt)
+  {}
+
+  /* opt_pass methods: */
+  virtual bool gate (function *)
+    {
+      return true;
+    }
+
+  virtual unsigned int execute (function *)
+    {
+      return gimple_expand_vec_cond_exprs ();
+    }
+
+}; // class pass_gimple_isel
+
+} // anon namespace
+
+gimple_opt_pass *
+make_pass_gimple_isel (gcc::context *ctxt)
+{
+  return new pass_gimple_isel (ctxt);
+}
+
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index cf2d979fea1..710b17a7c5c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
 	{
 	  vec_cond_rhs = vec_oprnds1[i];
 	  if (bitop1 == NOP_EXPR)
-	    vec_compare = build2 (cond_code, vec_cmp_type,
-				  vec_cond_lhs, vec_cond_rhs);
+	    vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
+					   vec_cond_lhs, vec_cond_rhs);
 	  else
 	    {
 	      new_temp = make_ssa_name (vec_cmp_type);
-- 
2.27.0


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-15 12:20                                                                 ` Martin Liška
@ 2020-06-17  8:50                                                                   ` Richard Biener
  2020-06-17 13:15                                                                     ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-17  8:50 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Mon, Jun 15, 2020 at 2:20 PM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/15/20 1:59 PM, Richard Biener wrote:
> > On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
> >>
> >> On 6/15/20 9:14 AM, Richard Biener wrote:
> >>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
> >>>>
> >>>> On 6/12/20 11:43 AM, Richard Biener wrote:
> >>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
> >>>>> Thus can we avoid the above completely (even as intermediate
> >>>>> state)?
> >>>>
> >>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
> >>>> failures:
> >>>>
> >>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
> >>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> >>>>
> >>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
> >>>> analyzed the second failure.
> >>>>
> >>>> I'm also not sure about the gimplification change, I see superfluous assignments:
> >>>>      vec_cond_cmp.5 = _1 == _2;
> >>>>      vec_cond_cmp.6 = vec_cond_cmp.5;
> >>>>      vec_cond_cmp.7 = vec_cond_cmp.6;
> >>>>      _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
> >>>> ?
> >>>>
> >>>> So with the suggested patch, the EH should be gone as you suggested. Right?
> >>>
> >>> Right, it should be on the comparison already from the start.
> >>>
> >>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
> >>> *pre_p, gimple_seq *post_p,
> >>>           case VEC_COND_EXPR:
> >>>             {
> >>>               enum gimplify_status r0, r1, r2;
> >>> -
> >>>               r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
> >>>                                   post_p, is_gimple_condexpr, fb_rvalue);
> >>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
> >>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
> >>> +           gimple_add_tmp_var (tmp);
> >>> +           gimplify_assign (tmp, xop0, pre_p);
> >>> +           TREE_OPERAND (*expr_p, 0) = tmp;
> >>>               r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
> >>>                                   post_p, is_gimple_val, fb_rvalue);
> >>>
> >>> all of VEC_COND_EXPR can now be a simple goto expr_3;
> >>
> >> Works for me, thanks!
> >>
> >>>
> >>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> >>> index 494c9e9c20b..090fb52a2f1 100644
> >>> --- a/gcc/tree-ssa-forwprop.c
> >>> +++ b/gcc/tree-ssa-forwprop.c
> >>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
> >>>                       if (code == COND_EXPR
> >>>                           || code == VEC_COND_EXPR)
> >>>                         {
> >>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
> >>> +                       if (code == VEC_COND_EXPR)
> >>> +                         break;
> >>> +
> >>>
> >>> err - remove the || code == VEC_COND_EXPR instead?
> >>
> >> Yep.
> >>
> >>>
> >>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
> >>>    {
> >>>      gimple_stmt_iterator gsi;
> >>>      basic_block bb;
> >>> -  bool cfg_changed = false;
> >>>
> >>>      FOR_EACH_BB_FN (bb, cfun)
> >>> -    {
> >>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> >>> -       {
> >>> -         expand_vector_operations_1 (&gsi);
> >>> -         /* ???  If we do not cleanup EH then we will ICE in
> >>> -            verification.  But in reality we have created wrong-code
> >>> -            as we did not properly transition EH info and edges to
> >>> -            the piecewise computations.  */
> >>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> >>> -             && gimple_purge_dead_eh_edges (bb))
> >>> -           cfg_changed = true;
> >>> -       }
> >>> -    }
> >>>
> >>> I'm not sure about this.  Consider the C++ testcase where
> >>> the ?: is replaced by a division.  If veclower needs to replace
> >>> that with four scalar division statements then the above
> >>> still applies - veclower does not correctly duplicate EH info
> >>> and EH edges to the individual divisions (and we do not know
> >>> which component might trap).
> >>>
> >>> So please leave the above in.  You can try if using integer
> >>> division makes it break and add such a testcase if there's
> >>> no coverage for this in the testsuite.
> >>
> >> I'm leaving that above. Can you please explain how a division test-case can
> >> be created?
> >
> > typedef long v2di __attribute__((vector_size(16)));
> >
> > v2di foo (v2di a, v2di b)
> > {
> >    try
> >    {
> >      v2di res = a / b;
> >      return res;
> >      }
> >      catch (...)
> >      {
> >      return (v2di){};
> >      }
> > }
> >
> > with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >    [LP 1] _6 = a_4(D) / b_5(D);
> > ;;    succ:       5
> > ;;                3
> >
> > while after t.ii.226t.veclower we have
> >
> > ;;   basic block 2, loop depth 0
> > ;;    pred:       ENTRY
> >    _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
> >    _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
> >    _15 = _13 / _14;
> >    _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
> >    _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
> >    _18 = _16 / _17;
> >    _6 = {_15, _18};
> >    res_7 = _6;
> >    _8 = res_7;
> > ;;    succ:       3
> >
> > and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.
>
> Yes, it ICEs then:
>
>
> ./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
> /home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
> /home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
>      3 | v2di foo (v2di a, v2di b)
>        |      ^~~
> _6 = {_12, _15};
> during GIMPLE pass: veclower2
> /home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
> 0x10e308a verify_gimple_in_cfg(function*, bool)
>         /home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
> 0xfc9caf execute_function_todo
>         /home/marxin/Programming/gcc/gcc/passes.c:1985
> 0xfcaafc do_per_function
>         /home/marxin/Programming/gcc/gcc/passes.c:1640
> 0xfcaafc execute_todo
>         /home/marxin/Programming/gcc/gcc/passes.c:2039
> Please submit a full bug report,
> with preprocessed source if appropriate.
> Please include the complete backtrace with any bug report.
> See <https://gcc.gnu.org/bugs/> for instructions.
>
> >
> > We still generate wrong-code obviously as we'd need to duplicate the
> > EH info on each component division (and split blocks and generate
> > extra EH edges).  That's a pre-existing bug of course.  I just wanted
> > to avoid to create a new instance just because of the early instruction
> > selection for VEC_COND_EXPR.
>
> Fine!
>
> >
> >>>
> >>> What's missing from the patch is adjusting
> >>> verify_gimple_assign_ternary from
> >>>
> >>>     if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
> >>>          ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
> >>>         || !is_gimple_val (rhs2)
> >>>         || !is_gimple_val (rhs3))
> >>>       {
> >>>         error ("invalid operands in ternary operation");
> >>>         return true;
> >>>
> >>> to the same with the rhs_code == VEC_COND_EXPR case removed.
> >>
> >> Hmm. I'm not sure I've got this comment. Why do we want to change it
> >> and is it done right in the patch?
> >
> > Ah, I missed the hunk you added.
>
> That explains the confusion I got.
>
> >  But the check should be an inclusive
> > one, not an exclusive one and earlier accepting a is_gimple_condexpr
> > is superfluous when you later reject the tcc_comparison part.  Just
> > testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
> > and just adjust the above test.
>
> I simplified that.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Please double-check the changelog

        (do_store_flag):

+       tree-vect-isel.o \

IMHO we want to move more of the pattern matching magic of RTL
expansion here to obsolete TER.  So please name it gimple-isel.cc
(.cc!, not .c)

+  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
+  if (stmt != NULL
+      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
+    return ERROR_MARK;

you want stmt == NULL || TREE_CODE_CLASS (...)

in case the def stmt is a call.

+         gimple_seq seq;
+         tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
+         if (seq)
+           {
+             gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
+             gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
+           }

use force_gimple_operand_gsi that makes the above simpler.

          if (invert)
-           std::swap (*gimple_assign_rhs2_ptr (stmt0),
-                      *gimple_assign_rhs3_ptr (stmt0));
-         update_stmt (stmt0);
+           std::swap (*gimple_assign_rhs2_ptr (vcond0),
+                      *gimple_assign_rhs3_ptr (vcond0));

use swap_ssa_operands.

+         gimple_assign_set_rhs1 (vcond0, exp);
+         update_stmt (vcond0);

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index cf2d979fea1..710b17a7c5c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
        {
          vec_cond_rhs = vec_oprnds1[i];
          if (bitop1 == NOP_EXPR)
-           vec_compare = build2 (cond_code, vec_cmp_type,
-                                 vec_cond_lhs, vec_cond_rhs);
+           vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
+                                          vec_cond_lhs, vec_cond_rhs);
          else
            {

please don't introduce more uses of gimplify_buildN - I'd like to
get rid of those.  You can use

     gimple_seq stmts = NULL;
     vec_compare = gimple_build (&stmts, cond_code, ...);
     gsi_insert_seq_before/after (...);

OK with those changes.

Thanks,
Richard.


> Thanks,
> Martin
>
> >
> >>>
> >>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
> >>> with embedded comparisons.
> >>
> >> I've fixed 2 failing test-cases I mentioned in the previous email.
> >>
> >> Martin
> >>
> >>>
> >>> Thanks,
> >>> Richard.
> >>>
> >>>
> >>>> Martin
> >>
>

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-17  8:50                                                                   ` Richard Biener
@ 2020-06-17 13:15                                                                     ` Richard Biener
  2020-06-18  8:10                                                                       ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-17 13:15 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Wed, Jun 17, 2020 at 10:50 AM Richard Biener
<richard.guenther@gmail.com> wrote:
>
> On Mon, Jun 15, 2020 at 2:20 PM Martin Liška <mliska@suse.cz> wrote:
> >
> > On 6/15/20 1:59 PM, Richard Biener wrote:
> > > On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
> > >>
> > >> On 6/15/20 9:14 AM, Richard Biener wrote:
> > >>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
> > >>>>
> > >>>> On 6/12/20 11:43 AM, Richard Biener wrote:
> > >>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
> > >>>>> Thus can we avoid the above completely (even as intermediate
> > >>>>> state)?
> > >>>>
> > >>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
> > >>>> failures:
> > >>>>
> > >>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
> > >>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> > >>>>
> > >>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
> > >>>> analyzed the second failure.
> > >>>>
> > >>>> I'm also not sure about the gimplification change, I see superfluous assignments:
> > >>>>      vec_cond_cmp.5 = _1 == _2;
> > >>>>      vec_cond_cmp.6 = vec_cond_cmp.5;
> > >>>>      vec_cond_cmp.7 = vec_cond_cmp.6;
> > >>>>      _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
> > >>>> ?
> > >>>>
> > >>>> So with the suggested patch, the EH should be gone as you suggested. Right?
> > >>>
> > >>> Right, it should be on the comparison already from the start.
> > >>>
> > >>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
> > >>> *pre_p, gimple_seq *post_p,
> > >>>           case VEC_COND_EXPR:
> > >>>             {
> > >>>               enum gimplify_status r0, r1, r2;
> > >>> -
> > >>>               r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
> > >>>                                   post_p, is_gimple_condexpr, fb_rvalue);
> > >>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
> > >>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
> > >>> +           gimple_add_tmp_var (tmp);
> > >>> +           gimplify_assign (tmp, xop0, pre_p);
> > >>> +           TREE_OPERAND (*expr_p, 0) = tmp;
> > >>>               r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
> > >>>                                   post_p, is_gimple_val, fb_rvalue);
> > >>>
> > >>> all of VEC_COND_EXPR can now be a simple goto expr_3;
> > >>
> > >> Works for me, thanks!
> > >>
> > >>>
> > >>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> > >>> index 494c9e9c20b..090fb52a2f1 100644
> > >>> --- a/gcc/tree-ssa-forwprop.c
> > >>> +++ b/gcc/tree-ssa-forwprop.c
> > >>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
> > >>>                       if (code == COND_EXPR
> > >>>                           || code == VEC_COND_EXPR)
> > >>>                         {
> > >>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
> > >>> +                       if (code == VEC_COND_EXPR)
> > >>> +                         break;
> > >>> +
> > >>>
> > >>> err - remove the || code == VEC_COND_EXPR instead?
> > >>
> > >> Yep.
> > >>
> > >>>
> > >>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
> > >>>    {
> > >>>      gimple_stmt_iterator gsi;
> > >>>      basic_block bb;
> > >>> -  bool cfg_changed = false;
> > >>>
> > >>>      FOR_EACH_BB_FN (bb, cfun)
> > >>> -    {
> > >>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> > >>> -       {
> > >>> -         expand_vector_operations_1 (&gsi);
> > >>> -         /* ???  If we do not cleanup EH then we will ICE in
> > >>> -            verification.  But in reality we have created wrong-code
> > >>> -            as we did not properly transition EH info and edges to
> > >>> -            the piecewise computations.  */
> > >>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> > >>> -             && gimple_purge_dead_eh_edges (bb))
> > >>> -           cfg_changed = true;
> > >>> -       }
> > >>> -    }
> > >>>
> > >>> I'm not sure about this.  Consider the C++ testcase where
> > >>> the ?: is replaced by a division.  If veclower needs to replace
> > >>> that with four scalar division statements then the above
> > >>> still applies - veclower does not correctly duplicate EH info
> > >>> and EH edges to the individual divisions (and we do not know
> > >>> which component might trap).
> > >>>
> > >>> So please leave the above in.  You can try if using integer
> > >>> division makes it break and add such a testcase if there's
> > >>> no coverage for this in the testsuite.
> > >>
> > >> I'm leaving that above. Can you please explain how a division test-case can
> > >> be created?
> > >
> > > typedef long v2di __attribute__((vector_size(16)));
> > >
> > > v2di foo (v2di a, v2di b)
> > > {
> > >    try
> > >    {
> > >      v2di res = a / b;
> > >      return res;
> > >      }
> > >      catch (...)
> > >      {
> > >      return (v2di){};
> > >      }
> > > }
> > >
> > > with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
> > >
> > > ;;   basic block 2, loop depth 0
> > > ;;    pred:       ENTRY
> > >    [LP 1] _6 = a_4(D) / b_5(D);
> > > ;;    succ:       5
> > > ;;                3
> > >
> > > while after t.ii.226t.veclower we have
> > >
> > > ;;   basic block 2, loop depth 0
> > > ;;    pred:       ENTRY
> > >    _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
> > >    _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
> > >    _15 = _13 / _14;
> > >    _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
> > >    _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
> > >    _18 = _16 / _17;
> > >    _6 = {_15, _18};
> > >    res_7 = _6;
> > >    _8 = res_7;
> > > ;;    succ:       3
> > >
> > > and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.
> >
> > Yes, it ICEs then:
> >
> >
> > ./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
> > /home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
> > /home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
> >      3 | v2di foo (v2di a, v2di b)
> >        |      ^~~
> > _6 = {_12, _15};
> > during GIMPLE pass: veclower2
> > /home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
> > 0x10e308a verify_gimple_in_cfg(function*, bool)
> >         /home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
> > 0xfc9caf execute_function_todo
> >         /home/marxin/Programming/gcc/gcc/passes.c:1985
> > 0xfcaafc do_per_function
> >         /home/marxin/Programming/gcc/gcc/passes.c:1640
> > 0xfcaafc execute_todo
> >         /home/marxin/Programming/gcc/gcc/passes.c:2039
> > Please submit a full bug report,
> > with preprocessed source if appropriate.
> > Please include the complete backtrace with any bug report.
> > See <https://gcc.gnu.org/bugs/> for instructions.
> >
> > >
> > > We still generate wrong-code obviously as we'd need to duplicate the
> > > EH info on each component division (and split blocks and generate
> > > extra EH edges).  That's a pre-existing bug of course.  I just wanted
> > > to avoid to create a new instance just because of the early instruction
> > > selection for VEC_COND_EXPR.
> >
> > Fine!
> >
> > >
> > >>>
> > >>> What's missing from the patch is adjusting
> > >>> verify_gimple_assign_ternary from
> > >>>
> > >>>     if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
> > >>>          ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
> > >>>         || !is_gimple_val (rhs2)
> > >>>         || !is_gimple_val (rhs3))
> > >>>       {
> > >>>         error ("invalid operands in ternary operation");
> > >>>         return true;
> > >>>
> > >>> to the same with the rhs_code == VEC_COND_EXPR case removed.
> > >>
> > >> Hmm. I'm not sure I've got this comment. Why do we want to change it
> > >> and is it done right in the patch?
> > >
> > > Ah, I missed the hunk you added.
> >
> > That explains the confusion I got.
> >
> > >  But the check should be an inclusive
> > > one, not an exclusive one and earlier accepting a is_gimple_condexpr
> > > is superfluous when you later reject the tcc_comparison part.  Just
> > > testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
> > > and just adjust the above test.
> >
> > I simplified that.
> >
> > Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Please double-check the changelog
>
>         (do_store_flag):
>
> +       tree-vect-isel.o \
>
> IMHO we want to move more of the pattern matching magic of RTL
> expansion here to obsolete TER.  So please name it gimple-isel.cc
> (.cc!, not .c)
>
> +  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
> +  if (stmt != NULL
> +      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
> +    return ERROR_MARK;
>
> you want stmt == NULL || TREE_CODE_CLASS (...)
>
> in case the def stmt is a call.
>
> +         gimple_seq seq;
> +         tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
> +         if (seq)
> +           {
> +             gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
> +             gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
> +           }
>
> use force_gimple_operand_gsi that makes the above simpler.
>
>           if (invert)
> -           std::swap (*gimple_assign_rhs2_ptr (stmt0),
> -                      *gimple_assign_rhs3_ptr (stmt0));
> -         update_stmt (stmt0);
> +           std::swap (*gimple_assign_rhs2_ptr (vcond0),
> +                      *gimple_assign_rhs3_ptr (vcond0));
>
> use swap_ssa_operands.
>
> +         gimple_assign_set_rhs1 (vcond0, exp);
> +         update_stmt (vcond0);
>
> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> index cf2d979fea1..710b17a7c5c 100644
> --- a/gcc/tree-vect-stmts.c
> +++ b/gcc/tree-vect-stmts.c
> @@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
>         {
>           vec_cond_rhs = vec_oprnds1[i];
>           if (bitop1 == NOP_EXPR)
> -           vec_compare = build2 (cond_code, vec_cmp_type,
> -                                 vec_cond_lhs, vec_cond_rhs);
> +           vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
> +                                          vec_cond_lhs, vec_cond_rhs);
>           else
>             {
>
> please don't introduce more uses of gimplify_buildN - I'd like to
> get rid of those.  You can use
>
>      gimple_seq stmts = NULL;
>      vec_compare = gimple_build (&stmts, cond_code, ...);
>      gsi_insert_seq_before/after (...);
>
> OK with those changes.

Applying the patch caused

Running target unix//-m32
FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions
FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g

and

FAIL: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
(test for excess errors)
UNRESOLVED: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
compilation failed to produce executable

Richard.

> Thanks,
> Richard.
>
>
> > Thanks,
> > Martin
> >
> > >
> > >>>
> > >>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
> > >>> with embedded comparisons.
> > >>
> > >> I've fixed 2 failing test-cases I mentioned in the previous email.
> > >>
> > >> Martin
> > >>
> > >>>
> > >>> Thanks,
> > >>> Richard.
> > >>>
> > >>>
> > >>>> Martin
> > >>
> >

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-17 13:15                                                                     ` Richard Biener
@ 2020-06-18  8:10                                                                       ` Martin Liška
  2020-06-18  8:52                                                                         ` Richard Biener
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-18  8:10 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On 6/17/20 3:15 PM, Richard Biener wrote:
> On Wed, Jun 17, 2020 at 10:50 AM Richard Biener
> <richard.guenther@gmail.com> wrote:
>>
>> On Mon, Jun 15, 2020 at 2:20 PM Martin Liška <mliska@suse.cz> wrote:
>>>
>>> On 6/15/20 1:59 PM, Richard Biener wrote:
>>>> On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
>>>>>
>>>>> On 6/15/20 9:14 AM, Richard Biener wrote:
>>>>>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
>>>>>>>
>>>>>>> On 6/12/20 11:43 AM, Richard Biener wrote:
>>>>>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
>>>>>>>> Thus can we avoid the above completely (even as intermediate
>>>>>>>> state)?
>>>>>>>
>>>>>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
>>>>>>> failures:
>>>>>>>
>>>>>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
>>>>>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
>>>>>>>
>>>>>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
>>>>>>> analyzed the second failure.
>>>>>>>
>>>>>>> I'm also not sure about the gimplification change, I see superfluous assignments:
>>>>>>>       vec_cond_cmp.5 = _1 == _2;
>>>>>>>       vec_cond_cmp.6 = vec_cond_cmp.5;
>>>>>>>       vec_cond_cmp.7 = vec_cond_cmp.6;
>>>>>>>       _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
>>>>>>> ?
>>>>>>>
>>>>>>> So with the suggested patch, the EH should be gone as you suggested. Right?
>>>>>>
>>>>>> Right, it should be on the comparison already from the start.
>>>>>>
>>>>>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
>>>>>> *pre_p, gimple_seq *post_p,
>>>>>>            case VEC_COND_EXPR:
>>>>>>              {
>>>>>>                enum gimplify_status r0, r1, r2;
>>>>>> -
>>>>>>                r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
>>>>>>                                    post_p, is_gimple_condexpr, fb_rvalue);
>>>>>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
>>>>>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
>>>>>> +           gimple_add_tmp_var (tmp);
>>>>>> +           gimplify_assign (tmp, xop0, pre_p);
>>>>>> +           TREE_OPERAND (*expr_p, 0) = tmp;
>>>>>>                r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
>>>>>>                                    post_p, is_gimple_val, fb_rvalue);
>>>>>>
>>>>>> all of VEC_COND_EXPR can now be a simple goto expr_3;
>>>>>
>>>>> Works for me, thanks!
>>>>>
>>>>>>
>>>>>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
>>>>>> index 494c9e9c20b..090fb52a2f1 100644
>>>>>> --- a/gcc/tree-ssa-forwprop.c
>>>>>> +++ b/gcc/tree-ssa-forwprop.c
>>>>>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
>>>>>>                        if (code == COND_EXPR
>>>>>>                            || code == VEC_COND_EXPR)
>>>>>>                          {
>>>>>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
>>>>>> +                       if (code == VEC_COND_EXPR)
>>>>>> +                         break;
>>>>>> +
>>>>>>
>>>>>> err - remove the || code == VEC_COND_EXPR instead?
>>>>>
>>>>> Yep.
>>>>>
>>>>>>
>>>>>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
>>>>>>     {
>>>>>>       gimple_stmt_iterator gsi;
>>>>>>       basic_block bb;
>>>>>> -  bool cfg_changed = false;
>>>>>>
>>>>>>       FOR_EACH_BB_FN (bb, cfun)
>>>>>> -    {
>>>>>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>> -       {
>>>>>> -         expand_vector_operations_1 (&gsi);
>>>>>> -         /* ???  If we do not cleanup EH then we will ICE in
>>>>>> -            verification.  But in reality we have created wrong-code
>>>>>> -            as we did not properly transition EH info and edges to
>>>>>> -            the piecewise computations.  */
>>>>>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
>>>>>> -             && gimple_purge_dead_eh_edges (bb))
>>>>>> -           cfg_changed = true;
>>>>>> -       }
>>>>>> -    }
>>>>>>
>>>>>> I'm not sure about this.  Consider the C++ testcase where
>>>>>> the ?: is replaced by a division.  If veclower needs to replace
>>>>>> that with four scalar division statements then the above
>>>>>> still applies - veclower does not correctly duplicate EH info
>>>>>> and EH edges to the individual divisions (and we do not know
>>>>>> which component might trap).
>>>>>>
>>>>>> So please leave the above in.  You can try if using integer
>>>>>> division makes it break and add such a testcase if there's
>>>>>> no coverage for this in the testsuite.
>>>>>
>>>>> I'm leaving that above. Can you please explain how a division test-case can
>>>>> be created?
>>>>
>>>> typedef long v2di __attribute__((vector_size(16)));
>>>>
>>>> v2di foo (v2di a, v2di b)
>>>> {
>>>>     try
>>>>     {
>>>>       v2di res = a / b;
>>>>       return res;
>>>>       }
>>>>       catch (...)
>>>>       {
>>>>       return (v2di){};
>>>>       }
>>>> }
>>>>
>>>> with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
>>>>
>>>> ;;   basic block 2, loop depth 0
>>>> ;;    pred:       ENTRY
>>>>     [LP 1] _6 = a_4(D) / b_5(D);
>>>> ;;    succ:       5
>>>> ;;                3
>>>>
>>>> while after t.ii.226t.veclower we have
>>>>
>>>> ;;   basic block 2, loop depth 0
>>>> ;;    pred:       ENTRY
>>>>     _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
>>>>     _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
>>>>     _15 = _13 / _14;
>>>>     _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
>>>>     _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
>>>>     _18 = _16 / _17;
>>>>     _6 = {_15, _18};
>>>>     res_7 = _6;
>>>>     _8 = res_7;
>>>> ;;    succ:       3
>>>>
>>>> and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.
>>>
>>> Yes, it ICEs then:
>>>
>>>
>>> ./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
>>> /home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
>>> /home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
>>>       3 | v2di foo (v2di a, v2di b)
>>>         |      ^~~
>>> _6 = {_12, _15};
>>> during GIMPLE pass: veclower2
>>> /home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
>>> 0x10e308a verify_gimple_in_cfg(function*, bool)
>>>          /home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
>>> 0xfc9caf execute_function_todo
>>>          /home/marxin/Programming/gcc/gcc/passes.c:1985
>>> 0xfcaafc do_per_function
>>>          /home/marxin/Programming/gcc/gcc/passes.c:1640
>>> 0xfcaafc execute_todo
>>>          /home/marxin/Programming/gcc/gcc/passes.c:2039
>>> Please submit a full bug report,
>>> with preprocessed source if appropriate.
>>> Please include the complete backtrace with any bug report.
>>> See <https://gcc.gnu.org/bugs/> for instructions.
>>>
>>>>
>>>> We still generate wrong-code obviously as we'd need to duplicate the
>>>> EH info on each component division (and split blocks and generate
>>>> extra EH edges).  That's a pre-existing bug of course.  I just wanted
>>>> to avoid to create a new instance just because of the early instruction
>>>> selection for VEC_COND_EXPR.
>>>
>>> Fine!
>>>
>>>>
>>>>>>
>>>>>> What's missing from the patch is adjusting
>>>>>> verify_gimple_assign_ternary from
>>>>>>
>>>>>>      if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
>>>>>>           ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
>>>>>>          || !is_gimple_val (rhs2)
>>>>>>          || !is_gimple_val (rhs3))
>>>>>>        {
>>>>>>          error ("invalid operands in ternary operation");
>>>>>>          return true;
>>>>>>
>>>>>> to the same with the rhs_code == VEC_COND_EXPR case removed.
>>>>>
>>>>> Hmm. I'm not sure I've got this comment. Why do we want to change it
>>>>> and is it done right in the patch?
>>>>
>>>> Ah, I missed the hunk you added.
>>>
>>> That explains the confusion I got.
>>>
>>>>   But the check should be an inclusive
>>>> one, not an exclusive one and earlier accepting a is_gimple_condexpr
>>>> is superfluous when you later reject the tcc_comparison part.  Just
>>>> testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
>>>> and just adjust the above test.
>>>
>>> I simplified that.
>>>
>>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Please double-check the changelog
>>
>>          (do_store_flag):
>>
>> +       tree-vect-isel.o \
>>
>> IMHO we want to move more of the pattern matching magic of RTL
>> expansion here to obsolete TER.  So please name it gimple-isel.cc
>> (.cc!, not .c)
>>
>> +  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
>> +  if (stmt != NULL
>> +      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
>> +    return ERROR_MARK;
>>
>> you want stmt == NULL || TREE_CODE_CLASS (...)
>>
>> in case the def stmt is a call.
>>
>> +         gimple_seq seq;
>> +         tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
>> +         if (seq)
>> +           {
>> +             gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
>> +             gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
>> +           }
>>
>> use force_gimple_operand_gsi that makes the above simpler.
>>
>>            if (invert)
>> -           std::swap (*gimple_assign_rhs2_ptr (stmt0),
>> -                      *gimple_assign_rhs3_ptr (stmt0));
>> -         update_stmt (stmt0);
>> +           std::swap (*gimple_assign_rhs2_ptr (vcond0),
>> +                      *gimple_assign_rhs3_ptr (vcond0));
>>
>> use swap_ssa_operands.
>>
>> +         gimple_assign_set_rhs1 (vcond0, exp);
>> +         update_stmt (vcond0);
>>
>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>> index cf2d979fea1..710b17a7c5c 100644
>> --- a/gcc/tree-vect-stmts.c
>> +++ b/gcc/tree-vect-stmts.c
>> @@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
>>          {
>>            vec_cond_rhs = vec_oprnds1[i];
>>            if (bitop1 == NOP_EXPR)
>> -           vec_compare = build2 (cond_code, vec_cmp_type,
>> -                                 vec_cond_lhs, vec_cond_rhs);
>> +           vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
>> +                                          vec_cond_lhs, vec_cond_rhs);
>>            else
>>              {
>>
>> please don't introduce more uses of gimplify_buildN - I'd like to
>> get rid of those.  You can use
>>
>>       gimple_seq stmts = NULL;
>>       vec_compare = gimple_build (&stmts, cond_code, ...);
>>       gsi_insert_seq_before/after (...);
>>
>> OK with those changes.
> 
> Applying the patch caused
> 
> Running target unix//-m32
> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
> -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> -finline-functions
> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g

I can't reproduce that with current master. Can you?

> 
> and
> 
> FAIL: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
> (test for excess errors)
> UNRESOLVED: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
> compilation failed to produce executable

I've just fixed this one.

Martin

> 
> Richard.
> 
>> Thanks,
>> Richard.
>>
>>
>>> Thanks,
>>> Martin
>>>
>>>>
>>>>>>
>>>>>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
>>>>>> with embedded comparisons.
>>>>>
>>>>> I've fixed 2 failing test-cases I mentioned in the previous email.
>>>>>
>>>>> Martin
>>>>>
>>>>>>
>>>>>> Thanks,
>>>>>> Richard.
>>>>>>
>>>>>>
>>>>>>> Martin
>>>>>
>>>


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-18  8:10                                                                       ` Martin Liška
@ 2020-06-18  8:52                                                                         ` Richard Biener
  2020-06-18  9:02                                                                           ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Richard Biener @ 2020-06-18  8:52 UTC (permalink / raw)
  To: Martin Liška
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On Thu, Jun 18, 2020 at 10:10 AM Martin Liška <mliska@suse.cz> wrote:
>
> On 6/17/20 3:15 PM, Richard Biener wrote:
> > On Wed, Jun 17, 2020 at 10:50 AM Richard Biener
> > <richard.guenther@gmail.com> wrote:
> >>
> >> On Mon, Jun 15, 2020 at 2:20 PM Martin Liška <mliska@suse.cz> wrote:
> >>>
> >>> On 6/15/20 1:59 PM, Richard Biener wrote:
> >>>> On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
> >>>>>
> >>>>> On 6/15/20 9:14 AM, Richard Biener wrote:
> >>>>>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
> >>>>>>>
> >>>>>>> On 6/12/20 11:43 AM, Richard Biener wrote:
> >>>>>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
> >>>>>>>> Thus can we avoid the above completely (even as intermediate
> >>>>>>>> state)?
> >>>>>>>
> >>>>>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
> >>>>>>> failures:
> >>>>>>>
> >>>>>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
> >>>>>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
> >>>>>>>
> >>>>>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
> >>>>>>> analyzed the second failure.
> >>>>>>>
> >>>>>>> I'm also not sure about the gimplification change, I see superfluous assignments:
> >>>>>>>       vec_cond_cmp.5 = _1 == _2;
> >>>>>>>       vec_cond_cmp.6 = vec_cond_cmp.5;
> >>>>>>>       vec_cond_cmp.7 = vec_cond_cmp.6;
> >>>>>>>       _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
> >>>>>>> ?
> >>>>>>>
> >>>>>>> So with the suggested patch, the EH should be gone as you suggested. Right?
> >>>>>>
> >>>>>> Right, it should be on the comparison already from the start.
> >>>>>>
> >>>>>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
> >>>>>> *pre_p, gimple_seq *post_p,
> >>>>>>            case VEC_COND_EXPR:
> >>>>>>              {
> >>>>>>                enum gimplify_status r0, r1, r2;
> >>>>>> -
> >>>>>>                r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
> >>>>>>                                    post_p, is_gimple_condexpr, fb_rvalue);
> >>>>>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
> >>>>>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
> >>>>>> +           gimple_add_tmp_var (tmp);
> >>>>>> +           gimplify_assign (tmp, xop0, pre_p);
> >>>>>> +           TREE_OPERAND (*expr_p, 0) = tmp;
> >>>>>>                r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
> >>>>>>                                    post_p, is_gimple_val, fb_rvalue);
> >>>>>>
> >>>>>> all of VEC_COND_EXPR can now be a simple goto expr_3;
> >>>>>
> >>>>> Works for me, thanks!
> >>>>>
> >>>>>>
> >>>>>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
> >>>>>> index 494c9e9c20b..090fb52a2f1 100644
> >>>>>> --- a/gcc/tree-ssa-forwprop.c
> >>>>>> +++ b/gcc/tree-ssa-forwprop.c
> >>>>>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
> >>>>>>                        if (code == COND_EXPR
> >>>>>>                            || code == VEC_COND_EXPR)
> >>>>>>                          {
> >>>>>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
> >>>>>> +                       if (code == VEC_COND_EXPR)
> >>>>>> +                         break;
> >>>>>> +
> >>>>>>
> >>>>>> err - remove the || code == VEC_COND_EXPR instead?
> >>>>>
> >>>>> Yep.
> >>>>>
> >>>>>>
> >>>>>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
> >>>>>>     {
> >>>>>>       gimple_stmt_iterator gsi;
> >>>>>>       basic_block bb;
> >>>>>> -  bool cfg_changed = false;
> >>>>>>
> >>>>>>       FOR_EACH_BB_FN (bb, cfun)
> >>>>>> -    {
> >>>>>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
> >>>>>> -       {
> >>>>>> -         expand_vector_operations_1 (&gsi);
> >>>>>> -         /* ???  If we do not cleanup EH then we will ICE in
> >>>>>> -            verification.  But in reality we have created wrong-code
> >>>>>> -            as we did not properly transition EH info and edges to
> >>>>>> -            the piecewise computations.  */
> >>>>>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
> >>>>>> -             && gimple_purge_dead_eh_edges (bb))
> >>>>>> -           cfg_changed = true;
> >>>>>> -       }
> >>>>>> -    }
> >>>>>>
> >>>>>> I'm not sure about this.  Consider the C++ testcase where
> >>>>>> the ?: is replaced by a division.  If veclower needs to replace
> >>>>>> that with four scalar division statements then the above
> >>>>>> still applies - veclower does not correctly duplicate EH info
> >>>>>> and EH edges to the individual divisions (and we do not know
> >>>>>> which component might trap).
> >>>>>>
> >>>>>> So please leave the above in.  You can try if using integer
> >>>>>> division makes it break and add such a testcase if there's
> >>>>>> no coverage for this in the testsuite.
> >>>>>
> >>>>> I'm leaving that above. Can you please explain how can a division test-case
> >>>>> be created?
> >>>>
> >>>> typedef long v2di __attribute__((vector_size(16)));
> >>>>
> >>>> v2di foo (v2di a, v2di b)
> >>>> {
> >>>>     try
> >>>>     {
> >>>>       v2di res = a / b;
> >>>>       return res;
> >>>>       }
> >>>>       catch (...)
> >>>>       {
> >>>>       return (v2di){};
> >>>>       }
> >>>> }
> >>>>
> >>>> with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
> >>>>
> >>>> ;;   basic block 2, loop depth 0
> >>>> ;;    pred:       ENTRY
> >>>>     [LP 1] _6 = a_4(D) / b_5(D);
> >>>> ;;    succ:       5
> >>>> ;;                3
> >>>>
> >>>> while after t.ii.226t.veclower we have
> >>>>
> >>>> ;;   basic block 2, loop depth 0
> >>>> ;;    pred:       ENTRY
> >>>>     _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
> >>>>     _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
> >>>>     _15 = _13 / _14;
> >>>>     _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
> >>>>     _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
> >>>>     _18 = _16 / _17;
> >>>>     _6 = {_15, _18};
> >>>>     res_7 = _6;
> >>>>     _8 = res_7;
> >>>> ;;    succ:       3
> >>>>
> >>>> and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.
> >>>
> >>> Yes, it ICEs then:
> >>>
> >>>
> >>> ./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
> >>> /home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
> >>> /home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
> >>>       3 | v2di foo (v2di a, v2di b)
> >>>         |      ^~~
> >>> _6 = {_12, _15};
> >>> during GIMPLE pass: veclower2
> >>> /home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
> >>> 0x10e308a verify_gimple_in_cfg(function*, bool)
> >>>          /home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
> >>> 0xfc9caf execute_function_todo
> >>>          /home/marxin/Programming/gcc/gcc/passes.c:1985
> >>> 0xfcaafc do_per_function
> >>>          /home/marxin/Programming/gcc/gcc/passes.c:1640
> >>> 0xfcaafc execute_todo
> >>>          /home/marxin/Programming/gcc/gcc/passes.c:2039
> >>> Please submit a full bug report,
> >>> with preprocessed source if appropriate.
> >>> Please include the complete backtrace with any bug report.
> >>> See <https://gcc.gnu.org/bugs/> for instructions.
> >>>
> >>>>
> >>>> We still generate wrong-code obviously as we'd need to duplicate the
> >>>> EH info on each component division (and split blocks and generate
> >>>> extra EH edges).  That's a pre-existing bug of course.  I just wanted
> >>>> to avoid to create a new instance just because of the early instruction
> >>>> selection for VEC_COND_EXPR.
> >>>
> >>> Fine!
> >>>
> >>>>
> >>>>>>
> >>>>>> What's missing from the patch is adjusting
> >>>>>> verify_gimple_assign_ternary from
> >>>>>>
> >>>>>>      if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
> >>>>>>           ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
> >>>>>>          || !is_gimple_val (rhs2)
> >>>>>>          || !is_gimple_val (rhs3))
> >>>>>>        {
> >>>>>>          error ("invalid operands in ternary operation");
> >>>>>>          return true;
> >>>>>>
> >>>>>> to the same with the rhs_code == VEC_COND_EXPR case removed.
> >>>>>
> >>>>> Hmm. I'm not sure I've got this comment. Why do we want to change it
> >>>>> and is it done right in the patch?
> >>>>
> >>>> Ah, I missed the hunk you added.
> >>>
> >>> That explains the confusion I got.
> >>>
> >>>>   But the check should be an inclusive
> >>>> one, not an exclusive one and earlier accepting a is_gimple_condexpr
> >>>> is superfluous when you later reject the tcc_comparison part.  Just
> >>>> testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
> >>>> and just adjust the above test.
> >>>
> >>> I simplified that.
> >>>
> >>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Please double-check the changelog
> >>
> >>          (do_store_flag):
> >>
> >> +       tree-vect-isel.o \
> >>
> >> IMHO we want to move more of the pattern matching magic of RTL
> >> expansion here to obsolete TER.  So please name it gimple-isel.cc
> >> (.cc!, not .c)
> >>
> >> +  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
> >> +  if (stmt != NULL
> >> +      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
> >> +    return ERROR_MARK;
> >>
> >> you want stmt == NULL || TREE_CODE_CLASS (...)
> >>
> >> in case the def stmt is a call.
> >>
> >> +         gimple_seq seq;
> >> +         tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
> >> +         if (seq)
> >> +           {
> >> +             gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
> >> +             gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
> >> +           }
> >>
> >> use force_gimple_operand_gsi that makes the above simpler.
> >>
> >>            if (invert)
> >> -           std::swap (*gimple_assign_rhs2_ptr (stmt0),
> >> -                      *gimple_assign_rhs3_ptr (stmt0));
> >> -         update_stmt (stmt0);
> >> +           std::swap (*gimple_assign_rhs2_ptr (vcond0),
> >> +                      *gimple_assign_rhs3_ptr (vcond0));
> >>
> >> use swap_ssa_operands.
> >>
> >> +         gimple_assign_set_rhs1 (vcond0, exp);
> >> +         update_stmt (vcond0);
> >>
> >> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
> >> index cf2d979fea1..710b17a7c5c 100644
> >> --- a/gcc/tree-vect-stmts.c
> >> +++ b/gcc/tree-vect-stmts.c
> >> @@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
> >>          {
> >>            vec_cond_rhs = vec_oprnds1[i];
> >>            if (bitop1 == NOP_EXPR)
> >> -           vec_compare = build2 (cond_code, vec_cmp_type,
> >> -                                 vec_cond_lhs, vec_cond_rhs);
> >> +           vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
> >> +                                          vec_cond_lhs, vec_cond_rhs);
> >>            else
> >>              {
> >>
> >> please don't introduce more uses of gimplify_buildN - I'd like to
> >> get rid of those.  You can use
> >>
> >>       gimple_seq stmts = NULL;
> >>       vec_compare = gimple_build (&stmts, cond_code, ...);
> >>       gsi_insert_seq_before/after (...);
> >>
> >> OK with those changes.
> >
> > Applying the patch caused
> >
> > Running target unix//-m32
> > FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
> > -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> > -finline-functions
> > FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g
>
> I can't reproduce that with current master. Can you?

Yes.

> make check-gcc RUNTESTFLAGS="--target_board=unix/-m32 ieee.exp=pr50310.c"
...
FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
-fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
-finline-functions
FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g

mind the -m32

> >
> > and
> >
> > FAIL: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
> > (test for excess errors)
> > UNRESOLVED: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
> > compilation failed to produce executable
>
> I've just fixed this one.
>
> Martin
>
> >
> > Richard.
> >
> >> Thanks,
> >> Richard.
> >>
> >>
> >>> Thanks,
> >>> Martin
> >>>
> >>>>
> >>>>>>
> >>>>>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
> >>>>>> with embedded comparisons.
> >>>>>
> >>>>> I've fixed 2 failing test-cases I mentioned in the previous email.
> >>>>>
> >>>>> Martin
> >>>>>
> >>>>>>
> >>>>>> Thanks,
> >>>>>> Richard.
> >>>>>>
> >>>>>>
> >>>>>>> Martin
> >>>>>
> >>>
>

^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-18  8:52                                                                         ` Richard Biener
@ 2020-06-18  9:02                                                                           ` Martin Liška
  2020-06-18  9:29                                                                             ` Martin Liška
  0 siblings, 1 reply; 65+ messages in thread
From: Martin Liška @ 2020-06-18  9:02 UTC (permalink / raw)
  To: Richard Biener
  Cc: Segher Boessenkool, GCC Patches, Richard Sandiford, David Edelsohn

On 6/18/20 10:52 AM, Richard Biener wrote:
> On Thu, Jun 18, 2020 at 10:10 AM Martin Liška <mliska@suse.cz> wrote:
>>
>> On 6/17/20 3:15 PM, Richard Biener wrote:
>>> On Wed, Jun 17, 2020 at 10:50 AM Richard Biener
>>> <richard.guenther@gmail.com> wrote:
>>>>
>>>> On Mon, Jun 15, 2020 at 2:20 PM Martin Liška <mliska@suse.cz> wrote:
>>>>>
>>>>> On 6/15/20 1:59 PM, Richard Biener wrote:
>>>>>> On Mon, Jun 15, 2020 at 1:19 PM Martin Liška <mliska@suse.cz> wrote:
>>>>>>>
>>>>>>> On 6/15/20 9:14 AM, Richard Biener wrote:
>>>>>>>> On Fri, Jun 12, 2020 at 3:24 PM Martin Liška <mliska@suse.cz> wrote:
>>>>>>>>>
>>>>>>>>> On 6/12/20 11:43 AM, Richard Biener wrote:
>>>>>>>>>> So ... how far are you with enforcing a split VEC_COND_EXPR?
>>>>>>>>>> Thus can we avoid the above completely (even as intermediate
>>>>>>>>>> state)?
>>>>>>>>>
>>>>>>>>> Apparently, I'm quite close. Using the attached patch I see only 2 testsuite
>>>>>>>>> failures:
>>>>>>>>>
>>>>>>>>> FAIL: gcc.dg/tree-ssa/pr68714.c scan-tree-dump-times reassoc1 " <= " 1
>>>>>>>>> FAIL: gcc.target/i386/pr78102.c scan-assembler-times pcmpeqq 3
>>>>>>>>>
>>>>>>>>> The first one is about teaching reassoc about the SSA_NAMEs in VEC_COND_EXPR. I haven't
>>>>>>>>> analyzed the second failure.
>>>>>>>>>
>>>>>>>>> I'm also not sure about the gimplification change, I see superfluous assignments:
>>>>>>>>>        vec_cond_cmp.5 = _1 == _2;
>>>>>>>>>        vec_cond_cmp.6 = vec_cond_cmp.5;
>>>>>>>>>        vec_cond_cmp.7 = vec_cond_cmp.6;
>>>>>>>>>        _3 = VEC_COND_EXPR <vec_cond_cmp.7, { -1, -1, -1, -1, -1, -1, -1, -1 }, { 0, 0, 0, 0, 0, 0, 0, 0 }>;
>>>>>>>>> ?
>>>>>>>>>
>>>>>>>>> So with the suggested patch, the EH should be gone as you suggested. Right?
>>>>>>>>
>>>>>>>> Right, it should be on the comparison already from the start.
>>>>>>>>
>>>>>>>> @@ -14221,9 +14221,13 @@ gimplify_expr (tree *expr_p, gimple_seq
>>>>>>>> *pre_p, gimple_seq *post_p,
>>>>>>>>             case VEC_COND_EXPR:
>>>>>>>>               {
>>>>>>>>                 enum gimplify_status r0, r1, r2;
>>>>>>>> -
>>>>>>>>                 r0 = gimplify_expr (&TREE_OPERAND (*expr_p, 0), pre_p,
>>>>>>>>                                     post_p, is_gimple_condexpr, fb_rvalue);
>>>>>>>> +           tree xop0 = TREE_OPERAND (*expr_p, 0);
>>>>>>>> +           tmp = create_tmp_var_raw (TREE_TYPE (xop0), "vec_cond_cmp");
>>>>>>>> +           gimple_add_tmp_var (tmp);
>>>>>>>> +           gimplify_assign (tmp, xop0, pre_p);
>>>>>>>> +           TREE_OPERAND (*expr_p, 0) = tmp;
>>>>>>>>                 r1 = gimplify_expr (&TREE_OPERAND (*expr_p, 1), pre_p,
>>>>>>>>                                     post_p, is_gimple_val, fb_rvalue);
>>>>>>>>
>>>>>>>> all of VEC_COND_EXPR can now be a simple goto expr_3;
>>>>>>>
>>>>>>> Works for me, thanks!
>>>>>>>
>>>>>>>>
>>>>>>>> diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
>>>>>>>> index 494c9e9c20b..090fb52a2f1 100644
>>>>>>>> --- a/gcc/tree-ssa-forwprop.c
>>>>>>>> +++ b/gcc/tree-ssa-forwprop.c
>>>>>>>> @@ -3136,6 +3136,10 @@ pass_forwprop::execute (function *fun)
>>>>>>>>                         if (code == COND_EXPR
>>>>>>>>                             || code == VEC_COND_EXPR)
>>>>>>>>                           {
>>>>>>>> +                       /* Do not propagate into VEC_COND_EXPRs.  */
>>>>>>>> +                       if (code == VEC_COND_EXPR)
>>>>>>>> +                         break;
>>>>>>>> +
>>>>>>>>
>>>>>>>> err - remove the || code == VEC_COND_EXPR instead?
>>>>>>>
>>>>>>> Yep.
>>>>>>>
>>>>>>>>
>>>>>>>> @@ -2221,24 +2226,12 @@ expand_vector_operations (void)
>>>>>>>>      {
>>>>>>>>        gimple_stmt_iterator gsi;
>>>>>>>>        basic_block bb;
>>>>>>>> -  bool cfg_changed = false;
>>>>>>>>
>>>>>>>>        FOR_EACH_BB_FN (bb, cfun)
>>>>>>>> -    {
>>>>>>>> -      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
>>>>>>>> -       {
>>>>>>>> -         expand_vector_operations_1 (&gsi);
>>>>>>>> -         /* ???  If we do not cleanup EH then we will ICE in
>>>>>>>> -            verification.  But in reality we have created wrong-code
>>>>>>>> -            as we did not properly transition EH info and edges to
>>>>>>>> -            the piecewise computations.  */
>>>>>>>> -         if (maybe_clean_eh_stmt (gsi_stmt (gsi))
>>>>>>>> -             && gimple_purge_dead_eh_edges (bb))
>>>>>>>> -           cfg_changed = true;
>>>>>>>> -       }
>>>>>>>> -    }
>>>>>>>>
>>>>>>>> I'm not sure about this.  Consider the C++ testcase where
>>>>>>>> the ?: is replaced by a division.  If veclower needs to replace
>>>>>>>> that with four scalar division statements then the above
>>>>>>>> still applies - veclower does not correctly duplicate EH info
>>>>>>>> and EH edges to the individual divisions (and we do not know
>>>>>>>> which component might trap).
>>>>>>>>
>>>>>>>> So please leave the above in.  You can try if using integer
>>>>>>>> division makes it break and add such a testcase if there's
>>>>>>>> no coverage for this in the testsuite.
>>>>>>>
>>>>>>> I'm leaving that above. Can you please explain how can a division test-case
>>>>>>> be created?
>>>>>>
>>>>>> typedef long v2di __attribute__((vector_size(16)));
>>>>>>
>>>>>> v2di foo (v2di a, v2di b)
>>>>>> {
>>>>>>      try
>>>>>>      {
>>>>>>        v2di res = a / b;
>>>>>>        return res;
>>>>>>        }
>>>>>>        catch (...)
>>>>>>        {
>>>>>>        return (v2di){};
>>>>>>        }
>>>>>> }
>>>>>>
>>>>>> with -fnon-call-exceptions I see in t.ii.090t.ehdisp (correctly):
>>>>>>
>>>>>> ;;   basic block 2, loop depth 0
>>>>>> ;;    pred:       ENTRY
>>>>>>      [LP 1] _6 = a_4(D) / b_5(D);
>>>>>> ;;    succ:       5
>>>>>> ;;                3
>>>>>>
>>>>>> while after t.ii.226t.veclower we have
>>>>>>
>>>>>> ;;   basic block 2, loop depth 0
>>>>>> ;;    pred:       ENTRY
>>>>>>      _13 = BIT_FIELD_REF <a_4(D), 64, 0>;
>>>>>>      _14 = BIT_FIELD_REF <b_5(D), 64, 0>;
>>>>>>      _15 = _13 / _14;
>>>>>>      _16 = BIT_FIELD_REF <a_4(D), 64, 64>;
>>>>>>      _17 = BIT_FIELD_REF <b_5(D), 64, 64>;
>>>>>>      _18 = _16 / _17;
>>>>>>      _6 = {_15, _18};
>>>>>>      res_7 = _6;
>>>>>>      _8 = res_7;
>>>>>> ;;    succ:       3
>>>>>>
>>>>>> and all EH is gone and we'd ICE if you remove the above hunk.  Hopefully.
>>>>>
>>>>> Yes, it ICEs then:
>>>>>
>>>>>
>>>>> ./xg++ -B. ~/Programming/testcases/ice.c -c -fnon-call-exceptions -O3
>>>>> /home/marxin/Programming/testcases/ice.c: In function ‘v2di foo(v2di, v2di)’:
>>>>> /home/marxin/Programming/testcases/ice.c:3:6: error: statement marked for throw, but doesn’t
>>>>>        3 | v2di foo (v2di a, v2di b)
>>>>>          |      ^~~
>>>>> _6 = {_12, _15};
>>>>> during GIMPLE pass: veclower2
>>>>> /home/marxin/Programming/testcases/ice.c:3:6: internal compiler error: verify_gimple failed
>>>>> 0x10e308a verify_gimple_in_cfg(function*, bool)
>>>>>           /home/marxin/Programming/gcc/gcc/tree-cfg.c:5461
>>>>> 0xfc9caf execute_function_todo
>>>>>           /home/marxin/Programming/gcc/gcc/passes.c:1985
>>>>> 0xfcaafc do_per_function
>>>>>           /home/marxin/Programming/gcc/gcc/passes.c:1640
>>>>> 0xfcaafc execute_todo
>>>>>           /home/marxin/Programming/gcc/gcc/passes.c:2039
>>>>> Please submit a full bug report,
>>>>> with preprocessed source if appropriate.
>>>>> Please include the complete backtrace with any bug report.
>>>>> See <https://gcc.gnu.org/bugs/> for instructions.
>>>>>
>>>>>>
>>>>>> We still generate wrong-code obviously as we'd need to duplicate the
>>>>>> EH info on each component division (and split blocks and generate
>>>>>> extra EH edges).  That's a pre-existing bug of course.  I just wanted
>>>>>> to avoid to create a new instance just because of the early instruction
>>>>>> selection for VEC_COND_EXPR.
>>>>>
>>>>> Fine!
>>>>>
>>>>>>
>>>>>>>>
>>>>>>>> What's missing from the patch is adjusting
>>>>>>>> verify_gimple_assign_ternary from
>>>>>>>>
>>>>>>>>       if (((rhs_code == VEC_COND_EXPR || rhs_code == COND_EXPR)
>>>>>>>>            ? !is_gimple_condexpr (rhs1) : !is_gimple_val (rhs1))
>>>>>>>>           || !is_gimple_val (rhs2)
>>>>>>>>           || !is_gimple_val (rhs3))
>>>>>>>>         {
>>>>>>>>           error ("invalid operands in ternary operation");
>>>>>>>>           return true;
>>>>>>>>
>>>>>>>> to the same with the rhs_code == VEC_COND_EXPR case removed.
>>>>>>>
>>>>>>> Hmm. I'm not sure I've got this comment. Why do we want to change it
>>>>>>> and is it done right in the patch?
>>>>>>
>>>>>> Ah, I missed the hunk you added.
>>>>>
>>>>> That explains the confusion I got.
>>>>>
>>>>>>    But the check should be an inclusive
>>>>>> one, not an exclusive one and earlier accepting a is_gimple_condexpr
>>>>>> is superfluous when you later reject the tcc_comparison part.  Just
>>>>>> testing is_gimple_val is better.  So yes, remove your tree-cfg.c hunk
>>>>>> and just adjust the above test.
>>>>>
>>>>> I simplified that.
>>>>>
>>>>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>>>
>>>> Please double-check the changelog
>>>>
>>>>           (do_store_flag):
>>>>
>>>> +       tree-vect-isel.o \
>>>>
>>>> IMHO we want to move more of the pattern matching magic of RTL
>>>> expansion here to obsolete TER.  So please name it gimple-isel.cc
>>>> (.cc!, not .c)
>>>>
>>>> +  gassign *assign = dyn_cast<gassign *> (SSA_NAME_DEF_STMT (cond));
>>>> +  if (stmt != NULL
>>>> +      && TREE_CODE_CLASS (gimple_assign_rhs_code (assign)) != tcc_comparison)
>>>> +    return ERROR_MARK;
>>>>
>>>> you want stmt == NULL || TREE_CODE_CLASS (...)
>>>>
>>>> in case the def stmt is a call.
>>>>
>>>> +         gimple_seq seq;
>>>> +         tree exp = force_gimple_operand (comb, &seq, true, NULL_TREE);
>>>> +         if (seq)
>>>> +           {
>>>> +             gimple_stmt_iterator gsi = gsi_for_stmt (vcond0);
>>>> +             gsi_insert_before (&gsi, seq, GSI_SAME_STMT);
>>>> +           }
>>>>
>>>> use force_gimple_operand_gsi that makes the above simpler.
>>>>
>>>>             if (invert)
>>>> -           std::swap (*gimple_assign_rhs2_ptr (stmt0),
>>>> -                      *gimple_assign_rhs3_ptr (stmt0));
>>>> -         update_stmt (stmt0);
>>>> +           std::swap (*gimple_assign_rhs2_ptr (vcond0),
>>>> +                      *gimple_assign_rhs3_ptr (vcond0));
>>>>
>>>> use swap_ssa_operands.
>>>>
>>>> +         gimple_assign_set_rhs1 (vcond0, exp);
>>>> +         update_stmt (vcond0);
>>>>
>>>> diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
>>>> index cf2d979fea1..710b17a7c5c 100644
>>>> --- a/gcc/tree-vect-stmts.c
>>>> +++ b/gcc/tree-vect-stmts.c
>>>> @@ -9937,8 +9937,8 @@ vectorizable_condition (vec_info *vinfo,
>>>>           {
>>>>             vec_cond_rhs = vec_oprnds1[i];
>>>>             if (bitop1 == NOP_EXPR)
>>>> -           vec_compare = build2 (cond_code, vec_cmp_type,
>>>> -                                 vec_cond_lhs, vec_cond_rhs);
>>>> +           vec_compare = gimplify_build2 (gsi, cond_code, vec_cmp_type,
>>>> +                                          vec_cond_lhs, vec_cond_rhs);
>>>>             else
>>>>               {
>>>>
>>>> please don't introduce more uses of gimplify_buildN - I'd like to
>>>> get rid of those.  You can use
>>>>
>>>>        gimple_seq stmts = NULL;
>>>>        vec_compare = gimple_build (&stmts, cond_code, ...);
>>>>        gsi_insert_seq_before/after (...);
>>>>
>>>> OK with those changes.
>>>
>>> Applying the patch caused
>>>
>>> Running target unix//-m32
>>> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
>>> -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
>>> -finline-functions
>>> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g
>>
>> I can't reproduce that with current master. Can you?
> 
> Yes.
> 
>> make check-gcc RUNTESTFLAGS="--target_board=unix/-m32 ieee.exp=pr50310.c"
> ...
> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3
> -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer
> -finline-functions
> FAIL: gcc.c-torture/execute/ieee/pr50310.c execution,  -O3 -g

Now I've got it.

> 
> mind the -m32

I did, but -ffloat-store was not mentioned in the previous list of options ;)

Martin

> 
>>>
>>> and
>>>
>>> FAIL: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
>>> (test for excess errors)
>>> UNRESOLVED: ext/random/simd_fast_mersenne_twister_engine/operators/inequal.cc
>>> compilation failed to produce executable
>>
>> I've just fixed this one.
>>
>> Martin
>>
>>>
>>> Richard.
>>>
>>>> Thanks,
>>>> Richard.
>>>>
>>>>
>>>>> Thanks,
>>>>> Martin
>>>>>
>>>>>>
>>>>>>>>
>>>>>>>> You'll likely figure the vectorizer still creates some VEC_COND_EXPRs
>>>>>>>> with embedded comparisons.
>>>>>>>
>>>>>>> I've fixed 2 failing test-cases I mentioned in the previous email.
>>>>>>>
>>>>>>> Martin
>>>>>>>
>>>>>>>>
>>>>>>>> Thanks,
>>>>>>>> Richard.
>>>>>>>>
>>>>>>>>
>>>>>>>>> Martin
>>>>>>>
>>>>>
>>


^ permalink raw reply	[flat|nested] 65+ messages in thread

* Re: [stage1][PATCH] Lower VEC_COND_EXPR into internal functions.
  2020-06-18  9:02                                                                           ` Martin Liška
@ 2020-06-18  9:29                                                                             ` Martin Liška
  0 siblings, 0 replies; 65+ messages in thread
From: Martin Liška @ 2020-06-18  9:29 UTC (permalink / raw)
  To: Richard Biener; +Cc: GCC Patches, David Edelsohn, Segher Boessenkool

On 6/18/20 11:02 AM, Martin Liška wrote:
> Now I've got it.

I've just reduced that to:

$ cat pr50310.c
double s1[4], s2[4], s3[64];

int
main ()
{
   s1[0] = 5.0;
   s1[1] = 6.0;
   s1[2] = 5.0;
   s1[3] = __builtin_nan ("");
   s2[0] = 6.0;
   s2[1] = 5.0;
   s2[2] = 5.0;
   s2[3] = 5.0;

   asm volatile ("" : : : "memory");
   for (int i = 1; i < 4; i++)
     s3[i] = __builtin_isgreater (s1[i], s2[i]) ? -1.0 : 0.0;
   asm volatile ("" : : : "memory");

   __builtin_printf ("val: %f\n", s3[1]);
   if (s3[1] != -1.0)
     __builtin_abort ();
   return 0;
}

The optimized dump differs in the expected manner:

    <bb 2> [local count: 805306369]:
    MEM <vector(2) double> [(double *)&s1] = { 5.0e+0, 6.0e+0 };
    MEM <vector(2) double> [(double *)&s1 + 16B] = { 5.0e+0,  Nan };
    MEM <vector(2) double> [(double *)&s2] = { 6.0e+0, 5.0e+0 };
    MEM <vector(2) double> [(double *)&s2 + 16B] = { 5.0e+0, 5.0e+0 };
    __asm__ __volatile__("" :  :  : "memory");
    vect__1.13_51 = MEM <vector(2) double> [(double *)&s1 + 8B];
    vect__2.16_55 = MEM <vector(2) double> [(double *)&s2 + 8B];
-  vect_iftmp.17_58 = VEC_COND_EXPR <vect__1.13_51 u<= vect__2.16_55, { 0.0, 0.0 }, { -1.0e+0, -1.0e+0 }>;
-  MEM <vector(2) double> [(double *)&s3 + 8B] = vect_iftmp.17_58;
+  _58 = vect__1.13_51 u<= vect__2.16_55;
+  vect_iftmp.17_59 = .VCOND (vect__1.13_51, vect__2.16_55, { 0.0, 0.0 }, { -1.0e+0, -1.0e+0 }, 117);
+  MEM <vector(2) double> [(double *)&s3 + 8B] = vect_iftmp.17_59;
    _41 = s1[3];
    _42 = s2[3];
    if (_41 u<= _42)
      goto <bb 3>; [50.00%]
    else
      goto <bb 4>; [50.00%]
  
    <bb 3> [local count: 402653185]:
  
    <bb 4> [local count: 805306369]:
    # iftmp.0_43 = PHI <-1.0e+0(2), 0.0(3)>
    s3[3] = iftmp.0_43;
    __asm__ __volatile__("" :  :  : "memory");

but we fail with:

$ gcc pr50310.c -m32 -O3 -ffloat-store && ./a.out
val: -nan
Aborted (core dumped)

I'm digging deeper.
Martin

^ permalink raw reply	[flat|nested] 65+ messages in thread

end of thread, other threads:[~2020-06-18  9:29 UTC | newest]

Thread overview: 65+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-09-24 10:25 [PATCH][RFC] Come up with VEC_COND_OP_EXPRs Martin Liška
2019-09-24 11:11 ` Richard Sandiford
2019-09-24 11:29   ` Richard Biener
2019-09-24 11:57     ` Richard Sandiford
2019-09-24 12:18       ` Richard Biener
2019-09-24 14:51         ` Richard Sandiford
2020-04-01 10:19 ` [stage1][PATCH] Lower VEC_COND_EXPR into internal functions Martin Liška
2020-04-06  9:17   ` Richard Sandiford
2020-04-06 12:30     ` Richard Biener
2020-05-21 12:51       ` Martin Liška
2020-05-21 13:29         ` Martin Liška
2020-05-21 20:16           ` Segher Boessenkool
2020-05-22 11:14             ` Richard Biener
2020-05-26 10:15               ` Richard Sandiford
2020-05-27 14:04                 ` Martin Liška
2020-05-27 16:13                   ` Richard Sandiford
2020-05-27 16:32                     ` Richard Biener
2020-05-28 14:46                       ` Martin Liška
2020-05-28 15:28                         ` Richard Sandiford
2020-05-29 12:17                           ` Richard Biener
2020-05-29 12:43                             ` Richard Biener
2020-05-29 16:47                               ` Segher Boessenkool
2020-05-29 17:05                                 ` Richard Sandiford
2020-05-29 17:30                                   ` Segher Boessenkool
2020-05-29 15:39                             ` Segher Boessenkool
2020-05-29 16:57                               ` Richard Sandiford
2020-05-29 17:09                                 ` Segher Boessenkool
2020-05-29 17:26                                   ` Richard Sandiford
2020-05-29 17:37                                     ` Segher Boessenkool
2020-05-30  7:15                                       ` Richard Sandiford
2020-05-30 13:08                                         ` Segher Boessenkool
2020-06-02 11:09                                           ` Richard Biener
2020-06-02 15:00                                             ` Martin Liška
2020-06-03  7:38                                               ` Richard Biener
2020-06-03 13:41                                                 ` Richard Sandiford
2020-06-03 14:17                                                   ` David Edelsohn
2020-06-03 14:46                                                     ` Richard Biener
2020-06-03 17:01                                                       ` Segher Boessenkool
2020-06-03 17:23                                                         ` Richard Biener
2020-06-03 18:23                                                           ` Segher Boessenkool
2020-06-03 18:38                                                             ` Richard Biener
2020-06-03 18:46                                                               ` David Edelsohn
2020-06-03 19:09                                                               ` Segher Boessenkool
2020-06-03 19:13                                                                 ` Jakub Jelinek
2020-06-03 18:27                                               ` Segher Boessenkool
2020-06-08 11:04                                                 ` Martin Liška
2020-06-09 13:42                                                   ` Richard Biener
2020-06-10  8:51                                                     ` Martin Liška
2020-06-10 10:50                                                       ` Richard Biener
2020-06-10 12:27                                                         ` Martin Liška
2020-06-10 13:01                                                           ` Martin Liška
2020-06-11  8:52                                                     ` Martin Liška
2020-06-12  9:43                                                       ` Richard Biener
2020-06-12 13:24                                                         ` Martin Liška
2020-06-15  7:14                                                           ` Richard Biener
2020-06-15 11:19                                                             ` Martin Liška
2020-06-15 11:59                                                               ` Richard Biener
2020-06-15 12:20                                                                 ` Martin Liška
2020-06-17  8:50                                                                   ` Richard Biener
2020-06-17 13:15                                                                     ` Richard Biener
2020-06-18  8:10                                                                       ` Martin Liška
2020-06-18  8:52                                                                         ` Richard Biener
2020-06-18  9:02                                                                           ` Martin Liška
2020-06-18  9:29                                                                             ` Martin Liška
2020-04-06 12:33     ` Richard Biener

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).