[PATCH 1/7] Add VEC_WIDEN_MULT_EVEN/ODD

public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed

* [PATCH 1/7] Add VEC_WIDEN_MULT_EVEN/ODD_EXPR
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
  2012-07-10  8:23 ` [PATCH 4/7] spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 2/7] i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * tree.def (VEC_WIDEN_MULT_EVEN_EXPR, VEC_WIDEN_MULT_ODD_EXPR): New.
        * cfgexpand.c (expand_debug_expr): Handle them.
        * expr.c (expand_expr_real_2): Likewise.
        * fold-const.c (fold_binary_loc): Likewise.
        * gimple-pretty-print.c (dump_binary_rhs): Likewise.
        * optabs.c (optab_for_tree_code): Likewise.
        * tree-cfg.c (verify_gimple_assign_binary): Likewise.
        * tree-inline.c (estimate_operator_cost): Likewise.
        * tree-pretty-print.c (dump_generic_node): Likewise.
        * tree.c (commutative_tree_code): Likewise.
        * tree-vect-generic.c (expand_vector_operations_1): Likewise.
        Handle type change before looking up optab.
        * optabs.h (OTI_vec_widen_umult_even, OTI_vec_widen_umult_odd): New.
        (OTI_vec_widen_smult_even, OTI_vec_widen_smult_odd): New.
        (vec_widen_umult_even_optab, vec_widen_umult_odd_optab): New.
        (vec_widen_smult_even_optab, vec_widen_smult_odd_optab): New.
        * genopinit.c (optabs): Initialize them.
        * doc/md.texi: Document them.
---
 gcc/ChangeLog             |   21 +++++++++++++++++++++
 gcc/cfgexpand.c           |    4 +++-
 gcc/doc/md.texi           |   12 +++++++++---
 gcc/expr.c                |   28 +++++++---------------------
 gcc/fold-const.c          |   36 ++++++++++++++++++++++++------------
 gcc/genopinit.c           |    4 ++++
 gcc/gimple-pretty-print.c |    2 ++
 gcc/optabs.c              |    8 ++++++++
 gcc/optabs.h              |   12 ++++++++++--
 gcc/tree-cfg.c            |    2 ++
 gcc/tree-inline.c         |    2 ++
 gcc/tree-pretty-print.c   |   32 +++++++++-----------------------
 gcc/tree-vect-generic.c   |   32 +++++++++++++++++---------------
 gcc/tree.c                |    2 ++
 gcc/tree.def              |    4 ++++
 15 files changed, 124 insertions(+), 77 deletions(-)

diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index ad2f667..c8d09c7 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1,5 +1,5 @@
 /* A pass for lowering trees to RTL.
-   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+   Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
    Free Software Foundation, Inc.
 
 This file is part of GCC.
@@ -3410,6 +3410,8 @@ expand_debug_expr (tree exp)
     case VEC_UNPACK_LO_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_WIDEN_LSHIFT_HI_EXPR:
     case VEC_WIDEN_LSHIFT_LO_EXPR:
     case VEC_PERM_EXPR:
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index c71c59c..99f6528 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4561,15 +4561,21 @@ floating point conversion and place the resulting N/2 values of size 2*S in
 the output vector (operand 0).
 
 @cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
-@cindex @code{vec_widen_umult_lo__@var{m}} instruction pattern
+@cindex @code{vec_widen_umult_lo_@var{m}} instruction pattern
 @cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
 @cindex @code{vec_widen_smult_lo_@var{m}} instruction pattern
+@cindex @code{vec_widen_umult_even_@var{m}} instruction pattern
+@cindex @code{vec_widen_umult_odd_@var{m}} instruction pattern
+@cindex @code{vec_widen_smult_even_@var{m}} instruction pattern
+@cindex @code{vec_widen_smult_odd_@var{m}} instruction pattern
 @item @samp{vec_widen_umult_hi_@var{m}}, @samp{vec_widen_umult_lo_@var{m}}
 @itemx @samp{vec_widen_smult_hi_@var{m}}, @samp{vec_widen_smult_lo_@var{m}}
+@itemx @samp{vec_widen_umult_even_@var{m}}, @samp{vec_widen_umult_odd_@var{m}}
+@itemx @samp{vec_widen_smult_even_@var{m}}, @samp{vec_widen_smult_odd_@var{m}}
 Signed/Unsigned widening multiplication.  The two inputs (operands 1 and 2)
 are vectors with N signed/unsigned elements of size S@.  Multiply the high/low
-elements of the two vectors, and put the N/2 products of size 2*S in the
-output vector (operand 0).
+or even/odd elements of the two vectors, and put the N/2 products of size 2*S
+in the output vector (operand 0).
 
 @cindex @code{vec_widen_ushiftl_hi_@var{m}} instruction pattern
 @cindex @code{vec_widen_ushiftl_lo_@var{m}} instruction pattern
diff --git a/gcc/expr.c b/gcc/expr.c
index 1279186..c56b0e5 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8917,29 +8917,15 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
 
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
-      {
-	tree oprnd0 = treeop0;
-	tree oprnd1 = treeop1;
-
-	expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
-	target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
-					    target, unsignedp);
-	gcc_assert (target);
-	return target;
-      }
-
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_WIDEN_LSHIFT_HI_EXPR:
     case VEC_WIDEN_LSHIFT_LO_EXPR:
-      {
-        tree oprnd0 = treeop0;
-        tree oprnd1 = treeop1;
-
-        expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
-        target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
-                                            target, unsignedp);
-        gcc_assert (target);
-        return target;
-      }
+      expand_operands (treeop0, treeop1, NULL_RTX, &op0, &op1, EXPAND_NORMAL);
+      target = expand_widen_pattern_expr (ops, op0, op1, NULL_RTX,
+					  target, unsignedp);
+      gcc_assert (target);
+      return target;
 
     case VEC_PACK_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 702f4e0..a491499 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -13657,8 +13657,11 @@ fold_binary_loc (location_t loc,
 
     case VEC_WIDEN_MULT_LO_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
       {
-	unsigned int nelts = TYPE_VECTOR_SUBPARTS (type), i;
+	unsigned int nelts = TYPE_VECTOR_SUBPARTS (type);
+	unsigned int out, ofs, scale;
 	tree *elts;
 
 	gcc_assert (TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0)) == nelts * 2
@@ -13671,19 +13674,28 @@ fold_binary_loc (location_t loc,
 	    || !vec_cst_ctor_to_array (arg1, elts + nelts * 2))
 	  return NULL_TREE;
 
-	if ((!BYTES_BIG_ENDIAN) ^ (code == VEC_WIDEN_MULT_LO_EXPR))
-	  elts += nelts;
-
-	for (i = 0; i < nelts; i++)
+	if (code == VEC_WIDEN_MULT_LO_EXPR)
+	  scale = 0, ofs = BYTES_BIG_ENDIAN ? nelts : 0;
+	else if (code == VEC_WIDEN_MULT_HI_EXPR)
+	  scale = 0, ofs = BYTES_BIG_ENDIAN ? 0 : nelts;
+	else if (code == VEC_WIDEN_MULT_EVEN_EXPR)
+	  scale = 1, ofs = 0;
+	else /* if (code == VEC_WIDEN_MULT_ODD_EXPR) */
+	  scale = 1, ofs = 1;
+	
+	for (out = 0; out < nelts; out++)
 	  {
-	    elts[i] = fold_convert_const (NOP_EXPR, TREE_TYPE (type), elts[i]);
-	    elts[i + nelts * 2]
-	      = fold_convert_const (NOP_EXPR, TREE_TYPE (type),
-				    elts[i + nelts * 2]);
-	    if (elts[i] == NULL_TREE || elts[i + nelts * 2] == NULL_TREE)
+	    unsigned int in1 = (out << scale) + ofs;
+	    unsigned int in2 = in1 + nelts * 2;
+	    tree t1, t2;
+
+	    t1 = fold_convert_const (NOP_EXPR, TREE_TYPE (type), elts[in1]);
+	    t2 = fold_convert_const (NOP_EXPR, TREE_TYPE (type), elts[in2]);
+
+	    if (t1 == NULL_TREE || t2 == NULL_TREE)
 	      return NULL_TREE;
-	    elts[i] = const_binop (MULT_EXPR, elts[i], elts[i + nelts * 2]);
-	    if (elts[i] == NULL_TREE || !CONSTANT_CLASS_P (elts[i]))
+	    elts[out] = const_binop (MULT_EXPR, t1, t2);
+	    if (elts[out] == NULL_TREE || !CONSTANT_CLASS_P (elts[out]))
 	      return NULL_TREE;
 	  }
 
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index baccd45..2d6757e 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -289,6 +289,10 @@ static const char * const optabs[] =
   "set_optab_handler (vec_widen_umult_lo_optab, $A, CODE_FOR_$(vec_widen_umult_lo_$a$))",
   "set_optab_handler (vec_widen_smult_hi_optab, $A, CODE_FOR_$(vec_widen_smult_hi_$a$))",
   "set_optab_handler (vec_widen_smult_lo_optab, $A, CODE_FOR_$(vec_widen_smult_lo_$a$))",
+  "set_optab_handler (vec_widen_umult_even_optab, $A, CODE_FOR_$(vec_widen_umult_even_$a$))",
+  "set_optab_handler (vec_widen_umult_odd_optab, $A, CODE_FOR_$(vec_widen_umult_odd_$a$))",
+  "set_optab_handler (vec_widen_smult_even_optab, $A, CODE_FOR_$(vec_widen_smult_even_$a$))",
+  "set_optab_handler (vec_widen_smult_odd_optab, $A, CODE_FOR_$(vec_widen_smult_odd_$a$))",
   "set_optab_handler (vec_widen_ushiftl_hi_optab, $A, CODE_FOR_$(vec_widen_ushiftl_hi_$a$))",
   "set_optab_handler (vec_widen_ushiftl_lo_optab, $A, CODE_FOR_$(vec_widen_ushiftl_lo_$a$))",
   "set_optab_handler (vec_widen_sshiftl_hi_optab, $A, CODE_FOR_$(vec_widen_sshiftl_hi_$a$))",
diff --git a/gcc/gimple-pretty-print.c b/gcc/gimple-pretty-print.c
index a80ae90..648597a 100644
--- a/gcc/gimple-pretty-print.c
+++ b/gcc/gimple-pretty-print.c
@@ -342,6 +342,8 @@ dump_binary_rhs (pretty_printer *buffer, gimple gs, int spc, int flags)
     case MAX_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_PACK_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
diff --git a/gcc/optabs.c b/gcc/optabs.c
index 3094476..fbea879 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -483,6 +483,14 @@ optab_for_tree_code (enum tree_code code, const_tree type,
       return TYPE_UNSIGNED (type) ?
 	vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
 
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+      return TYPE_UNSIGNED (type) ?
+	vec_widen_umult_even_optab : vec_widen_smult_even_optab;
+
+    case VEC_WIDEN_MULT_ODD_EXPR:
+      return TYPE_UNSIGNED (type) ?
+	vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
+
     case VEC_WIDEN_LSHIFT_HI_EXPR:
       return TYPE_UNSIGNED (type) ?
         vec_widen_ushiftl_hi_optab : vec_widen_sshiftl_hi_optab;
diff --git a/gcc/optabs.h b/gcc/optabs.h
index d87aff8..37a6bfd 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -340,12 +340,16 @@ enum optab_index
   OTI_vec_shr,
   /* Extract specified elements from vectors, for vector load.  */
   OTI_vec_realign_load,
-  /* Widening multiplication.
-     The high/low part of the resulting vector of products is returned.  */
+  /* Widening multiplication.  The high/low/even/odd part of the
+     resulting vector of products is returned.  */
   OTI_vec_widen_umult_hi,
   OTI_vec_widen_umult_lo,
   OTI_vec_widen_smult_hi,
   OTI_vec_widen_smult_lo,
+  OTI_vec_widen_umult_even,
+  OTI_vec_widen_umult_odd,
+  OTI_vec_widen_smult_even,
+  OTI_vec_widen_smult_odd,
   /* Widening shift left.
      The high/low part of the resulting vector is returned.  */
   OTI_vec_widen_ushiftl_hi,
@@ -565,6 +569,10 @@ enum optab_index
 #define vec_widen_umult_lo_optab (&optab_table[OTI_vec_widen_umult_lo])
 #define vec_widen_smult_hi_optab (&optab_table[OTI_vec_widen_smult_hi])
 #define vec_widen_smult_lo_optab (&optab_table[OTI_vec_widen_smult_lo])
+#define vec_widen_umult_even_optab (&optab_table[OTI_vec_widen_umult_even])
+#define vec_widen_umult_odd_optab (&optab_table[OTI_vec_widen_umult_odd])
+#define vec_widen_smult_even_optab (&optab_table[OTI_vec_widen_smult_even])
+#define vec_widen_smult_odd_optab (&optab_table[OTI_vec_widen_smult_odd])
 #define vec_widen_ushiftl_hi_optab (&optab_table[OTI_vec_widen_ushiftl_hi])
 #define vec_widen_ushiftl_lo_optab (&optab_table[OTI_vec_widen_ushiftl_lo])
 #define vec_widen_sshiftl_hi_optab (&optab_table[OTI_vec_widen_sshiftl_hi])
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index e03313e..d8a396f 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -3724,6 +3724,8 @@ do_pointer_plus_expr_check:
     case WIDEN_SUM_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_PACK_TRUNC_EXPR:
     case VEC_PACK_SAT_EXPR:
     case VEC_PACK_FIX_TRUNC_EXPR:
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index 7d444e1..f576ee5 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3456,6 +3456,8 @@ estimate_operator_cost (enum tree_code code, eni_weights *weights,
 
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_UNPACK_HI_EXPR:
     case VEC_UNPACK_LO_EXPR:
     case VEC_UNPACK_FLOAT_HI_EXPR:
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index f418d39..cdf3f28 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -602,6 +602,7 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
   tree op0, op1;
   const char *str;
   bool is_expr;
+  enum tree_code code;
 
   if (node == NULL_TREE)
     return spc;
@@ -614,7 +615,8 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
   if ((flags & TDF_LINENO) && EXPR_HAS_LOCATION (node))
     dump_location (buffer, EXPR_LOCATION (node));
 
-  switch (TREE_CODE (node))
+  code = TREE_CODE (node);
+  switch (code)
     {
     case ERROR_MARK:
       pp_string (buffer, "<<< error >>>");
@@ -2336,31 +2338,15 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
       break;
 
     case VEC_WIDEN_MULT_HI_EXPR:
-      pp_string (buffer, " VEC_WIDEN_MULT_HI_EXPR < ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
-      pp_string (buffer, ", ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
-      pp_string (buffer, " > ");
-      break;
-
     case VEC_WIDEN_MULT_LO_EXPR:
-      pp_string (buffer, " VEC_WIDEN_MULT_LO_EXPR < ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
-      pp_string (buffer, ", ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
-      pp_string (buffer, " > ");
-      break;
-
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
     case VEC_WIDEN_LSHIFT_HI_EXPR:
-      pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
-      pp_string (buffer, ", ");
-      dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
-      pp_string (buffer, " > ");
-      break;
-
     case VEC_WIDEN_LSHIFT_LO_EXPR:
-      pp_string (buffer, " VEC_WIDEN_LSHIFT_HI_EXPR < ");
+      pp_character (buffer, ' ');
+      for (str = tree_code_name [code]; *str; str++)
+	pp_character (buffer, TOUPPER (*str));
+      pp_string (buffer, " < ");
       dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
       pp_string (buffer, ", ");
       dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 8d05101..e37c631 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -1361,6 +1361,23 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
       || code == VEC_UNPACK_FLOAT_LO_EXPR)
     type = TREE_TYPE (rhs1);
 
+  /* For widening/narrowing vector operations, the relevant type is of the
+     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
+     calculated in the same way above.  */
+  if (code == WIDEN_SUM_EXPR
+      || code == VEC_WIDEN_MULT_HI_EXPR
+      || code == VEC_WIDEN_MULT_LO_EXPR
+      || code == VEC_WIDEN_MULT_EVEN_EXPR
+      || code == VEC_WIDEN_MULT_ODD_EXPR
+      || code == VEC_UNPACK_HI_EXPR
+      || code == VEC_UNPACK_LO_EXPR
+      || code == VEC_PACK_TRUNC_EXPR
+      || code == VEC_PACK_SAT_EXPR
+      || code == VEC_PACK_FIX_TRUNC_EXPR
+      || code == VEC_WIDEN_LSHIFT_HI_EXPR
+      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
+    type = TREE_TYPE (rhs1);
+
   /* Choose between vector shift/rotate by vector and vector shift/rotate by
      scalar */
   if (code == LSHIFT_EXPR
@@ -1409,21 +1426,6 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
   else
     op = optab_for_tree_code (code, type, optab_default);
 
-  /* For widening/narrowing vector operations, the relevant type is of the
-     arguments, not the widened result.  VEC_UNPACK_FLOAT_*_EXPR is
-     calculated in the same way above.  */
-  if (code == WIDEN_SUM_EXPR
-      || code == VEC_WIDEN_MULT_HI_EXPR
-      || code == VEC_WIDEN_MULT_LO_EXPR
-      || code == VEC_UNPACK_HI_EXPR
-      || code == VEC_UNPACK_LO_EXPR
-      || code == VEC_PACK_TRUNC_EXPR
-      || code == VEC_PACK_SAT_EXPR
-      || code == VEC_PACK_FIX_TRUNC_EXPR
-      || code == VEC_WIDEN_LSHIFT_HI_EXPR
-      || code == VEC_WIDEN_LSHIFT_LO_EXPR)
-    type = TREE_TYPE (rhs1);
-
   /* Optabs will try converting a negation into a subtraction, so
      look for it as well.  TODO: negation of floating-point vectors
      might be turned into an exclusive OR toggling the sign bit.  */
diff --git a/gcc/tree.c b/gcc/tree.c
index f92f070..d10b9ab 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -6927,6 +6927,8 @@ commutative_tree_code (enum tree_code code)
     case WIDEN_MULT_EXPR:
     case VEC_WIDEN_MULT_HI_EXPR:
     case VEC_WIDEN_MULT_LO_EXPR:
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+    case VEC_WIDEN_MULT_ODD_EXPR:
       return true;
 
     default:
diff --git a/gcc/tree.def b/gcc/tree.def
index b0d4aea..70188ff 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -1171,6 +1171,10 @@ DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_MULT_HI_EXPR, "widen_mult_hi_expr", tcc_binary, 2)
 DEFTREECODE (VEC_WIDEN_MULT_LO_EXPR, "widen_mult_lo_expr", tcc_binary, 2)
 
+/* Similarly, but return the even or odd N/2 products.  */
+DEFTREECODE (VEC_WIDEN_MULT_EVEN_EXPR, "widen_mult_even_expr", tcc_binary, 2)
+DEFTREECODE (VEC_WIDEN_MULT_ODD_EXPR, "widen_mult_odd_expr", tcc_binary, 2)
+
 /* Unpack (extract and promote/widen) the high/low elements of the input
    vector into the output vector.  The input vector has twice as many
    elements as the output vector, that are half the size of the elements
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 0/7] Clean up widen mult even/odd
@ 2012-07-10  8:23 Richard Henderson
  2012-07-10  8:23 ` [PATCH 4/7] spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
                   ` (8 more replies)
  0 siblings, 9 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

I find it instructive that 4 of the 5 isas that actually implement
widening integer multiplication do have mult-widen-even as the isa
primitive (even if the -odd variant is missing).  The fact that this
operation is implemented as a set of builtins and target hooks has
led to disturbingly cookie-cutter implementations of these hooks
in the various backends.

Thus I choose to add VEC_WIDEN_MULT_EVEN/ODD_EXPR as tree codes and
optabs.  This removes a fairly trivial amount of code from three
backends (the fourth backend, ia64, never grew this support).

The existence of optabs then allows the expansion of MULT_HIGHPART_EXPR
at the rtl-expansion level without having to resort to builtin expansion
in order to emit the even/odd alternative.  This saves a fairly 
substantial amount of code from the vectorizer.

I've not touched the interface to supportable_widening_operation,
which is still prepared to return a CALL_EXPR and some decls.  After
this patch set it will never do so.  I'm undecided as to whether we
ought to be prepared for such in the future, or whether this should
simply go in as a completely separate patch that could in the future
be easily reverted.

Tested on x86_64; cross-compiled to ppc64 and spu, spot checking the
relevant division-by-constant testcases.

r~

Richard Henderson (7):
  Add VEC_WIDEN_MULT_EVEN/ODD_EXPR
  i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  rs6000: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  Move vector highpart emulation to the optabs layer
  Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation
  Zap now unused builtin_mul_widen_even/odd target hooks

 gcc/ChangeLog                        |   89 ++++++++++++
 gcc/cfgexpand.c                      |    4 +-
 gcc/config/i386/i386.c               |  103 ++-----------
 gcc/config/i386/sse.md               |   18 +--
 gcc/config/rs6000/altivec.md         |   54 +++----
 gcc/config/rs6000/rs6000-builtin.def |   24 +--
 gcc/config/rs6000/rs6000.c           |   51 -------
 gcc/config/spu/spu-builtins.def      |   24 +--
 gcc/config/spu/spu-builtins.md       |   65 ---------
 gcc/config/spu/spu.c                 |   42 ------
 gcc/config/spu/spu.md                |   86 +++++++++--
 gcc/doc/md.texi                      |   12 +-
 gcc/doc/tm.texi                      |   22 ---
 gcc/doc/tm.texi.in                   |   22 ---
 gcc/expmed.c                         |   32 ++--
 gcc/expr.c                           |   35 ++---
 gcc/fold-const.c                     |   36 +++--
 gcc/genopinit.c                      |    4 +
 gcc/gimple-pretty-print.c            |    2 +
 gcc/optabs.c                         |  134 +++++++++++++++++
 gcc/optabs.h                         |   18 ++-
 gcc/system.h                         |    4 +-
 gcc/target.def                       |   14 --
 gcc/tree-cfg.c                       |    2 +
 gcc/tree-inline.c                    |    2 +
 gcc/tree-pretty-print.c              |   32 ++--
 gcc/tree-vect-generic.c              |  145 +++++-------------
 gcc/tree-vect-patterns.c             |   23 +--
 gcc/tree-vect-stmts.c                |  267 +++++++++-------------------------
 gcc/tree.c                           |    2 +
 gcc/tree.def                         |    4 +
 31 files changed, 580 insertions(+), 792 deletions(-)

-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 5/7] Move vector highpart emulation to the optabs layer
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (3 preceding siblings ...)
  2012-07-10  8:23 ` [PATCH 3/7] rs6000: " Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 7/7] Zap now unused builtin_mul_widen_even/odd target hooks Richard Henderson
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * expmed.c (expmed_mult_highpart): Rename from expand_mult_highpart.
        (expmed_mult_highpart_optab): Rename from expand_mult_highpart_optab.
        * optabs.c (can_mult_highpart_p): New.
        (expand_mult_highpart): New.
        * expr.c (expand_expr_real_2) <MULT_HIGHPART_EXPR>: Use it.
        * tree-vect-generic.c (expand_vector_operations_1): Don't expand
        by pieces if can_mult_highpart_p.
        (expand_vector_divmod): Use can_mult_highpart_p and always
        generate MULT_HIGHPART_EXPR.
        * tree-vect-patterns.c (vect_recog_divmod_pattern): Likewise.
        * tree-vect-stmts.c (vectorizable_operation): Likewise.
---
 gcc/ChangeLog            |   12 ++++
 gcc/expmed.c             |   32 ++++-----
 gcc/expr.c               |    7 +-
 gcc/optabs.c             |  126 ++++++++++++++++++++++++++++++++++
 gcc/optabs.h             |    6 ++
 gcc/tree-vect-generic.c  |  113 +++++-------------------------
 gcc/tree-vect-patterns.c |   23 +------
 gcc/tree-vect-stmts.c    |  171 ++++++----------------------------------------
 8 files changed, 204 insertions(+), 286 deletions(-)

diff --git a/gcc/expmed.c b/gcc/expmed.c
index cec8d23..4101f61 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -2381,8 +2381,8 @@ static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
 			      const struct algorithm *, enum mult_variant);
 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
 static rtx extract_high_half (enum machine_mode, rtx);
-static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
-static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
+static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
+static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
 				       int, int);
 /* Compute and return the best algorithm for multiplying by T.
    The algorithm must cost less than cost_limit
@@ -3477,7 +3477,7 @@ expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
   return target;
 }
 
-/* Subroutine of expand_mult_highpart.  Return the MODE high part of OP.  */
+/* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP.  */
 
 static rtx
 extract_high_half (enum machine_mode mode, rtx op)
@@ -3495,11 +3495,11 @@ extract_high_half (enum machine_mode mode, rtx op)
   return convert_modes (mode, wider_mode, op, 0);
 }
 
-/* Like expand_mult_highpart, but only consider using a multiplication
+/* Like expmed_mult_highpart, but only consider using a multiplication
    optab.  OP1 is an rtx for the constant operand.  */
 
 static rtx
-expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
+expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
 			    rtx target, int unsignedp, int max_cost)
 {
   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
@@ -3610,7 +3610,7 @@ expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
    MAX_COST is the total allowed cost for the expanded RTL.  */
 
 static rtx
-expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
+expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
 		      rtx target, int unsignedp, int max_cost)
 {
   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
@@ -3633,7 +3633,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
      mode == word_mode, however all the cost calculations in
      synth_mult etc. assume single-word operations.  */
   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
-    return expand_mult_highpart_optab (mode, op0, op1, target,
+    return expmed_mult_highpart_optab (mode, op0, op1, target,
 				       unsignedp, max_cost);
 
   extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
@@ -3651,7 +3651,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
     {
       /* See whether the specialized multiplication optabs are
 	 cheaper than the shift/add version.  */
-      tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
+      tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
 					alg.cost.cost + extra_cost);
       if (tem)
 	return tem;
@@ -3666,7 +3666,7 @@ expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
 
       return tem;
     }
-  return expand_mult_highpart_optab (mode, op0, op1, target,
+  return expmed_mult_highpart_optab (mode, op0, op1, target,
 				     unsignedp, max_cost);
 }
 
@@ -3940,7 +3940,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 
      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
      half of the product.  Different strategies for generating the product are
-     implemented in expand_mult_highpart.
+     implemented in expmed_mult_highpart.
 
      If what we actually want is the remainder, we generate that by another
      by-constant multiplication and a subtraction.  */
@@ -3990,7 +3990,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
      mode for which we can do the operation with a library call.  */
 
   /* We might want to refine this now that we have division-by-constant
-     optimization.  Since expand_mult_highpart tries so many variants, it is
+     optimization.  Since expmed_mult_highpart tries so many variants, it is
      not straightforward to generalize this.  Maybe we should make an array
      of possible modes in init_expmed?  Save this for GCC 2.7.  */
 
@@ -4155,7 +4155,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			      = (shift_cost[speed][compute_mode][post_shift - 1]
 				 + shift_cost[speed][compute_mode][1]
 				 + 2 * add_cost[speed][compute_mode]);
-			    t1 = expand_mult_highpart (compute_mode, op0,
+			    t1 = expmed_mult_highpart (compute_mode, op0,
 						       GEN_INT (ml),
 						       NULL_RTX, 1,
 						       max_cost - extra_cost);
@@ -4187,7 +4187,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			    extra_cost
 			      = (shift_cost[speed][compute_mode][pre_shift]
 				 + shift_cost[speed][compute_mode][post_shift]);
-			    t2 = expand_mult_highpart (compute_mode, t1,
+			    t2 = expmed_mult_highpart (compute_mode, t1,
 						       GEN_INT (ml),
 						       NULL_RTX, 1,
 						       max_cost - extra_cost);
@@ -4313,7 +4313,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
 				      + shift_cost[speed][compute_mode][size - 1]
 				      + add_cost[speed][compute_mode]);
-			t1 = expand_mult_highpart (compute_mode, op0,
+			t1 = expmed_mult_highpart (compute_mode, op0,
 						   GEN_INT (ml), NULL_RTX, 0,
 						   max_cost - extra_cost);
 			if (t1 == 0)
@@ -4348,7 +4348,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
 				      + shift_cost[speed][compute_mode][size - 1]
 				      + 2 * add_cost[speed][compute_mode]);
-			t1 = expand_mult_highpart (compute_mode, op0, mlr,
+			t1 = expmed_mult_highpart (compute_mode, op0, mlr,
 						   NULL_RTX, 0,
 						   max_cost - extra_cost);
 			if (t1 == 0)
@@ -4436,7 +4436,7 @@ expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
 			extra_cost = (shift_cost[speed][compute_mode][post_shift]
 				      + shift_cost[speed][compute_mode][size - 1]
 				      + 2 * add_cost[speed][compute_mode]);
-			t3 = expand_mult_highpart (compute_mode, t2,
+			t3 = expmed_mult_highpart (compute_mode, t2,
 						   GEN_INT (ml), NULL_RTX, 1,
 						   max_cost - extra_cost);
 			if (t3 != 0)
diff --git a/gcc/expr.c b/gcc/expr.c
index c56b0e5..916dee0 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -8554,9 +8554,14 @@ expand_expr_real_2 (sepops ops, rtx target, enum machine_mode tmode,
       return expand_divmod (0, code, mode, op0, op1, target, unsignedp);
 
     case RDIV_EXPR:
-    case MULT_HIGHPART_EXPR:
       goto binop;
 
+    case MULT_HIGHPART_EXPR:
+      expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
+      temp = expand_mult_highpart (mode, op0, op1, target, unsignedp);
+      gcc_assert (temp);
+      return temp;
+
     case TRUNC_MOD_EXPR:
     case FLOOR_MOD_EXPR:
     case CEIL_MOD_EXPR:
diff --git a/gcc/optabs.c b/gcc/optabs.c
index fbea879..e1ecc65 100644
--- a/gcc/optabs.c
+++ b/gcc/optabs.c
@@ -7162,6 +7162,132 @@ expand_vec_cond_expr (tree vec_cond_type, tree op0, tree op1, tree op2,
   return ops[0].value;
 }
 
+/* Return non-zero if a highpart multiply is supported or can be synthesized.
+   For the benefit of expand_mult_highpart, the return value is 1 for direct,
+   2 for even/odd widening, and 3 for hi/lo widening.  */
+
+int
+can_mult_highpart_p (enum machine_mode mode, bool uns_p)
+{
+  optab op;
+  unsigned char *sel;
+  unsigned i, nunits;
+
+  op = uns_p ? umul_highpart_optab : smul_highpart_optab;
+  if (optab_handler (op, mode) != CODE_FOR_nothing)
+    return 1;
+
+  /* If the mode is an integral vector, synth from widening operations.  */
+  if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT)
+    return 0;
+
+  nunits = GET_MODE_NUNITS (mode);
+  sel = XALLOCAVEC (unsigned char, nunits);
+
+  op = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
+  if (optab_handler (op, mode) != CODE_FOR_nothing)
+    {
+      op = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
+      if (optab_handler (op, mode) != CODE_FOR_nothing)
+	{
+	  for (i = 0; i < nunits; ++i)
+	    sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
+	  if (can_vec_perm_p (mode, false, sel))
+	    return 2;
+	}
+    }
+
+  op = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
+  if (optab_handler (op, mode) != CODE_FOR_nothing)
+    {
+      op = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+      if (optab_handler (op, mode) != CODE_FOR_nothing)
+	{
+	  for (i = 0; i < nunits; ++i)
+	    sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
+	  if (can_vec_perm_p (mode, false, sel))
+	    return 3;
+	}
+    }
+
+  return 0;
+}
+
+/* Expand a highpart multiply.  */
+
+rtx
+expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
+		      rtx target, bool uns_p)
+{
+  struct expand_operand eops[3];
+  enum insn_code icode;
+  int method, i, nunits;
+  enum machine_mode wmode;
+  rtx m1, m2, perm;
+  optab tab1, tab2;
+  rtvec v;
+
+  method = can_mult_highpart_p (mode, uns_p);
+  switch (method)
+    {
+    case 0:
+      return NULL_RTX;
+    case 1:
+      tab1 = uns_p ? umul_highpart_optab : smul_highpart_optab;
+      return expand_binop (mode, tab1, op0, op1, target, uns_p,
+			   OPTAB_LIB_WIDEN);
+    case 2:
+      tab1 = uns_p ? vec_widen_umult_even_optab : vec_widen_smult_even_optab;
+      tab2 = uns_p ? vec_widen_umult_odd_optab : vec_widen_smult_odd_optab;
+      break;
+    case 3:
+      tab1 = uns_p ? vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
+      tab2 = uns_p ? vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
+      if (BYTES_BIG_ENDIAN)
+	{
+	  optab t = tab1;
+	  tab1 = tab2;
+	  tab2 = t;
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  icode = optab_handler (tab1, mode);
+  nunits = GET_MODE_NUNITS (mode);
+  wmode = insn_data[icode].operand[0].mode;
+  gcc_checking_assert (2 * GET_MODE_NUNITS (wmode) == nunits);
+  gcc_checking_assert (GET_MODE_SIZE (wmode) == GET_MODE_SIZE (mode));
+
+  create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
+  create_input_operand (&eops[1], op0, mode);
+  create_input_operand (&eops[2], op1, mode);
+  expand_insn (icode, 3, eops);
+  m1 = gen_lowpart (mode, eops[0].value);
+
+  create_output_operand (&eops[0], gen_reg_rtx (wmode), wmode);
+  create_input_operand (&eops[1], op0, mode);
+  create_input_operand (&eops[2], op1, mode);
+  expand_insn (optab_handler (tab2, mode), 3, eops);
+  m2 = gen_lowpart (mode, eops[0].value);
+
+  v = rtvec_alloc (nunits);
+  if (method == 2)
+    {
+      for (i = 0; i < nunits; ++i)
+	RTVEC_ELT (v, i) = GEN_INT (!BYTES_BIG_ENDIAN + (i & ~1)
+				    + ((i & 1) ? nunits : 0));
+    }
+  else
+    {
+      for (i = 0; i < nunits; ++i)
+	RTVEC_ELT (v, i) = GEN_INT (2 * i + (BYTES_BIG_ENDIAN ? 0 : 1));
+    }
+  perm = gen_rtx_CONST_VECTOR (mode, v);
+
+  return expand_vec_perm (mode, m1, m2, perm, target);
+}
 \f
 /* Return true if there is a compare_and_swap pattern.  */
 
diff --git a/gcc/optabs.h b/gcc/optabs.h
index 37a6bfd..8b04e17 100644
--- a/gcc/optabs.h
+++ b/gcc/optabs.h
@@ -1014,6 +1014,12 @@ extern bool can_vec_perm_p (enum machine_mode, bool, const unsigned char *);
 /* Generate code for VEC_PERM_EXPR.  */
 extern rtx expand_vec_perm (enum machine_mode, rtx, rtx, rtx, rtx);
 
+/* Return non-zero if target supports a given highpart multiplication.  */
+extern int can_mult_highpart_p (enum machine_mode, bool);
+
+/* Generate code for MULT_HIGHPART_EXPR.  */
+extern rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, bool);
+
 /* Return the insn used to implement mode MODE of OP, or CODE_FOR_nothing
    if the target does not have such an insn.  */
 
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index e37c631..b0f41d1 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -454,10 +454,9 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
   int dummy_int;
   unsigned int i, unsignedp = TYPE_UNSIGNED (TREE_TYPE (type));
   unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
-  optab op;
   tree *vec;
-  unsigned char *sel = NULL;
-  tree cur_op, m1, m2, mulcst, perm_mask, wider_type, tem, decl_e, decl_o;
+  tree cur_op, mulcst, tem;
+  optab op;
 
   if (prec > HOST_BITS_PER_WIDE_INT)
     return NULL_TREE;
@@ -745,54 +744,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
   if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
     return NULL_TREE;
 
-  op = optab_for_tree_code (MULT_HIGHPART_EXPR, type, optab_default);
-  if (op != NULL && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
-    wider_type = decl_e = decl_o = NULL_TREE;
-  else
-    {
-      wider_type = build_nonstandard_integer_type (prec * 2, unsignedp),
-      wider_type = build_vector_type (wider_type, nunits / 2);
-      if (GET_MODE_CLASS (TYPE_MODE (wider_type)) != MODE_VECTOR_INT
-	  || GET_MODE_BITSIZE (TYPE_MODE (wider_type))
-	     != GET_MODE_BITSIZE (TYPE_MODE (type)))
-	return NULL_TREE;
-
-      sel = XALLOCAVEC (unsigned char, nunits);
-
-      if (targetm.vectorize.builtin_mul_widen_even
-	  && targetm.vectorize.builtin_mul_widen_odd
-	  && (decl_e = targetm.vectorize.builtin_mul_widen_even (type))
-	  && (decl_o = targetm.vectorize.builtin_mul_widen_odd (type))
-	  && (TYPE_MODE (TREE_TYPE (TREE_TYPE (decl_e)))
-	      == TYPE_MODE (wider_type)))
-	{
-	  for (i = 0; i < nunits; i++)
-	    sel[i] = !BYTES_BIG_ENDIAN + (i & ~1) + ((i & 1) ? nunits : 0);
-	  if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
-	    decl_e = decl_o = NULL_TREE;
-	}
-      else
-	decl_e = decl_o = NULL_TREE;
-
-      if (decl_e == NULL_TREE)
-	{
-	  op = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
-				    type, optab_default);
-	  if (op == NULL
-	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-	    return NULL_TREE;
-	  op = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
-				    type, optab_default);
-	  if (op == NULL
-	      || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
-	    return NULL_TREE;
-
-	  for (i = 0; i < nunits; i++)
-	    sel[i] = 2 * i + (BYTES_BIG_ENDIAN ? 0 : 1);
-	  if (!can_vec_perm_p (TYPE_MODE (type), false, sel))
-	    return NULL_TREE;
-	}
-    }
+  if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
+    return NULL_TREE;
 
   cur_op = op0;
 
@@ -830,46 +783,8 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
   for (i = 0; i < nunits; i++)
     vec[i] = build_int_cst (TREE_TYPE (type), mulc[i]);
   mulcst = build_vector (type, vec);
-  if (wider_type == NULL_TREE)
-    cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
-  else
-    {
-      for (i = 0; i < nunits; i++)
-	vec[i] = build_int_cst (TREE_TYPE (type), sel[i]);
-      perm_mask = build_vector (type, vec);
-
-      if (decl_e != NULL_TREE)
-	{
-	  gimple call;
-
-	  call = gimple_build_call (decl_e, 2, cur_op, mulcst);
-	  m1 = create_tmp_reg (wider_type, NULL);
-	  add_referenced_var (m1);
-	  m1 = make_ssa_name (m1, call);
-	  gimple_call_set_lhs (call, m1);
-	  gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
-
-	  call = gimple_build_call (decl_o, 2, cur_op, mulcst);
-	  m2 = create_tmp_reg (wider_type, NULL);
-	  add_referenced_var (m2);
-	  m2 = make_ssa_name (m2, call);
-	  gimple_call_set_lhs (call, m2);
-	  gsi_insert_seq_before (gsi, call, GSI_SAME_STMT);
-	}
-      else
-	{
-	  m1 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_HI_EXPR
-						      : VEC_WIDEN_MULT_LO_EXPR,
-				wider_type, cur_op, mulcst);
-	  m2 = gimplify_build2 (gsi, BYTES_BIG_ENDIAN ? VEC_WIDEN_MULT_LO_EXPR
-						      : VEC_WIDEN_MULT_HI_EXPR,
-				wider_type, cur_op, mulcst);
-	}
 
-      m1 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m1);
-      m2 = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, m2);
-      cur_op = gimplify_build3 (gsi, VEC_PERM_EXPR, type, m1, m2, perm_mask);
-    }
+  cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
 
   switch (mode)
     {
@@ -1454,13 +1369,17 @@ expand_vector_operations_1 (gimple_stmt_iterator *gsi)
   if (compute_type == type)
     {
       compute_mode = TYPE_MODE (compute_type);
-      if (VECTOR_MODE_P (compute_mode)
-          && op != NULL
-	  && optab_handler (op, compute_mode) != CODE_FOR_nothing)
-	return;
-      else
-	/* There is no operation in hardware, so fall back to scalars.  */
-	compute_type = TREE_TYPE (type);
+      if (VECTOR_MODE_P (compute_mode))
+	{
+          if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
+	    return;
+	  if (code == MULT_HIGHPART_EXPR
+	      && can_mult_highpart_p (compute_mode,
+				      TYPE_UNSIGNED (compute_type)))
+	    return;
+	}
+      /* There is no operation in hardware, so fall back to scalars.  */
+      compute_type = TREE_TYPE (type);
     }
 
   gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index 3f57e5d..ab9e927 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -1642,10 +1642,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_vinfo);
   optab optab;
-  tree dummy, q;
-  enum tree_code dummy_code;
+  tree q;
   int dummy_int, prec;
-  VEC (tree, heap) *dummy_vec;
   stmt_vec_info def_stmt_vinfo;
 
   if (!is_gimple_assign (last_stmt))
@@ -1814,23 +1812,8 @@ vect_recog_divmod_pattern (VEC (gimple, heap) **stmts,
       || prec > HOST_BITS_PER_WIDE_INT)
     return NULL;
 
-  optab = optab_for_tree_code (MULT_HIGHPART_EXPR, vectype, optab_default);
-  if (optab == NULL
-      || optab_handler (optab, TYPE_MODE (vectype)) == CODE_FOR_nothing)
-    {
-      tree witype = build_nonstandard_integer_type (prec * 2,
-						    TYPE_UNSIGNED (itype));
-      tree vecwtype = get_vectype_for_scalar_type (witype);
-
-      if (vecwtype == NULL_TREE)
-	return NULL;
-      if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
-					   vecwtype, vectype,
-					   &dummy, &dummy, &dummy_code,
-					   &dummy_code, &dummy_int,
-					   &dummy_vec))
-	return NULL;
-    }
+  if (!can_mult_highpart_p (TYPE_MODE (vectype), TYPE_UNSIGNED (itype)))
+    return NULL;
 
   STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) = NULL;
 
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 8e0965a..9caf1c6 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -3304,18 +3304,17 @@ static bool
 vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 			gimple *vec_stmt, slp_tree slp_node)
 {
-  tree vec_dest, vec_dest2 = NULL_TREE;
-  tree vec_dest3 = NULL_TREE, vec_dest4 = NULL_TREE;
+  tree vec_dest;
   tree scalar_dest;
   tree op0, op1 = NULL_TREE, op2 = NULL_TREE;
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
-  tree vectype, wide_vectype = NULL_TREE;
+  tree vectype;
   loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
   enum tree_code code;
   enum machine_mode vec_mode;
   tree new_temp;
   int op_type;
-  optab optab, optab2 = NULL;
+  optab optab;
   int icode;
   tree def;
   gimple def_stmt;
@@ -3332,8 +3331,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
   tree vop0, vop1, vop2;
   bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
   int vf;
-  unsigned char *sel = NULL;
-  tree decl1 = NULL_TREE, decl2 = NULL_TREE, perm_mask = NULL_TREE;
 
   if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo)
     return false;
@@ -3455,87 +3452,26 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
       || code == RROTATE_EXPR)
    return false;
 
-  optab = optab_for_tree_code (code, vectype, optab_default);
-
   /* Supportable by target?  */
-  if (!optab && code != MULT_HIGHPART_EXPR)
+
+  vec_mode = TYPE_MODE (vectype);
+  if (code == MULT_HIGHPART_EXPR)
     {
-      if (vect_print_dump_info (REPORT_DETAILS))
-	fprintf (vect_dump, "no optab.");
-      return false;
+      if (can_mult_highpart_p (vec_mode, TYPE_UNSIGNED (vectype)))
+	icode = 0;
+      else
+	icode = CODE_FOR_nothing;
     }
-  vec_mode = TYPE_MODE (vectype);
-  icode = optab ? (int) optab_handler (optab, vec_mode) : CODE_FOR_nothing;
-
-  if (icode == CODE_FOR_nothing
-      && code == MULT_HIGHPART_EXPR
-      && VECTOR_MODE_P (vec_mode)
-      && BYTES_BIG_ENDIAN == WORDS_BIG_ENDIAN)
-    {
-      /* If MULT_HIGHPART_EXPR isn't supported by the backend, see
-	 if we can emit VEC_WIDEN_MULT_{LO,HI}_EXPR followed by VEC_PERM_EXPR
-	 or builtin_mul_widen_{even,odd} followed by VEC_PERM_EXPR.  */
-      unsigned int prec = TYPE_PRECISION (TREE_TYPE (scalar_dest));
-      unsigned int unsignedp = TYPE_UNSIGNED (TREE_TYPE (scalar_dest));
-      tree wide_type
-	= build_nonstandard_integer_type (prec * 2, unsignedp);
-      wide_vectype
-        = get_same_sized_vectype (wide_type, vectype);
-
-      sel = XALLOCAVEC (unsigned char, nunits_in);
-      if (VECTOR_MODE_P (TYPE_MODE (wide_vectype))
-	  && GET_MODE_SIZE (TYPE_MODE (wide_vectype))
-	     == GET_MODE_SIZE (vec_mode))
-	{
-	  if (targetm.vectorize.builtin_mul_widen_even
-	      && (decl1 = targetm.vectorize.builtin_mul_widen_even (vectype))
-	      && targetm.vectorize.builtin_mul_widen_odd
-	      && (decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype))
-	      && TYPE_MODE (TREE_TYPE (TREE_TYPE (decl1)))
-		 == TYPE_MODE (wide_vectype))
-	    {
-	      for (i = 0; i < nunits_in; i++)
-		sel[i] = !BYTES_BIG_ENDIAN + (i & ~1)
-			 + ((i & 1) ? nunits_in : 0);
-	      if (can_vec_perm_p (vec_mode, false, sel))
-		icode = 0;
-	    }
-	  if (icode == CODE_FOR_nothing)
-	    {
-	      decl1 = NULL_TREE;
-	      decl2 = NULL_TREE;
-	      optab = optab_for_tree_code (VEC_WIDEN_MULT_LO_EXPR,
-					   vectype, optab_default);
-	      optab2 = optab_for_tree_code (VEC_WIDEN_MULT_HI_EXPR,
-					    vectype, optab_default);
-	      if (optab != NULL
-		  && optab2 != NULL
-		  && optab_handler (optab, vec_mode) != CODE_FOR_nothing
-		  && optab_handler (optab2, vec_mode) != CODE_FOR_nothing
-		  && insn_data[optab_handler (optab, vec_mode)].operand[0].mode
-		     == TYPE_MODE (wide_vectype)
-		  && insn_data[optab_handler (optab2,
-					      vec_mode)].operand[0].mode
-		     == TYPE_MODE (wide_vectype))
-		{
-		  for (i = 0; i < nunits_in; i++)
-		    sel[i] = !BYTES_BIG_ENDIAN + 2 * i;
-		  if (can_vec_perm_p (vec_mode, false, sel))
-		    icode = optab_handler (optab, vec_mode);
-		}
-	    }
-	}
-      if (icode == CODE_FOR_nothing)
+  else
+    {
+      optab = optab_for_tree_code (code, vectype, optab_default);
+      if (!optab)
 	{
-	  if (optab_for_tree_code (code, vectype, optab_default) == NULL)
-	    {
-	      if (vect_print_dump_info (REPORT_DETAILS))
-		fprintf (vect_dump, "no optab.");
-	      return false;
-	    }
-	  wide_vectype = NULL_TREE;
-	  optab2 = NULL;
+	  if (vect_print_dump_info (REPORT_DETAILS))
+	    fprintf (vect_dump, "no optab.");
+	  return false;
 	}
+      icode = (int) optab_handler (optab, vec_mode);
     }
 
   if (icode == CODE_FOR_nothing)
@@ -3575,16 +3511,7 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
     fprintf (vect_dump, "transform binary/unary operation.");
 
   /* Handle def.  */
-  if (wide_vectype)
-    {
-      vec_dest = vect_create_destination_var (scalar_dest, wide_vectype);
-      vec_dest2 = vect_create_destination_var (scalar_dest, wide_vectype);
-      vec_dest3 = vect_create_destination_var (scalar_dest, vectype);
-      vec_dest4 = vect_create_destination_var (scalar_dest, vectype);
-      perm_mask = vect_gen_perm_mask (vectype, sel);
-    }
-  else
-    vec_dest = vect_create_destination_var (scalar_dest, vectype);
+  vec_dest = vect_create_destination_var (scalar_dest, vectype);
 
   /* Allocate VECs for vector operands.  In case of SLP, vector operands are
      created in the previous stages of the recursion, so no allocation is
@@ -3693,66 +3620,6 @@ vectorizable_operation (gimple stmt, gimple_stmt_iterator *gsi,
 		  ? VEC_index (tree, vec_oprnds1, i) : NULL_TREE);
 	  vop2 = ((op_type == ternary_op)
 		  ? VEC_index (tree, vec_oprnds2, i) : NULL_TREE);
-	  if (wide_vectype)
-	    {
-	      tree new_temp2, vce;
-
-	      gcc_assert (code == MULT_HIGHPART_EXPR);
-	      if (decl1 != NULL_TREE)
-		{
-		  new_stmt = gimple_build_call (decl1, 2, vop0, vop1);
-		  new_temp = make_ssa_name (vec_dest, new_stmt);
-		  gimple_call_set_lhs (new_stmt, new_temp);
-		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-		  new_stmt = gimple_build_call (decl2, 2, vop0, vop1);
-		  new_temp2 = make_ssa_name (vec_dest2, new_stmt);
-		  gimple_call_set_lhs (new_stmt, new_temp2);
-		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
-		}
-	      else
-		{
-		  new_temp = make_ssa_name (vec_dest, NULL);
-		  new_stmt
-		    = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
-						    ? VEC_WIDEN_MULT_HI_EXPR
-						    : VEC_WIDEN_MULT_LO_EXPR,
-						    new_temp, vop0, vop1);
-		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-		  new_temp2 = make_ssa_name (vec_dest2, NULL);
-		  new_stmt
-		    = gimple_build_assign_with_ops (BYTES_BIG_ENDIAN
-						    ? VEC_WIDEN_MULT_LO_EXPR
-						    : VEC_WIDEN_MULT_HI_EXPR,
-						    new_temp2, vop0, vop1);
-		  vect_finish_stmt_generation (stmt, new_stmt, gsi);
-		}
-
-	      vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp);
-	      new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
-						       vec_dest3, vce,
-						       NULL_TREE);
-	      new_temp = make_ssa_name (vec_dest3, new_stmt);
-	      gimple_assign_set_lhs (new_stmt, new_temp);
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-	      vce = build1 (VIEW_CONVERT_EXPR, vectype, new_temp2);
-	      new_stmt = gimple_build_assign_with_ops (VIEW_CONVERT_EXPR,
-						       vec_dest4, vce,
-						       NULL_TREE);
-	      new_temp2 = make_ssa_name (vec_dest4, new_stmt);
-	      gimple_assign_set_lhs (new_stmt, new_temp2);
-	      vect_finish_stmt_generation (stmt, new_stmt, gsi);
-
-	      new_temp = permute_vec_elements (new_temp, new_temp2,
-					       perm_mask, stmt, gsi);
-	      new_stmt = SSA_NAME_DEF_STMT (new_temp);
-	      if (slp_node)
-		VEC_quick_push (gimple, SLP_TREE_VEC_STMTS (slp_node),
-				new_stmt);
-	      continue;
-	    }
 	  new_stmt = gimple_build_assign_with_ops3 (code, vec_dest,
 						    vop0, vop1, vop2);
 	  new_temp = make_ssa_name (vec_dest, new_stmt);
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 3/7] rs6000: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (2 preceding siblings ...)
  2012-07-10  8:23 ` [PATCH 2/7] i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 5/7] Move vector highpart emulation to the optabs layer Richard Henderson
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * config/rs6000/altivec.md (vec_widen_umult_even_v16qi): Rename
        from altivec_vmuleub.
        (vec_widen_smult_even_v16qi): Rename from altivec_vmulesb.
        (vec_widen_umult_even_v8hi): Rename from altivec_vmuleuh.
        (vec_widen_smult_even_v8hi): Rename from altivec_vmulesh.
        (vec_widen_umult_odd_v16qi): Rename from altivec_vmuloub.
        (vec_widen_smult_odd_v16qi): Rename from altivec_vmulosb.
        (vec_widen_umult_odd_v8hi): Rename from altivec_vmulouh.
        (vec_widen_smult_odd_v8hi): Rename from altivec_vmulosh.
        * config/rs6000/rs6000-builtin.def: Update pattern names to match.
---
 gcc/ChangeLog                        |   11 +++++++
 gcc/config/rs6000/altivec.md         |   54 +++++++++++++++++-----------------
 gcc/config/rs6000/rs6000-builtin.def |   24 +++++++--------
 3 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index fd4bc9d..8c168c8 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -643,7 +643,7 @@
    convert_move (small_swap, swap, 0);
  
    low_product = gen_reg_rtx (V4SImode);
-   emit_insn (gen_altivec_vmulouh (low_product, one, two));
+   emit_insn (gen_vec_widen_umult_odd_v8hi (low_product, one, two));
  
    high_product = gen_reg_rtx (V4SImode);
    emit_insn (gen_altivec_vmsumuhm (high_product, one, small_swap, zero));
@@ -667,8 +667,8 @@
    rtx high = gen_reg_rtx (V4SImode);
    rtx low = gen_reg_rtx (V4SImode);
 
-   emit_insn (gen_altivec_vmulesh (even, operands[1], operands[2]));
-   emit_insn (gen_altivec_vmulosh (odd, operands[1], operands[2]));
+   emit_insn (gen_vec_widen_smult_even_v8hi (even, operands[1], operands[2]));
+   emit_insn (gen_vec_widen_smult_odd_v8hi (odd, operands[1], operands[2]));
 
    emit_insn (gen_altivec_vmrghw (high, even, odd));
    emit_insn (gen_altivec_vmrglw (low, even, odd));
@@ -936,7 +936,7 @@
   "vmrglw %0,%1,%2"
   [(set_attr "type" "vecperm")])
 
-(define_insn "altivec_vmuleub"
+(define_insn "vec_widen_umult_even_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
                       (match_operand:V16QI 2 "register_operand" "v")]
@@ -945,7 +945,7 @@
   "vmuleub %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmulesb"
+(define_insn "vec_widen_smult_even_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
                       (match_operand:V16QI 2 "register_operand" "v")]
@@ -954,7 +954,7 @@
   "vmulesb %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmuleuh"
+(define_insn "vec_widen_umult_even_v8hi"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
                       (match_operand:V8HI 2 "register_operand" "v")]
@@ -963,7 +963,7 @@
   "vmuleuh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmulesh"
+(define_insn "vec_widen_smult_even_v8hi"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
                       (match_operand:V8HI 2 "register_operand" "v")]
@@ -972,7 +972,7 @@
   "vmulesh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmuloub"
+(define_insn "vec_widen_umult_odd_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
                       (match_operand:V16QI 2 "register_operand" "v")]
@@ -981,7 +981,7 @@
   "vmuloub %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmulosb"
+(define_insn "vec_widen_smult_odd_v16qi"
   [(set (match_operand:V8HI 0 "register_operand" "=v")
         (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
                       (match_operand:V16QI 2 "register_operand" "v")]
@@ -990,7 +990,7 @@
   "vmulosb %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmulouh"
+(define_insn "vec_widen_umult_odd_v8hi"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
                       (match_operand:V8HI 2 "register_operand" "v")]
@@ -999,7 +999,7 @@
   "vmulouh %0,%1,%2"
   [(set_attr "type" "veccomplex")])
 
-(define_insn "altivec_vmulosh"
+(define_insn "vec_widen_smult_odd_v8hi"
   [(set (match_operand:V4SI 0 "register_operand" "=v")
         (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
                       (match_operand:V8HI 2 "register_operand" "v")]
@@ -2175,8 +2175,8 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
   DONE;
 }")
@@ -2192,8 +2192,8 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v16qi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v16qi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
   DONE;
 }")
@@ -2209,8 +2209,8 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
   DONE;
 }")
@@ -2226,8 +2226,8 @@
   rtx ve = gen_reg_rtx (V8HImode);
   rtx vo = gen_reg_rtx (V8HImode);
   
-  emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v16qi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v16qi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
   DONE;
 }")
@@ -2243,8 +2243,8 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
   DONE;
 }")
@@ -2260,8 +2260,8 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
   DONE;
 }")
@@ -2277,8 +2277,8 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
   DONE;
 }")
@@ -2294,8 +2294,8 @@
   rtx ve = gen_reg_rtx (V4SImode);
   rtx vo = gen_reg_rtx (V4SImode);
   
-  emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
-  emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
   DONE;
 }")
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 2fd51af..8ef4b05 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -503,18 +503,18 @@ BU_ALTIVEC_2 (VMINSH,	      "vminsh",		CONST,	sminv8hi3)
 BU_ALTIVEC_2 (VMINUW,	      "vminuw",		CONST,	uminv4si3)
 BU_ALTIVEC_2 (VMINSW,	      "vminsw",		CONST,	sminv4si3)
 BU_ALTIVEC_2 (VMINFP,	      "vminfp",		CONST,	sminv4sf3)
-BU_ALTIVEC_2 (VMULEUB,	      "vmuleub",	CONST,	altivec_vmuleub)
-BU_ALTIVEC_2 (VMULEUB_UNS,    "vmuleub_uns",	CONST,	altivec_vmuleub)
-BU_ALTIVEC_2 (VMULESB,	      "vmulesb",	CONST,	altivec_vmulesb)
-BU_ALTIVEC_2 (VMULEUH,	      "vmuleuh",	CONST,	altivec_vmuleuh)
-BU_ALTIVEC_2 (VMULEUH_UNS,    "vmuleuh_uns",	CONST,	altivec_vmuleuh)
-BU_ALTIVEC_2 (VMULESH,	      "vmulesh",	CONST,	altivec_vmulesh)
-BU_ALTIVEC_2 (VMULOUB,	      "vmuloub",	CONST,	altivec_vmuloub)
-BU_ALTIVEC_2 (VMULOUB_UNS,    "vmuloub_uns",	CONST,	altivec_vmuloub)
-BU_ALTIVEC_2 (VMULOSB,	      "vmulosb",	CONST,	altivec_vmulosb)
-BU_ALTIVEC_2 (VMULOUH,	      "vmulouh",	CONST,	altivec_vmulouh)
-BU_ALTIVEC_2 (VMULOUH_UNS,    "vmulouh_uns",	CONST,	altivec_vmulouh)
-BU_ALTIVEC_2 (VMULOSH,	      "vmulosh",	CONST,	altivec_vmulosh)
+BU_ALTIVEC_2 (VMULEUB,	      "vmuleub",	CONST,	vec_widen_umult_even_v16qi)
+BU_ALTIVEC_2 (VMULEUB_UNS,    "vmuleub_uns",	CONST,	vec_widen_umult_even_v16qi)
+BU_ALTIVEC_2 (VMULESB,	      "vmulesb",	CONST,	vec_widen_smult_even_v16qi)
+BU_ALTIVEC_2 (VMULEUH,	      "vmuleuh",	CONST,	vec_widen_umult_even_v8hi)
+BU_ALTIVEC_2 (VMULEUH_UNS,    "vmuleuh_uns",	CONST,	vec_widen_umult_even_v8hi)
+BU_ALTIVEC_2 (VMULESH,	      "vmulesh",	CONST,	vec_widen_smult_even_v8hi)
+BU_ALTIVEC_2 (VMULOUB,	      "vmuloub",	CONST,	vec_widen_umult_odd_v16qi)
+BU_ALTIVEC_2 (VMULOUB_UNS,    "vmuloub_uns",	CONST,	vec_widen_umult_odd_v16qi)
+BU_ALTIVEC_2 (VMULOSB,	      "vmulosb",	CONST,	vec_widen_smult_odd_v16qi)
+BU_ALTIVEC_2 (VMULOUH,	      "vmulouh",	CONST,	vec_widen_umult_odd_v8hi)
+BU_ALTIVEC_2 (VMULOUH_UNS,    "vmulouh_uns",	CONST,	vec_widen_umult_odd_v8hi)
+BU_ALTIVEC_2 (VMULOSH,	      "vmulosh",	CONST,	vec_widen_smult_odd_v8hi)
 BU_ALTIVEC_2 (VNOR,	      "vnor",		CONST,	norv4si3)
 BU_ALTIVEC_2 (VOR,	      "vor",		CONST,	iorv4si3)
 BU_ALTIVEC_2 (VPKUHUM,	      "vpkuhum",	CONST,	altivec_vpkuhum)
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 7/7] Zap now unused builtin_mul_widen_even/odd target hooks
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (4 preceding siblings ...)
  2012-07-10  8:23 ` [PATCH 5/7] Move vector highpart emulation to the optabs layer Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 6/7] Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation Richard Henderson
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * target.def (builtin_mul_widen_even, builtin_mul_widen_odd): Remove.
        * system.h (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Poison.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Poison.
        * config/i386/i386.c (IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI): Remove.
        (IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI): Remove.
        (bdesc_args): Remove entries to match.
        (ix86_builtin_mul_widen_even, ix86_builtin_mul_widen_odd): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Remove.
        * config/rs6000/rs6000.c (rs6000_builtin_mul_widen_even): Remove.
        (rs6000_builtin_mul_widen_odd): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Remove.
        * config/spu/spu.c (spu_builtin_mul_widen_even): Remove.
        (spu_builtin_mul_widen_odd): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Remove.
        (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Remove.
        * doc/tm.texi.in: Don't document the removed hooks.
---
 gcc/ChangeLog              |   24 ++++++++++++++
 gcc/config/i386/i386.c     |   76 --------------------------------------------
 gcc/config/rs6000/rs6000.c |   51 -----------------------------
 gcc/config/spu/spu.c       |   42 ------------------------
 gcc/doc/tm.texi            |   22 -------------
 gcc/doc/tm.texi.in         |   22 -------------
 gcc/system.h               |    4 ++-
 gcc/target.def             |   14 --------
 8 files changed, 27 insertions(+), 228 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3cb34ce..23abe01 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -25754,14 +25754,6 @@ enum ix86_builtins
   IX86_BUILTIN_CPYSGNPS256,
   IX86_BUILTIN_CPYSGNPD256,
 
-  IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI,
-  IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI,
-  IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI,
-  IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI,
-  IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI,
-  IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI,
-  IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI,
-
   /* FMA4 instructions.  */
   IX86_BUILTIN_VFMADDSS,
   IX86_BUILTIN_VFMADDSD,
@@ -26620,10 +26612,6 @@ static const struct builtin_description bdesc_args[] =
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_even_v4si, "__builtin_ia32_vw_smul_even_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_odd_v4si, "__builtin_ia32_vw_umul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
 
@@ -27016,15 +27004,12 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2  , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2  , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_odd_v8si, "__builtin_ia32_vw_smul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256"  , IX86_BUILTIN_PMULHW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256"  , IX86_BUILTIN_PMULLW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256"  , IX86_BUILTIN_PMULLD256  , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_i386_vw_umul_even_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_odd_v8si, "__builtin_ia32_vw_umul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_pshufbv32qi3, "__builtin_ia32_pshufb256", IX86_BUILTIN_PSHUFB256, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI },
@@ -31063,62 +31048,6 @@ ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
 	return NULL_TREE;
       }
 }
-
-static tree
-ix86_builtin_mul_widen_even (tree type)
-{
-  bool uns_p = TYPE_UNSIGNED (type);
-  enum ix86_builtins code;
-
-  switch (TYPE_MODE (type))
-    {
-    case V4SImode:
-      if (!TARGET_SSE2)
-	return NULL;
-      code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI
-	      : IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI);
-      break;
-
-    case V8SImode:
-      if (!TARGET_AVX2)
-	return NULL;
-      code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI
-	      : IX86_BUILTIN_PMULDQ256);
-      break;
-
-    default:
-      return NULL;
-    }
-  return ix86_builtins[code];
-}
-
-static tree
-ix86_builtin_mul_widen_odd (tree type)
-{
-  bool uns_p = TYPE_UNSIGNED (type);
-  enum ix86_builtins code;
-
-  switch (TYPE_MODE (type))
-    {
-    case V4SImode:
-      if (!TARGET_SSE2)
-	return NULL;
-      code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI
-	      : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI);
-      break;
-
-    case V8SImode:
-      if (!TARGET_AVX2)
-	return NULL;
-      code = (uns_p ? IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI
-	      : IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI);
-      break;
-
-    default:
-      return NULL;
-    }
-  return ix86_builtins[code];
-}
 \f
 /* Helper for avx_vpermilps256_operand et al.  This is also used by
    the expansion functions to turn the parallel back into a mask.
@@ -40204,11 +40133,6 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
 #undef TARGET_VECTORIZE_BUILTIN_GATHER
 #define TARGET_VECTORIZE_BUILTIN_GATHER ix86_vectorize_builtin_gather
 
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN ix86_builtin_mul_widen_even
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD ix86_builtin_mul_widen_odd
-
 #undef TARGET_BUILTIN_RECIPROCAL
 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index c124c63..1d0e247 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1273,10 +1273,6 @@ static const struct attribute_spec rs6000_attribute_table[] =
 
 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN rs6000_builtin_mul_widen_even
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD rs6000_builtin_mul_widen_odd
 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT		\
   rs6000_builtin_support_vector_misalignment
@@ -3285,53 +3281,6 @@ rs6000_loop_align_max_skip (rtx label)
   return (1 << rs6000_loop_align (label)) - 1;
 }
 
-/* Implement targetm.vectorize.builtin_mul_widen_even.  */
-static tree
-rs6000_builtin_mul_widen_even (tree type)
-{
-  if (!TARGET_ALTIVEC)
-    return NULL_TREE;
-
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      return TYPE_UNSIGNED (type)
-            ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH_UNS]
-            : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH];
-
-    case V16QImode:
-      return TYPE_UNSIGNED (type)
-            ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB_UNS]
-            : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB];
-    default:
-      return NULL_TREE;
-    }
-}
-
-/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
-static tree
-rs6000_builtin_mul_widen_odd (tree type)
-{
-  if (!TARGET_ALTIVEC)
-    return NULL_TREE;
-
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      return TYPE_UNSIGNED (type)
-            ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH_UNS]
-            : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH];
-
-    case V16QImode:
-      return TYPE_UNSIGNED (type)
-            ? rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB_UNS]
-            : rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB];
-    default:
-      return NULL_TREE;
-    }
-}
-
-
 /* Return true iff, data reference of TYPE can reach vector alignment (16)
    after applying N number of iterations.  This routine does not determine
    how may iterations are required to reach desired alignment.  */
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index 3e0135f..7ca40a0 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -211,8 +211,6 @@ static void spu_encode_section_info (tree, rtx, int);
 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
 					      addr_space_t);
-static tree spu_builtin_mul_widen_even (tree);
-static tree spu_builtin_mul_widen_odd (tree);
 static tree spu_builtin_mask_for_load (void);
 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
 static bool spu_vector_alignment_reachable (const_tree, bool);
@@ -431,12 +429,6 @@ static void spu_setup_incoming_varargs (cumulative_args_t cum,
 #undef  TARGET_ENCODE_SECTION_INFO
 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
 
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
-
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
-
 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
 
@@ -6863,40 +6855,6 @@ spu_expand_builtin (tree exp,
   abort ();
 }
 
-/* Implement targetm.vectorize.builtin_mul_widen_even.  */
-static tree
-spu_builtin_mul_widen_even (tree type)
-{
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      if (TYPE_UNSIGNED (type))
-	return spu_builtin_decls[SPU_MULE_0];
-      else
-	return spu_builtin_decls[SPU_MULE_1];
-      break;
-    default:
-      return NULL_TREE;
-    }
-}
-
-/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
-static tree
-spu_builtin_mul_widen_odd (tree type)
-{
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      if (TYPE_UNSIGNED (type))
-	return spu_builtin_decls[SPU_MULO_1];
-      else
-	return spu_builtin_decls[SPU_MULO_0]; 
-      break;
-    default:
-      return NULL_TREE;
-    }
-}
-
 /* Implement targetm.vectorize.builtin_mask_for_load.  */
 static tree
 spu_builtin_mask_for_load (void)
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 4ec2469..bbf2aff 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -5645,28 +5645,6 @@ the argument @var{OFF} to @code{REALIGN_LOAD}, in which case the low
 log2(@var{VS}) @minus{} 1 bits of @var{addr} will be considered.
 @end deftypefn
 
-@hook TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
-This hook should return the DECL of a function @var{f} that implements
-widening multiplication of the even elements of two input vectors of type @var{x}.
-
-If this hook is defined, the autovectorizer will use it along with the
-@code{TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD} target hook when vectorizing
-widening multiplication in cases that the order of the results does not have to be
-preserved (e.g.@: used only by a reduction computation). Otherwise, the
-@code{widen_mult_hi/lo} idioms will be used.
-@end deftypefn
-
-@hook TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
-This hook should return the DECL of a function @var{f} that implements
-widening multiplication of the odd elements of two input vectors of type @var{x}.
-
-If this hook is defined, the autovectorizer will use it along with the
-@code{TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN} target hook when vectorizing
-widening multiplication in cases that the order of the results does not have to be
-preserved (e.g.@: used only by a reduction computation). Otherwise, the
-@code{widen_mult_hi/lo} idioms will be used.
-@end deftypefn
-
 @hook TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
 Returns cost of different scalar or vector statements for vectorization cost model.
 For vector memory operations the cost may depend on type (@var{vectype}) and
diff --git a/gcc/system.h b/gcc/system.h
index f8b5232..d2c7690 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -909,7 +909,9 @@ extern void fancy_abort (const char *, int, const char *) ATTRIBUTE_NORETURN;
 	LANG_HOOKS_MISSING_ARGUMENT LANG_HOOKS_HASH_TYPES \
 	TARGET_HANDLE_OFAST TARGET_OPTION_OPTIMIZATION \
 	TARGET_IRA_COVER_CLASSES TARGET_HELP \
-	TARGET_HANDLE_PRAGMA_EXTERN_PREFIX
+	TARGET_HANDLE_PRAGMA_EXTERN_PREFIX \
+	TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN \
+	TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD \
 
 /* Arrays that were deleted in favor of a functional interface.  */
  #pragma GCC poison built_in_decls implicit_built_in_decls
diff --git a/gcc/target.def b/gcc/target.def
index 051579f..42dc591 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -970,20 +970,6 @@ DEFHOOK
  tree, (unsigned code, tree dest_type, tree src_type),
  default_builtin_vectorized_conversion)
 
-/* Target builtin that implements vector widening multiplication.
-   builtin_mul_widen_eve computes the element-by-element products
-   for the even elements, and builtin_mul_widen_odd computes the
-   element-by-element products for the odd elements.  */
-DEFHOOK
-(builtin_mul_widen_even,
- "",
- tree, (tree x), NULL)
-
-DEFHOOK
-(builtin_mul_widen_odd,
- "",
- tree, (tree x), NULL)
-
 /* Cost of different vector/scalar statements in vectorization cost
    model. In case of misaligned vector loads and stores the cost depends
    on the data type and misalignment value.  */
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 6/7] Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (5 preceding siblings ...)
  2012-07-10  8:23 ` [PATCH 7/7] Zap now unused builtin_mul_widen_even/odd target hooks Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  9:03 ` [PATCH 0/7] Clean up widen mult even/odd Richard Guenther
  2012-07-10  9:12 ` Jakub Jelinek
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * tree-vect-stmts.c (supportable_widening_operation): Expand
        WIDEN_MULT_EXPR via VEC_WIDEN_MULT_EVEN/ODD_EXPR if possible.
---
 gcc/ChangeLog         |    3 ++
 gcc/tree-vect-stmts.c |   96 +++++++++++++++++++++++++------------------------
 2 files changed, 53 insertions(+), 46 deletions(-)

diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 9caf1c6..fe6a997 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -6199,7 +6199,8 @@ vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
 bool
 supportable_widening_operation (enum tree_code code, gimple stmt,
 				tree vectype_out, tree vectype_in,
-                                tree *decl1, tree *decl2,
+                                tree *decl1 ATTRIBUTE_UNUSED,
+				tree *decl2 ATTRIBUTE_UNUSED,
                                 enum tree_code *code1, enum tree_code *code2,
                                 int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
@@ -6207,7 +6208,6 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
   stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
   loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
   struct loop *vect_loop = NULL;
-  bool ordered_p;
   enum machine_mode vec_mode;
   enum insn_code icode1, icode2;
   optab optab1, optab2;
@@ -6223,56 +6223,60 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
   if (loop_info)
     vect_loop = LOOP_VINFO_LOOP (loop_info);
 
-  /* The result of a vectorized widening operation usually requires two vectors
-     (because the widened results do not fit into one vector). The generated
-     vector results would normally be expected to be generated in the same
-     order as in the original scalar computation, i.e. if 8 results are
-     generated in each vector iteration, they are to be organized as follows:
-        vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
-
-     However, in the special case that the result of the widening operation is
-     used in a reduction computation only, the order doesn't matter (because
-     when vectorizing a reduction we change the order of the computation).
-     Some targets can take advantage of this and generate more efficient code.
-     For example, targets like Altivec, that support widen_mult using a sequence
-     of {mult_even,mult_odd} generate the following vectors:
-        vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].
-
-     When vectorizing outer-loops, we execute the inner-loop sequentially
-     (each vectorized inner-loop iteration contributes to VF outer-loop
-     iterations in parallel).  We therefore don't allow to change the order
-     of the computation in the inner-loop during outer-loop vectorization.  */
-
-   if (vect_loop
-       && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
-       && !nested_in_vect_loop_p (vect_loop, stmt))
-     ordered_p = false;
-   else
-     ordered_p = true;
-
-  if (!ordered_p
-      && code == WIDEN_MULT_EXPR
-      && targetm.vectorize.builtin_mul_widen_even
-      && targetm.vectorize.builtin_mul_widen_even (vectype)
-      && targetm.vectorize.builtin_mul_widen_odd
-      && targetm.vectorize.builtin_mul_widen_odd (vectype))
-    {
-      if (vect_print_dump_info (REPORT_DETAILS))
-        fprintf (vect_dump, "Unordered widening operation detected.");
-
-      *code1 = *code2 = CALL_EXPR;
-      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
-      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
-      return true;
-    }
-
   switch (code)
     {
     case WIDEN_MULT_EXPR:
+      /* The result of a vectorized widening operation usually requires
+	 two vectors (because the widened results do not fit into one vector).
+	 The generated vector results would normally be expected to be
+	 generated in the same order as in the original scalar computation,
+	 i.e. if 8 results are generated in each vector iteration, they are
+	 to be organized as follows:
+		vect1: [res1,res2,res3,res4],
+		vect2: [res5,res6,res7,res8].
+
+	 However, in the special case that the result of the widening
+	 operation is used in a reduction computation only, the order doesn't
+	 matter (because when vectorizing a reduction we change the order of
+	 the computation).  Some targets can take advantage of this and
+	 generate more efficient code.  For example, targets like Altivec,
+	 that support widen_mult using a sequence of {mult_even,mult_odd}
+	 generate the following vectors:
+		vect1: [res1,res3,res5,res7],
+		vect2: [res2,res4,res6,res8].
+
+	 When vectorizing outer-loops, we execute the inner-loop sequentially
+	 (each vectorized inner-loop iteration contributes to VF outer-loop
+	 iterations in parallel).  We therefore don't allow the order of
+	 the computation in the inner-loop to be changed during outer-loop
+	 vectorization.  */
+      /* TODO: Another case in which order doesn't *really* matter is when we
+	 widen and then contract again, e.g. (short)((int)x * y >> 8).
+	 Normally, pack_trunc performs an even/odd permute, whereas the
+	 repack from an even/odd expansion would be an interleave, which
+	 would be significantly simpler for e.g. AVX2.  */
+      /* In any case, in order to avoid duplicating the code below, recurse
+	 on VEC_WIDEN_MULT_EVEN_EXPR.  If it succeeds, all the return values
+	 are properly set up for the caller.  If we fail, we'll continue with
+	 a VEC_WIDEN_MULT_LO/HI_EXPR check.  */
+      if (vect_loop
+	  && STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
+	  && !nested_in_vect_loop_p (vect_loop, stmt)
+	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
+					     stmt, vectype_out, vectype_in,
+					     NULL, NULL, code1, code2,
+					     multi_step_cvt, interm_types))
+	return true;
       c1 = VEC_WIDEN_MULT_LO_EXPR;
       c2 = VEC_WIDEN_MULT_HI_EXPR;
       break;
 
+    case VEC_WIDEN_MULT_EVEN_EXPR:
+      /* Support the recursion induced just above.  */
+      c1 = VEC_WIDEN_MULT_EVEN_EXPR;
+      c2 = VEC_WIDEN_MULT_ODD_EXPR;
+      break;
+
     case WIDEN_LSHIFT_EXPR:
       c1 = VEC_WIDEN_LSHIFT_LO_EXPR;
       c2 = VEC_WIDEN_LSHIFT_HI_EXPR;
@@ -6298,7 +6302,7 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
       gcc_unreachable ();
     }
 
-  if (BYTES_BIG_ENDIAN)
+  if (BYTES_BIG_ENDIAN && c1 != VEC_WIDEN_MULT_EVEN_EXPR)
     {
       enum tree_code ctmp = c1;
       c1 = c2;
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 2/7] i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
  2012-07-10  8:23 ` [PATCH 4/7] spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
  2012-07-10  8:23 ` [PATCH 1/7] Add VEC_WIDEN_MULT_EVEN/ODD_EXPR Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 3/7] rs6000: " Richard Henderson
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * config/i386/sse.md (vec_widen_umult_even_v8si): Rename from
        avx2_umulv4siv4di3.
        (vec_widen_umult_even_v4si): Rename from sse2_umulv2siv2di3.
        (vec_widen_smult_even_v8si): Rename from avx2_mulv4siv4di3.
        (mulv4si3): Remove XOP test shadowed by SSE4 test.
        * config/i386/i386.c (bdesc_args): Update pattern names.
        (ix86_expand_sse2_mulvxdi3): Likewise.
        (ix86_expand_mul_widen_evenodd): Likewise.  Remove XOP test
        shadowed by SSE4 test.
---
 gcc/ChangeLog          |   10 ++++++++++
 gcc/config/i386/i386.c |   31 +++++++++++++------------------
 gcc/config/i386/sse.md |   18 ++++++------------
 3 files changed, 29 insertions(+), 30 deletions(-)

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index fbab32f..3cb34ce 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -26619,8 +26619,8 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
 
   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
-  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
+  { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_vw_umul_even_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_even_v4si, "__builtin_ia32_vw_smul_even_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_EVEN_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_odd_v4si, "__builtin_ia32_vw_umul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V4SI, UNKNOWN, (int) V2UDI_FTYPE_V4USI_V4USI },
   { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_smult_odd_v4si, "__builtin_ia32_vw_smul_odd_v4si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V4SI, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
@@ -27015,15 +27015,15 @@ static const struct builtin_description bdesc_args[] =
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv8hiv8si2  , "__builtin_ia32_pmovzxwd256", IX86_BUILTIN_PMOVZXWD256, UNKNOWN, (int) V8SI_FTYPE_V8HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4hiv4di2  , "__builtin_ia32_pmovzxwq256", IX86_BUILTIN_PMOVZXWQ256, UNKNOWN, (int) V4DI_FTYPE_V8HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_zero_extendv4siv4di2  , "__builtin_ia32_pmovzxdq256", IX86_BUILTIN_PMOVZXDQ256, UNKNOWN, (int) V4DI_FTYPE_V4SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_mulv4siv4di3  , "__builtin_ia32_pmuldq256"  , IX86_BUILTIN_PMULDQ256  , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_even_v8si, "__builtin_ia32_pmuldq256", IX86_BUILTIN_PMULDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_smult_odd_v8si, "__builtin_ia32_vw_smul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_SMUL_ODD_V8SI, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulhrswv16hi3 , "__builtin_ia32_pmulhrsw256", IX86_BUILTIN_PMULHRSW256, UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_umulv16hi3_highpart, "__builtin_ia32_pmulhuw256" , IX86_BUILTIN_PMULHUW256 , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_smulv16hi3_highpart, "__builtin_ia32_pmulhw256"  , IX86_BUILTIN_PMULHW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv16hi3, "__builtin_ia32_pmullw256"  , IX86_BUILTIN_PMULLW256  , UNKNOWN, (int) V16HI_FTYPE_V16HI_V16HI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_mulv8si3, "__builtin_ia32_pmulld256"  , IX86_BUILTIN_PMULLD256  , UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3  , "__builtin_ia32_pmuludq256" , IX86_BUILTIN_PMULUDQ256 , UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
-  { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_umulv4siv4di3  , "__builtin_i386_vw_umul_even_v8si" , IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_ia32_pmuludq256", IX86_BUILTIN_PMULUDQ256, UNKNOWN, (int) V4DI_FTYPE_V8SI_V8SI },
+  { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_even_v8si, "__builtin_i386_vw_umul_even_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_EVEN_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_vec_widen_umult_odd_v8si, "__builtin_ia32_vw_umul_odd_v8si", IX86_BUILTIN_VEC_WIDEN_UMUL_ODD_V8SI, UNKNOWN, (int) V4UDI_FTYPE_V8USI_V8USI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_iorv4di3, "__builtin_ia32_por256", IX86_BUILTIN_POR256, UNKNOWN, (int) V4DI_FTYPE_V4DI_V4DI },
   { OPTION_MASK_ISA_AVX2, CODE_FOR_avx2_psadbw, "__builtin_ia32_psadbw256", IX86_BUILTIN_PSADBW256, UNKNOWN, (int) V16HI_FTYPE_V32QI_V32QI },
@@ -38803,19 +38803,14 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
   if (mode == V8SImode)
     {
       if (uns_p)
-	x = gen_avx2_umulv4siv4di3 (dest, op1, op2);
+	x = gen_vec_widen_umult_even_v8si (dest, op1, op2);
       else
-	x = gen_avx2_mulv4siv4di3 (dest, op1, op2);
+	x = gen_vec_widen_smult_even_v8si (dest, op1, op2);
     }
   else if (uns_p)
-    x = gen_sse2_umulv2siv2di3 (dest, op1, op2);
+    x = gen_vec_widen_umult_even_v4si (dest, op1, op2);
   else if (TARGET_SSE4_1)
     x = gen_sse4_1_mulv2siv2di3 (dest, op1, op2);
-  else if (TARGET_XOP)
-    {
-      x = force_reg (wmode, CONST0_RTX (wmode));
-      x = gen_xop_pmacsdql (dest, op1, op2, x);
-    }
   else
     {
       rtx s1, s2, t0, t1, t2;
@@ -38833,12 +38828,12 @@ ix86_expand_mul_widen_evenodd (rtx dest, rtx op1, rtx op2,
       /* Multiply LO(A) * HI(B), and vice-versa.  */
       t1 = gen_reg_rtx (wmode);
       t2 = gen_reg_rtx (wmode);
-      emit_insn (gen_sse2_umulv2siv2di3 (t1, s1, op2));
-      emit_insn (gen_sse2_umulv2siv2di3 (t2, s2, op1));
+      emit_insn (gen_vec_widen_umult_even_v4si (t1, s1, op2));
+      emit_insn (gen_vec_widen_umult_even_v4si (t2, s2, op1));
 
       /* Multiply LO(A) * LO(B).  */
       t0 = gen_reg_rtx (wmode);
-      emit_insn (gen_sse2_umulv2siv2di3 (t0, op1, op2));
+      emit_insn (gen_vec_widen_umult_even_v4si (t0, op1, op2));
 
       /* Combine and shift the highparts into place.  */
       t1 = expand_binop (wmode, add_optab, t1, t2, t1, 1, OPTAB_DIRECT);
@@ -39013,12 +39008,12 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
 
       if (mode == V2DImode)
 	{
-	  umul = gen_sse2_umulv2siv2di3;
+	  umul = gen_vec_widen_umult_even_v4si;
 	  nmode = V4SImode;
 	}
       else if (mode == V4DImode)
 	{
-	  umul = gen_avx2_umulv4siv4di3;
+	  umul = gen_vec_widen_umult_even_v8si;
 	  nmode = V8SImode;
 	}
       else
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 754b8b4..532ebdd 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -5277,7 +5277,7 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "<sseinsnmode>")])
 
-(define_expand "avx2_umulv4siv4di3"
+(define_expand "vec_widen_umult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand")
 	(mult:V4DI
 	  (zero_extend:V4DI
@@ -5293,7 +5293,7 @@
   "TARGET_AVX2"
   "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
 
-(define_insn "*avx_umulv4siv4di3"
+(define_insn "*vec_widen_umult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand" "=x")
 	(mult:V4DI
 	  (zero_extend:V4DI
@@ -5312,7 +5312,7 @@
    (set_attr "prefix" "vex")
    (set_attr "mode" "OI")])
 
-(define_expand "sse2_umulv2siv2di3"
+(define_expand "vec_widen_umult_even_v4si"
   [(set (match_operand:V2DI 0 "register_operand")
 	(mult:V2DI
 	  (zero_extend:V2DI
@@ -5326,7 +5326,7 @@
   "TARGET_SSE2"
   "ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);")
 
-(define_insn "*sse2_umulv2siv2di3"
+(define_insn "*vec_widen_umult_even_v4si"
   [(set (match_operand:V2DI 0 "register_operand" "=x,x")
 	(mult:V2DI
 	  (zero_extend:V2DI
@@ -5347,7 +5347,7 @@
    (set_attr "prefix" "orig,vex")
    (set_attr "mode" "TI")])
 
-(define_expand "avx2_mulv4siv4di3"
+(define_expand "vec_widen_smult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand")
 	(mult:V4DI
 	  (sign_extend:V4DI
@@ -5363,7 +5363,7 @@
   "TARGET_AVX2"
   "ix86_fixup_binary_operands_no_copy (MULT, V8SImode, operands);")
 
-(define_insn "*avx2_mulv4siv4di3"
+(define_insn "*vec_widen_smult_even_v8si"
   [(set (match_operand:V4DI 0 "register_operand" "=x")
 	(mult:V4DI
 	  (sign_extend:V4DI
@@ -5564,12 +5564,6 @@
 	operands[2] = force_const_mem (<MODE>mode, operands[2]);
       ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);
     }
-  else if (TARGET_XOP)
-    {
-      rtx z = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
-      emit_insn (gen_xop_pmacsdd (operands[0], operands[1], operands[2], z));
-      DONE;
-    }
   else
     {
       ix86_expand_sse2_mulv4si3 (operands[0], operands[1], operands[2]);
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* [PATCH 4/7] spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
@ 2012-07-10  8:23 ` Richard Henderson
  2012-07-10  8:23 ` [PATCH 1/7] Add VEC_WIDEN_MULT_EVEN/ODD_EXPR Richard Henderson
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-10  8:23 UTC (permalink / raw)
  To: gcc-patches

        * config/spu/spu-builtins.md (spu_mpy): Move to spu.md.
        (spu_mpyu, spu_mpyhhu, spu_mpyhh): Likewise.
        * config/spu/spu.md (vec_widen_smult_odd_v8hi): Rename from spu_mpy.
        (vec_widen_umult_odd_v8hi): Rename from spu_mpyu.
        (vec_widen_smult_even_v8hi): Rename from spu_mpyhh.
        (vec_widen_umult_even_v8hi): Rename from spu_mpyhhu.
        * config/spu/spu-builtins.def: Update pattern names to match.
---
 gcc/ChangeLog                   |    8 ++++
 gcc/config/spu/spu-builtins.def |   24 +++++------
 gcc/config/spu/spu-builtins.md  |   65 -----------------------------
 gcc/config/spu/spu.md           |   86 ++++++++++++++++++++++++++++++++++-----
 4 files changed, 95 insertions(+), 88 deletions(-)

diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
index 4d01d94..6095e9c 100644
--- a/gcc/config/spu/spu-builtins.def
+++ b/gcc/config/spu/spu-builtins.def
@@ -62,15 +62,15 @@ DEF_BUILTIN (SI_SFI,         CODE_FOR_spu_sf,        "si_sfi",         B_INSN,
 DEF_BUILTIN (SI_SFX,         CODE_FOR_spu_sfx,       "si_sfx",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_BG,          CODE_FOR_spu_bg,        "si_bg",          B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_BGX,         CODE_FOR_spu_bgx,       "si_bgx",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_MPY,         CODE_FOR_spu_mpy,       "si_mpy",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_MPYU,        CODE_FOR_spu_mpyu,      "si_mpyu",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_MPYI,        CODE_FOR_spu_mpy,       "si_mpyi",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
-DEF_BUILTIN (SI_MPYUI,       CODE_FOR_spu_mpyu,      "si_mpyui",       B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_MPY, CODE_FOR_vec_widen_smult_odd_v8hi, "si_mpy",      B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYU, CODE_FOR_vec_widen_umult_odd_v8hi, "si_mpyu",    B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYI, CODE_FOR_vec_widen_smult_odd_v8hi, "si_mpyi",    B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
+DEF_BUILTIN (SI_MPYUI, CODE_FOR_vec_widen_umult_odd_v8hi, "si_mpyui",  B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_S10))
 DEF_BUILTIN (SI_MPYA,        CODE_FOR_spu_mpya,      "si_mpya",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_MPYH,        CODE_FOR_spu_mpyh,      "si_mpyh",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_MPYS,        CODE_FOR_spu_mpys,      "si_mpys",        B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_MPYHH,       CODE_FOR_spu_mpyhh,     "si_mpyhh",       B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_MPYHHU,      CODE_FOR_spu_mpyhhu,    "si_mpyhhu",      B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHH, CODE_FOR_vec_widen_smult_even_v8hi, "si_mpyhh", B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_MPYHHU, CODE_FOR_vec_widen_umult_even_v8hi, "si_mpyhhu", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_MPYHHA,      CODE_FOR_spu_mpyhha,    "si_mpyhha",      B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_MPYHHAU,     CODE_FOR_spu_mpyhhau,   "si_mpyhhau",     B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_CLZ,         CODE_FOR_clzv4si2,      "si_clz",         B_INSN,   _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
@@ -295,16 +295,16 @@ DEF_BUILTIN (SPU_MHHADD,           CODE_FOR_nothing,       "spu_mhhadd",
 DEF_BUILTIN (SPU_MHHADD_0,         CODE_FOR_spu_mpyhhau,   "spu_mhhadd_0",         B_INTERNAL, _A4(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UV4SI))
 DEF_BUILTIN (SPU_MHHADD_1,         CODE_FOR_spu_mpyhha,    "spu_mhhadd_1",         B_INTERNAL, _A4(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_V4SI))
 DEF_BUILTIN (SPU_MULE,             CODE_FOR_nothing,       "spu_mule",             B_OVERLOAD, _A1(SPU_BTI_VOID))
-DEF_BUILTIN (SPU_MULE_0,           CODE_FOR_spu_mpyhhu,    "spu_mule_0",           B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
-DEF_BUILTIN (SPU_MULE_1,           CODE_FOR_spu_mpyhh,     "spu_mule_1",           B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_MULE_0, CODE_FOR_vec_widen_umult_even_v8hi, "spu_mule_0",         B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_MULE_1, CODE_FOR_vec_widen_smult_even_v8hi, "spu_mule_1",         B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI))
 DEF_BUILTIN (SPU_MUL,              CODE_FOR_nothing,       "spu_mul",              B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_MUL_0,            CODE_FOR_mulv4sf3,      "spu_mul_0",            B_INTERNAL, _A3(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
 DEF_BUILTIN (SPU_MUL_1,            CODE_FOR_mulv2df3,      "spu_mul_1",            B_INTERNAL, _A3(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
 DEF_BUILTIN (SPU_MULO,             CODE_FOR_nothing,       "spu_mulo",             B_OVERLOAD, _A1(SPU_BTI_VOID))
-DEF_BUILTIN (SPU_MULO_0,           CODE_FOR_spu_mpy,       "spu_mulo_0",           B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI))
-DEF_BUILTIN (SPU_MULO_1,           CODE_FOR_spu_mpyu,      "spu_mulo_1",           B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
-DEF_BUILTIN (SPU_MULO_2,           CODE_FOR_spu_mpy,       "spu_mulo_2",           B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_INTHI))
-DEF_BUILTIN (SPU_MULO_3,           CODE_FOR_spu_mpyu,      "spu_mulo_3",           B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UINTHI))
+DEF_BUILTIN (SPU_MULO_0, CODE_FOR_vec_widen_smult_odd_v8hi, "spu_mulo_0",          B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI))
+DEF_BUILTIN (SPU_MULO_1, CODE_FOR_vec_widen_umult_odd_v8hi, "spu_mulo_1",          B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
+DEF_BUILTIN (SPU_MULO_2, CODE_FOR_vec_widen_smult_odd_v8hi, "spu_mulo_2",          B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_INTHI))
+DEF_BUILTIN (SPU_MULO_3, CODE_FOR_vec_widen_umult_odd_v8hi, "spu_mulo_3",          B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UINTHI))
 DEF_BUILTIN (SPU_NMSUB,            CODE_FOR_nothing,       "spu_nmsub",            B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_NMSUB_0,          CODE_FOR_fnmav4sf4,     "spu_nmsub_0",          B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
 DEF_BUILTIN (SPU_NMSUB_1,          CODE_FOR_nfmsv2df4,     "spu_nmsub_1",          B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
diff --git a/gcc/config/spu/spu-builtins.md b/gcc/config/spu/spu-builtins.md
index ac3a333..6f7baad 100644
--- a/gcc/config/spu/spu-builtins.md
+++ b/gcc/config/spu/spu-builtins.md
@@ -197,41 +197,6 @@
   ""
   "")
 
-;; integer multiply
-(define_insn "spu_mpy"
-  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
-        (mult:V4SI
-	  (sign_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
-	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
-          (sign_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
-	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
-  ""
-  "@
-   mpy\t%0,%1,%2
-   mpyi\t%0,%1,%2"
-  [(set_attr "type" "fp7")])
-
-(define_insn "spu_mpyu"
-  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
-        (mult:V4SI
-	  (zero_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
-	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
-          (zero_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
-	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
-  ""
-  "@
-   mpyu\t%0,%1,%2
-   mpyui\t%0,%1,%2"
-  [(set_attr "type" "fp7")])
-
 (define_insn "spu_mpya"
   [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
         (plus:V4SI
@@ -283,36 +248,6 @@
   "mpys\t%0,%1,%2"
   [(set_attr "type" "fp7")])
 
-(define_insn "spu_mpyhhu"
-  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
-	(mult:V4SI
-	  (zero_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 1 "spu_reg_operand" "r")
-	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
-	  (zero_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 2 "spu_reg_operand" "r")
-	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
-  ""
-  "mpyhhu\t%0,%1,%2"
-  [(set_attr "type" "fp7")])
-
-(define_insn "spu_mpyhh"
-  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
-	(mult:V4SI
-	  (sign_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 1 "spu_reg_operand" "r")
-	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
-	  (sign_extend:V4SI
-	    (vec_select:V4HI
-	      (match_operand:V8HI 2 "spu_reg_operand" "r")
-	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
-  ""
-  "mpyhh\t%0,%1,%2"
-  [(set_attr "type" "fp7")])
-
 (define_insn "spu_mpyhhau"
   [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
         (plus:V4SI
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 03ed457..ee5fced 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -1387,8 +1387,8 @@
     rtx mask = gen_reg_rtx (V4SImode);
 
     emit_move_insn (mask, spu_const (V4SImode, 0x0000ffff));
-    emit_insn (gen_spu_mpyhh (high, operands[1], operands[2]));
-    emit_insn (gen_spu_mpy (low, operands[1], operands[2]));
+    emit_insn (gen_vec_widen_smult_even_v8hi (high, operands[1], operands[2]));
+    emit_insn (gen_vec_widen_smult_odd_v8hi (low, operands[1], operands[2]));
     emit_insn (gen_vashlv4si3 (shift, high, spu_const(V4SImode, 16)));
     emit_insn (gen_selb (result, shift, low, mask));
     DONE;
@@ -1482,7 +1482,7 @@
     rtx op2 = simplify_gen_subreg (V8HImode, operands[2], V4SImode, 0);
     emit_insn (gen_spu_mpyh(a, op1, op2));
     emit_insn (gen_spu_mpyh(b, op2, op1));
-    emit_insn (gen_spu_mpyu(c, op1, op2));
+    emit_insn (gen_vec_widen_umult_odd_v8hi (c, op1, op2));
     emit_insn (gen_addv4si3(d, a, b));
     emit_insn (gen_addv4si3(operands[0], d, c));
     DONE;
@@ -4612,6 +4612,70 @@ selb\t%0,%4,%0,%3"
   DONE;
 }")
 
+(define_insn "vec_widen_smult_odd_v8hi"
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
+        (mult:V4SI
+	  (sign_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
+	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+          (sign_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
+	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
+  ""
+  "@
+   mpy\t%0,%1,%2
+   mpyi\t%0,%1,%2"
+  [(set_attr "type" "fp7")])
+
+(define_insn "vec_widen_umult_odd_v8hi"
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r,r")
+        (mult:V4SI
+	  (zero_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 1 "spu_reg_operand" "r,r")
+	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))
+          (zero_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 2 "spu_arith_operand" "r,B")
+	      (parallel [(const_int 1)(const_int 3)(const_int 5)(const_int 7)])))))]
+  ""
+  "@
+   mpyu\t%0,%1,%2
+   mpyui\t%0,%1,%2"
+  [(set_attr "type" "fp7")])
+
+(define_insn "vec_widen_smult_even_v8hi"
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+	(mult:V4SI
+	  (sign_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 1 "spu_reg_operand" "r")
+	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+	  (sign_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 2 "spu_reg_operand" "r")
+	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
+  ""
+  "mpyhh\t%0,%1,%2"
+  [(set_attr "type" "fp7")])
+
+(define_insn "vec_widen_umult_even_v8hi"
+  [(set (match_operand:V4SI 0 "spu_reg_operand" "=r")
+	(mult:V4SI
+	  (zero_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 1 "spu_reg_operand" "r")
+	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))
+	  (zero_extend:V4SI
+	    (vec_select:V4HI
+	      (match_operand:V8HI 2 "spu_reg_operand" "r")
+	      (parallel [(const_int 0)(const_int 2)(const_int 4)(const_int 6)])))))]
+  ""
+  "mpyhhu\t%0,%1,%2"
+  [(set_attr "type" "fp7")])
+
 (define_expand "vec_widen_umult_hi_v8hi"
   [(set (match_operand:V4SI 0 "register_operand"   "=r")
         (mult:V4SI
@@ -4634,8 +4698,8 @@ selb\t%0,%4,%0,%3"
     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
   
   emit_move_insn (mask, array_to_constant (TImode, arr));
-  emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
-  emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_shufb (operands[0], ve, vo, mask));
   DONE;
 }")
@@ -4662,8 +4726,8 @@ selb\t%0,%4,%0,%3"
     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
 
   emit_move_insn (mask, array_to_constant (TImode, arr));
-  emit_insn (gen_spu_mpyhhu (ve, operands[1], operands[2]));
-  emit_insn (gen_spu_mpyu (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_umult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_shufb (operands[0], ve, vo, mask));
   DONE;
 }")
@@ -4690,8 +4754,8 @@ selb\t%0,%4,%0,%3"
     0x04, 0x05, 0x06, 0x07, 0x14, 0x15, 0x16, 0x17};
   
   emit_move_insn (mask, array_to_constant (TImode, arr));
-  emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
-  emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_shufb (operands[0], ve, vo, mask));
   DONE;
 }")
@@ -4718,8 +4782,8 @@ selb\t%0,%4,%0,%3"
     0x0C, 0x0D, 0x0E, 0x0F, 0x1C, 0x1D, 0x1E, 0x1F};
 
   emit_move_insn (mask, array_to_constant (TImode, arr));
-  emit_insn (gen_spu_mpyhh (ve, operands[1], operands[2]));
-  emit_insn (gen_spu_mpy (vo, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_even_v8hi (ve, operands[1], operands[2]));
+  emit_insn (gen_vec_widen_smult_odd_v8hi (vo, operands[1], operands[2]));
   emit_insn (gen_shufb (operands[0], ve, vo, mask));
   DONE;
 }")
-- 
1.7.10.4

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/7] Clean up widen mult even/odd
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (6 preceding siblings ...)
  2012-07-10  8:23 ` [PATCH 6/7] Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation Richard Henderson
@ 2012-07-10  9:03 ` Richard Guenther
  2012-07-10  9:12 ` Jakub Jelinek
  8 siblings, 0 replies; 11+ messages in thread
From: Richard Guenther @ 2012-07-10  9:03 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches

On Tue, Jul 10, 2012 at 10:22 AM, Richard Henderson <rth@redhat.com> wrote:
> I find it instructive that 4 of the 5 isas that actually implement
> widening integer multiplication do have mult-widen-even as the isa
> primitive (even if the -odd variant is missing).  The fact that this
> operation is implemented as a set of builtins and target hooks has
> led to disturbingly cookie-cutter implementations of these hooks
> in the various backends.
>
> Thus I choose to add VEC_WIDEN_MULT_EVEN/ODD_EXPR as tree codes and
> optabs.  This removes a fairly trivial amount of code from three
> backends (the fourth backend, ia64, never grew this support).
>
> The existence of optabs then allows the expansion of MULT_HIGHPART_EXPR
> at the rtl-expansion level without having to resort to builtin expansion
> in order to emit the even/odd alternative.  This saves a fairly
> substantial amount of code from the vectorizer.
>
> I've not touched the interface to supportable_widening_operation,
> which is still prepared to return a CALL_EXPR and some decls.  After
> this patch set it will never do so.  I'm undecided as to whether we
> ought to be prepared for such in the future, or whether this should
> simply go in as a completely separate patch that could in the future
> be easily reverted.
>
> Tested on x86_64; cross-compiled to ppc64 and spu, spot checking the
> relevant division-by-constant testcases.

Thanks,

This looks all good!
Richard.

>
> r~
>
>
>
> Richard Henderson (7):
>   Add VEC_WIDEN_MULT_EVEN/ODD_EXPR
>   i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
>   rs6000: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
>   spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode>
>   Move vector highpart emulation to the optabs layer
>   Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation
>   Zap now unused builtin_mul_widen_even/odd target hooks
>
>  gcc/ChangeLog                        |   89 ++++++++++++
>  gcc/cfgexpand.c                      |    4 +-
>  gcc/config/i386/i386.c               |  103 ++-----------
>  gcc/config/i386/sse.md               |   18 +--
>  gcc/config/rs6000/altivec.md         |   54 +++----
>  gcc/config/rs6000/rs6000-builtin.def |   24 +--
>  gcc/config/rs6000/rs6000.c           |   51 -------
>  gcc/config/spu/spu-builtins.def      |   24 +--
>  gcc/config/spu/spu-builtins.md       |   65 ---------
>  gcc/config/spu/spu.c                 |   42 ------
>  gcc/config/spu/spu.md                |   86 +++++++++--
>  gcc/doc/md.texi                      |   12 +-
>  gcc/doc/tm.texi                      |   22 ---
>  gcc/doc/tm.texi.in                   |   22 ---
>  gcc/expmed.c                         |   32 ++--
>  gcc/expr.c                           |   35 ++---
>  gcc/fold-const.c                     |   36 +++--
>  gcc/genopinit.c                      |    4 +
>  gcc/gimple-pretty-print.c            |    2 +
>  gcc/optabs.c                         |  134 +++++++++++++++++
>  gcc/optabs.h                         |   18 ++-
>  gcc/system.h                         |    4 +-
>  gcc/target.def                       |   14 --
>  gcc/tree-cfg.c                       |    2 +
>  gcc/tree-inline.c                    |    2 +
>  gcc/tree-pretty-print.c              |   32 ++--
>  gcc/tree-vect-generic.c              |  145 +++++-------------
>  gcc/tree-vect-patterns.c             |   23 +--
>  gcc/tree-vect-stmts.c                |  267 +++++++++-------------------------
>  gcc/tree.c                           |    2 +
>  gcc/tree.def                         |    4 +
>  31 files changed, 580 insertions(+), 792 deletions(-)
>
> --
> 1.7.10.4
>

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/7] Clean up widen mult even/odd
  2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
                   ` (7 preceding siblings ...)
  2012-07-10  9:03 ` [PATCH 0/7] Clean up widen mult even/odd Richard Guenther
@ 2012-07-10  9:12 ` Jakub Jelinek
  2012-07-17 12:32   ` Richard Henderson
  8 siblings, 1 reply; 11+ messages in thread
From: Jakub Jelinek @ 2012-07-10  9:12 UTC (permalink / raw)
  To: Richard Henderson; +Cc: gcc-patches

On Tue, Jul 10, 2012 at 10:22:44AM +0200, Richard Henderson wrote:
> I've not touched the interface to supportable_widening_operation,
> which is still prepared to return a CALL_EXPR and some decls.  After
> this patch set it will never do so.  I'm undecided as to whether we
> ought to be prepared for such in the future, or whether this should
> simply go in as a completely separate patch that could in the future
> be easily reverted.

I think it would be nice to remove the support for widening operation
calls as a follow-up, if we ever need it in the future, we can restore
it from svn and it will simplify the callers that already handle way too
many different cases.

Thanks for working on this.

	Jakub

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH 0/7] Clean up widen mult even/odd
  2012-07-10  9:12 ` Jakub Jelinek
@ 2012-07-17 12:32   ` Richard Henderson
  0 siblings, 0 replies; 11+ messages in thread
From: Richard Henderson @ 2012-07-17 12:32 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches

[-- Attachment #1: Type: text/plain, Size: 951 bytes --]

On 07/10/2012 02:09 AM, Jakub Jelinek wrote:
> On Tue, Jul 10, 2012 at 10:22:44AM +0200, Richard Henderson wrote:
>> I've not touched the interface to supportable_widening_operation,
>> which is still prepared to return a CALL_EXPR and some decls.  After
>> this patch set it will never do so.  I'm undecided as to whether we
>> ought to be prepared for such in the future, or whether this should
>> simply go in as a completely separate patch that could in the future
>> be easily reverted.
> 
> I think it would be nice to remove the support for widening operation
> calls as a follow-up, if we ever need it in the future, we can restore
> it from svn and it will simplify the callers that already handle way too
> many different cases.

The cleanup is much smaller than I hoped, since we may still generate calls via the supportable_convert_operation and supportable_narrowing_operation paths.

Nevertheless... tested on x86_64 and committed.


r~

[-- Attachment #2: z --]
[-- Type: text/plain, Size: 5504 bytes --]

	* tree-vect-stmts.c (supportable_widening_operation): Remove decl
	parameters.
	(vectorizable_conversion): Update supportable_widening_operation call.
	* tree-vect-patterns.c (vect_recog_widen_mult_pattern): Likewise.
	(vect_recog_widen_shift_pattern): Likewise.
	* tree-vectorizer.h: Update decl.




diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
index e8ac42a..595b9b6 100644
--- a/gcc/tree-vect-patterns.c
+++ b/gcc/tree-vect-patterns.c
@@ -599,7 +599,6 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
   tree type, half_type0, half_type1;
   gimple pattern_stmt;
   tree vectype, vectype_out = NULL_TREE;
-  tree dummy;
   tree var;
   enum tree_code dummy_code;
   int dummy_int;
@@ -692,8 +691,8 @@ vect_recog_widen_mult_pattern (VEC (gimple, heap) **stmts,
       || !vectype_out
       || !supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt,
 					  vectype_out, vectype,
-					  &dummy, &dummy, &dummy_code,
-					  &dummy_code, &dummy_int, &dummy_vec))
+					  &dummy_code, &dummy_code,
+					  &dummy_int, &dummy_vec))
     return NULL;
 
   *type_in = vectype;
@@ -1370,7 +1369,6 @@ vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
   tree type, half_type0;
   gimple pattern_stmt;
   tree vectype, vectype_out = NULL_TREE;
-  tree dummy;
   tree var;
   enum tree_code dummy_code;
   int dummy_int;
@@ -1441,9 +1439,8 @@ vect_recog_widen_shift_pattern (VEC (gimple, heap) **stmts,
       || !vectype_out
       || !supportable_widening_operation (WIDEN_LSHIFT_EXPR, last_stmt,
 					  vectype_out, vectype,
-					  &dummy, &dummy, &dummy_code,
-					  &dummy_code, &dummy_int,
-					  &dummy_vec))
+					  &dummy_code, &dummy_code,
+					  &dummy_int, &dummy_vec))
     return NULL;
 
   *type_in = vectype;
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 2f372df..358c2d5 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -2410,8 +2410,8 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 
     case WIDEN:
       if (supportable_widening_operation (code, stmt, vectype_out, vectype_in,
-					  &decl1, &decl2, &code1, &code2,
-					  &multi_step_cvt, &interm_types))
+					  &code1, &code2, &multi_step_cvt,
+					  &interm_types))
 	{
 	  /* Binary widening operation can only be supported directly by the
 	     architecture.  */
@@ -2443,18 +2443,16 @@ vectorizable_conversion (gimple stmt, gimple_stmt_iterator *gsi,
 		goto unsupported;
 	    }
 	  else if (!supportable_widening_operation (code, stmt, vectype_out,
-						    cvt_type, &decl1, &decl2,
-						    &codecvt1, &codecvt2,
-						    &multi_step_cvt,
+						    cvt_type, &codecvt1,
+						    &codecvt2, &multi_step_cvt,
 						    &interm_types))
 	    continue;
 	  else
 	    gcc_assert (multi_step_cvt == 0);
 
 	  if (supportable_widening_operation (NOP_EXPR, stmt, cvt_type,
-					      vectype_in, NULL, NULL, &code1,
-					      &code2, &multi_step_cvt,
-					      &interm_types))
+					      vectype_in, &code1, &code2,
+					      &multi_step_cvt, &interm_types))
 	    break;
 	}
 
@@ -6262,9 +6260,6 @@ vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
    Output:
    - CODE1 and CODE2 are codes of vector operations to be used when
    vectorizing the operation, if available.
-   - DECL1 and DECL2 are decls of target builtin functions to be used
-   when vectorizing the operation, if available.  In this case,
-   CODE1 and CODE2 are CALL_EXPR.
    - MULTI_STEP_CVT determines the number of required intermediate steps in
    case of multi-step conversion (like char->short->int - in that case
    MULTI_STEP_CVT will be 1).
@@ -6274,8 +6269,6 @@ vect_is_simple_use_1 (tree operand, gimple stmt, loop_vec_info loop_vinfo,
 bool
 supportable_widening_operation (enum tree_code code, gimple stmt,
 				tree vectype_out, tree vectype_in,
-                                tree *decl1 ATTRIBUTE_UNUSED,
-				tree *decl2 ATTRIBUTE_UNUSED,
                                 enum tree_code *code1, enum tree_code *code2,
                                 int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
@@ -6339,8 +6332,8 @@ supportable_widening_operation (enum tree_code code, gimple stmt,
 	  && !nested_in_vect_loop_p (vect_loop, stmt)
 	  && supportable_widening_operation (VEC_WIDEN_MULT_EVEN_EXPR,
 					     stmt, vectype_out, vectype_in,
-					     NULL, NULL, code1, code2,
-					     multi_step_cvt, interm_types))
+					     code1, code2, multi_step_cvt,
+					     interm_types))
 	return true;
       c1 = VEC_WIDEN_MULT_LO_EXPR;
       c2 = VEC_WIDEN_MULT_HI_EXPR;
diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
index 6b74bb4..3d23107 100644
--- a/gcc/tree-vectorizer.h
+++ b/gcc/tree-vectorizer.h
@@ -895,9 +895,8 @@ extern bool vect_is_simple_use_1 (tree, gimple, loop_vec_info,
 				  bb_vec_info, gimple *,
 				  tree *,  enum vect_def_type *, tree *);
 extern bool supportable_widening_operation (enum tree_code, gimple, tree, tree,
-                                            tree *, tree *, enum tree_code *,
-                                            enum tree_code *, int *,
-                                            VEC (tree, heap) **);
+                                            enum tree_code *, enum tree_code *,
+					    int *, VEC (tree, heap) **);
 extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
 					     enum tree_code *,
 					     int *, VEC (tree, heap) **);

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2012-07-17 12:32 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-07-10  8:23 [PATCH 0/7] Clean up widen mult even/odd Richard Henderson
2012-07-10  8:23 ` [PATCH 4/7] spu: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
2012-07-10  8:23 ` [PATCH 1/7] Add VEC_WIDEN_MULT_EVEN/ODD_EXPR Richard Henderson
2012-07-10  8:23 ` [PATCH 2/7] i386: Rename patterns for vec_widen_<s>mult_even/odd_<mode> Richard Henderson
2012-07-10  8:23 ` [PATCH 3/7] rs6000: " Richard Henderson
2012-07-10  8:23 ` [PATCH 5/7] Move vector highpart emulation to the optabs layer Richard Henderson
2012-07-10  8:23 ` [PATCH 7/7] Zap now unused builtin_mul_widen_even/odd target hooks Richard Henderson
2012-07-10  8:23 ` [PATCH 6/7] Use VEC_WIDEN_MULT_EVEN/ODD_EXPR in supportable_widening_operation Richard Henderson
2012-07-10  9:03 ` [PATCH 0/7] Clean up widen mult even/odd Richard Guenther
2012-07-10  9:12 ` Jakub Jelinek
2012-07-17 12:32   ` Richard Henderson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).