public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/wschmidt/heads/builtins4)] rs6000: Introduce rs6000_builtin_decls_x
@ 2020-11-24 16:46 William Schmidt
  0 siblings, 0 replies; 4+ messages in thread
From: William Schmidt @ 2020-11-24 16:46 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:bfe43645907b8c53202148cf30ee17cf25f6c6df

commit bfe43645907b8c53202148cf30ee17cf25f6c6df
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Fri Nov 20 13:35:44 2020 -0600

    rs6000: Introduce rs6000_builtin_decls_x
    
    2020-11-20  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-c.c (rs6000-builtins.h): New #include.
            (altivec_build_new_resolved_builtin): New forward decl.
            (altivec_resolve_new_overloaded_builtin): New forward decl.
            (altivec_build_resolved_builtin): Call
            altivec_build_new_resolved_builtin.
            (altivec_resolve_overloaded_builtin): Call
            altivec_resolve_new_overloaded_builtin.
            (altivec_build_new_resolved_builtin): New function.
            (altivec_resolve_new_overloaded_builtin): Likewise.
            * config/rs6000/rs6000-call.c
            (rs6000_gimple_fold_new_mma_builtin): Use rs6000_builtin_decls_x.
            (rs6000_builtin_decl): Flag for later rewrite.
            * config/rs6000/rs6000-gen-builtins.c (write_decls): Generate decl
            for rs6000_builtin_decls_x.
            (write_header_file): Don't generate includes; add logic to avoid
            double-includes.
            (write_init_bif_table): Generate definition for
            rs6000_builtin_decls_x and preinitialize unused slots.
            * config/rs6000/rs6000.c (rs6000-builtins.h): New #include.
            (rs6000_new_builtin_vectorized_function): New function.
            (rs6000_new_builtin_md_vectorized_function): Likewise.
            (rs6000_builtin_vectorized_function): Call
            rs6000_new_builtin_vectorized_function.
            (rs6000_builtin_md_vectorized_function): Call
            rs6000_new_builtin_md_vectorized_function.
            (rs6000_builtin_reciprocal): Use rs6000_builtin_decls_x.
            (add_condition_to_bb): Likewise.
            (rs6000_atomic_assign_expand_fenv): Likewise.
            * config/rs6000/t-rs6000 (rs6000-c.o): Add dependency on
            rs6000-builtins.c.
            (rs6000.o): Add note about problem to be solved later.

Diff:
---
 gcc/config/rs6000/rs6000-c.c            | 1086 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-call.c         |    5 +-
 gcc/config/rs6000/rs6000-gen-builtins.c |   27 +-
 gcc/config/rs6000/rs6000.c              |  219 ++++++-
 gcc/config/rs6000/t-rs6000              |   18 +-
 5 files changed, 1337 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cc1e997524e..c6f0b88ac16 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -35,6 +35,14 @@
 #include "langhooks.h"
 #include "c/c-tree.h"
 
+#include "rs6000-builtins.h"
+
+
+static tree
+altivec_build_new_resolved_builtin (tree *, int,
+				    const struct altivec_builtin_types *);
+static tree
+altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
 
 
 /* Handle the machine specific pragma longcall.  Its syntax is
@@ -850,6 +858,9 @@ static tree
 altivec_build_resolved_builtin (tree *args, int n,
 				const struct altivec_builtin_types *desc)
 {
+  if (new_builtins_are_live)
+    return altivec_build_new_resolved_builtin (args, n, desc);
+
   tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
   tree ret_type = rs6000_builtin_type (desc->ret_type);
   tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
@@ -921,6 +932,1081 @@ altivec_build_resolved_builtin (tree *args, int n,
 tree
 altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				    void *passed_arglist)
+{
+  if (new_builtins_are_live)
+    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
+						   passed_arglist);
+
+  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
+  unsigned int nargs = vec_safe_length (arglist);
+  enum rs6000_builtins fcode
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree types[4], args[4];
+  const struct altivec_builtin_types *desc;
+  unsigned int n;
+
+  if (!rs6000_overloaded_builtin_p (fcode))
+    return NULL_TREE;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
+	     (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsl%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+  else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsr%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_MUL)
+    {
+      /* vec_mul needs to be special cased because there are no instructions
+	 for it for the {un}signed char, {un}signed short, and {un}signed int
+	 types.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  case E_QImode:
+	  case E_HImode:
+	  case E_SImode:
+	  case E_DImode:
+	  case E_TImode:
+	    {
+	      /* For scalar types just use a multiply expression.  */
+	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
+				      fold_convert (TREE_TYPE (arg0), arg1));
+	    }
+	  case E_SFmode:
+	    {
+	      /* For floats use the xvmulsp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULSP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  case E_DFmode:
+	    {
+	      /* For doubles use the xvmuldp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULDP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  /* Other types are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE)
+    {
+      /* vec_cmpne needs to be special cased because there are no instructions
+	 for it (prior to power 9).  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      /* Power9 instructions provide the most efficient implementation of
+	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
+	 or SFmode or DFmode.  */
+      if (!TARGET_P9_VECTOR
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
+	{
+	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	    {
+	      /* vec_cmpne (va, vb) == vec_nor (vec_cmpeq (va, vb),
+		 vec_cmpeq (va, vb)).  */
+	      /* Note:  vec_nand also works but opt changes vec_nand's
+		 to vec_nor's anyway.  */
+	    case E_QImode:
+	    case E_HImode:
+	    case E_SImode:
+	    case E_DImode:
+	    case E_TImode:
+	    case E_SFmode:
+	    case E_DFmode:
+	      {
+		/* call = vec_cmpeq (va, vb)
+		   result = vec_nor (call, call).  */
+		vec<tree, va_gc> *params = make_tree_vector ();
+		vec_safe_push (params, arg0);
+		vec_safe_push (params, arg1);
+		tree call = altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ],
+		   params);
+		/* Use save_expr to ensure that operands used more than once
+		   that may have side effects (like calls) are only evaluated
+		   once.  */
+		call = save_expr (call);
+		params = make_tree_vector ();
+		vec_safe_push (params, call);
+		vec_safe_push (params, call);
+		return altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params);
+	      }
+	      /* Other types are errors.  */
+	    default:
+	      goto bad;
+	    }
+	}
+      /* else, fall through and process the Power9 alternative below */
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBE)
+    {
+      /* vec_adde and vec_sube need to be special cased because there is no
+	  instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDE ?
+	    "vec_adde": "vec_sube";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
+						   vec_and (carryv, 1)).  */
+	  case E_SImode:
+	    {
+	      tree add_sub_builtin;
+
+	      vec<tree, va_gc> *params = make_tree_vector ();
+	      vec_safe_push (params, arg0);
+	      vec_safe_push (params, arg1);
+
+	      if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	      tree call = altivec_resolve_overloaded_builtin (loc,
+							      add_sub_builtin,
+							      params);
+	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	      tree ones_vector = build_vector_from_val (arg0_type, const1);
+	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					       arg2, ones_vector);
+	      params = make_tree_vector ();
+	      vec_safe_push (params, call);
+	      vec_safe_push (params, and_expr);
+	      return altivec_resolve_overloaded_builtin (loc, add_sub_builtin,
+							 params);
+	    }
+	  /* For {un}signed __int128s use the vaddeuqm instruction
+		directly.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBEUQM];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBEC)
+    {
+      /* vec_addec and vec_subec need to be special cased because there is
+	 no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDEC ?
+	    "vec_addec": "vec_subec";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	      vec_addec (va, vb, carryv) ==
+				vec_or (vec_addc (va, vb),
+					vec_addc (vec_add (va, vb),
+						  vec_and (carryv, 0x1))).  */
+	  case E_SImode:
+	    {
+	    /* Use save_expr to ensure that operands used more than once
+		that may have side effects (like calls) are only evaluated
+		once.  */
+	    tree as_builtin;
+	    tree as_c_builtin;
+
+	    arg0 = save_expr (arg0);
+	    arg1 = save_expr (arg1);
+	    vec<tree, va_gc> *params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
+	    else
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							     params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	    else
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	    tree call2 = altivec_resolve_overloaded_builtin (loc, as_builtin,
+							     params);
+	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	    tree ones_vector = build_vector_from_val (arg0_type, const1);
+	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					     arg2, ones_vector);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call2);
+	    vec_safe_push (params, and_expr);
+	    call2 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call1);
+	    vec_safe_push (params, call2);
+	    tree or_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_OR];
+	    return altivec_resolve_overloaded_builtin (loc, or_builtin,
+						       params);
+	    }
+	  /* For {un}signed __int128s use the vaddecuq/vsubecuq
+	     instructions.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBECUQ];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  /* For now treat vec_splats and vec_promote as the same.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
+      || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
+    {
+      tree type, arg;
+      int size;
+      int i;
+      bool unsigned_p;
+      vec<constructor_elt, va_gc> *vec;
+      const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats": "vec_promote";
+
+      if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1)
+	{
+	  error ("builtin %qs only accepts 1 argument", name);
+	  return error_mark_node;
+	}
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", name);
+	  return error_mark_node;
+	}
+      /* Ignore promote's element argument.  */
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE
+	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
+	goto bad;
+
+      arg = (*arglist)[0];
+      type = TREE_TYPE (arg);
+      if (!SCALAR_FLOAT_TYPE_P (type)
+	  && !INTEGRAL_TYPE_P (type))
+	goto bad;
+      unsigned_p = TYPE_UNSIGNED (type);
+      switch (TYPE_MODE (type))
+	{
+	  case E_TImode:
+	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+	    size = 1;
+	    break;
+	  case E_DImode:
+	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+	    size = 2;
+	    break;
+	  case E_SImode:
+	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+	    size = 4;
+	    break;
+	  case E_HImode:
+	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+	    size = 8;
+	    break;
+	  case E_QImode:
+	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+	    size = 16;
+	    break;
+	  case E_SFmode: type = V4SF_type_node; size = 4; break;
+	  case E_DFmode: type = V2DF_type_node; size = 2; break;
+	  default:
+	    goto bad;
+	}
+      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+      vec_alloc (vec, size);
+      for(i = 0; i < size; i++)
+	{
+	  constructor_elt elt = {NULL_TREE, arg};
+	  vec->quick_push (elt);
+	}
+	return build_constructor (type, vec);
+    }
+
+  /* For now use pointer tricks to do the extraction, unless we are on VSX
+     extracting a double from a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
+    {
+      tree arg1;
+      tree arg1_type;
+      tree arg2;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second argument. */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
+	  return error_mark_node;
+	}
+
+      arg2 = (*arglist)[1];
+      arg1 = (*arglist)[0];
+      arg1_type = TREE_TYPE (arg1);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* See if we can optimize vec_extracts with the current VSX instruction
+	 set.  */
+      mode = TYPE_MODE (arg1_type);
+      if (VECTOR_MEM_VSX_P (mode))
+
+	{
+	  tree call = NULL_TREE;
+	  int nunits = GET_MODE_NUNITS (mode);
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the second argument is an integer constant, generate
+	     the built-in code if we can.  We need 64-bit and direct
+	     move to extract the small integer vectors.  */
+	  if (TREE_CODE (arg2) == INTEGER_CST)
+	    {
+	      wide_int selector = wi::to_wide (arg2);
+	      selector = wi::umod_trunc (selector, nunits);
+	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V1TImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  /* If the second argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      tree result = build_call_expr (call, 2, arg1, arg2);
+	      /* Coerce the result to vector element type.  May be no-op.  */
+	      arg1_inner_type = TREE_TYPE (arg1_type);
+	      result = fold_convert (arg1_inner_type, result);
+	      return result;
+	    }
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+			      build_int_cst (TREE_TYPE (arg2),
+					     TYPE_VECTOR_SUBPARTS (arg1_type)
+					     - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+      /* PR83660: We mark this as having side effects so that
+	 downstream in fold_build_cleanup_point_expr () it will get a
+	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
+	 later in gimplify_cleanup_point_expr ().  Potentially this
+	 causes missed optimization because there actually is no side
+	 effect.  */
+      if (c_dialect_cxx ())
+	TREE_SIDE_EFFECTS (stmt) = 1;
+
+      return stmt;
+    }
+
+  /* For now use pointer tricks to do the insertion, unless we are on VSX
+     inserting a double to a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
+    {
+      tree arg0;
+      tree arg1;
+      tree arg2;
+      tree arg1_type;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second or third arguments. */
+      if (nargs != 3)
+	{
+	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
+	  return error_mark_node;
+	}
+
+      arg0 = (*arglist)[0];
+      arg1 = (*arglist)[1];
+      arg1_type = TREE_TYPE (arg1);
+      arg2 = fold_for_warn ((*arglist)[2]);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
+      mode = TYPE_MODE (arg1_type);
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+	  && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  wide_int selector = wi::to_wide (arg2);
+	  selector = wi::umod_trunc (selector, 2);
+	  tree call = NULL_TREE;
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  if (mode == V2DFmode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+	  else if (mode == V2DImode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  if (call)
+	    return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+	       && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+	  wide_int selector = wi::zero(32);
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+      else
+	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+				build_int_cst (TREE_TYPE (arg2),
+					       TYPE_VECTOR_SUBPARTS (arg1_type)
+					       - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+		     convert (TREE_TYPE (stmt), arg0));
+      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+      return stmt;
+    }
+
+  for (n = 0;
+       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+       fnargs = TREE_CHAIN (fnargs), n++)
+    {
+      tree decl_type = TREE_VALUE (fnargs);
+      tree arg = (*arglist)[n];
+      tree type;
+
+      if (arg == error_mark_node)
+	return error_mark_node;
+
+      if (n >= 4)
+	abort ();
+
+      arg = default_conversion (arg);
+
+      /* The C++ front-end converts float * to const void * using
+	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
+      type = TREE_TYPE (arg);
+      if (POINTER_TYPE_P (type)
+	  && TREE_CODE (arg) == NOP_EXPR
+	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+					    const_ptr_type_node)
+	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+					    ptr_type_node))
+	{
+	  arg = TREE_OPERAND (arg, 0);
+	  type = TREE_TYPE (arg);
+	}
+
+      /* Remove the const from the pointers to simplify the overload
+	 matching further down.  */
+      if (POINTER_TYPE_P (decl_type)
+	  && POINTER_TYPE_P (type)
+	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
+	{
+	  if (TYPE_READONLY (TREE_TYPE (type))
+	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
+	    warning (0, "passing argument %d of %qE discards qualifiers from "
+		     "pointer target type", n + 1, fndecl);
+	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+							   0));
+	  arg = fold_convert (type, arg);
+	}
+
+      /* For P9V_BUILTIN_VEC_LXVL, convert any const * to its non constant
+	 equivalent to simplify the overload matching below.  */
+      if (fcode == P9V_BUILTIN_VEC_LXVL)
+	{
+	  if (POINTER_TYPE_P (type)
+	      && TYPE_READONLY (TREE_TYPE (type)))
+	    {
+	      type = build_pointer_type (build_qualified_type (
+						TREE_TYPE (type),0));
+	      arg = fold_convert (type, arg);
+	    }
+	}
+
+      args[n] = arg;
+      types[n] = type;
+    }
+
+  /* If the number of arguments did not match the prototype, return NULL
+     and the generic code will issue the appropriate error message.  */
+  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+    return NULL;
+
+  if (n == 0)
+    abort ();
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_STEP)
+    {
+      if (TREE_CODE (types[0]) != VECTOR_TYPE)
+	goto bad;
+
+      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+    }
+
+  {
+    bool unsupported_builtin = false;
+    enum rs6000_builtins overloaded_code;
+    tree result = NULL;
+    for (desc = altivec_overloaded_builtins;
+	 desc->code && desc->code != fcode; desc++)
+      continue;
+
+    /* Need to special case __builtin_cmpb because the overloaded forms
+       of this function take (unsigned int, unsigned int) or (unsigned
+       long long int, unsigned long long int).  Since C conventions
+       allow the respective argument types to be implicitly coerced into
+       each other, the default handling does not provide adequate
+       discrimination between the desired forms of the function.  */
+    if (fcode == P6_OV_BUILTIN_CMPB)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+	machine_mode arg2_mode = TYPE_MODE (types[1]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
+	    return error_mark_node;
+	  }
+
+	/* If any supplied arguments are wider than 32 bits, resolve to
+	   64-bit variant of built-in function.  */
+	if ((GET_MODE_PRECISION (arg1_mode) > 32)
+	    || (GET_MODE_PRECISION (arg2_mode) > 32))
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB.  */
+	    overloaded_code = P6_BUILTIN_CMPB;
+	  }
+	else
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB_32.  */
+	    overloaded_code = P6_BUILTIN_CMPB_32;
+	  }
+
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if (fcode == P9V_BUILTIN_VEC_VSIEDP)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments",
+		   "scalar_insert_exp");
+	    return error_mark_node;
+	  }
+
+	/* If supplied first argument is wider than 64 bits, resolve to
+	   128-bit variant of built-in function.  */
+	if (GET_MODE_PRECISION (arg1_mode) > 64)
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects __ieee128 argument.  Otherwise, expect
+	       __int128 argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEQPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEQP;
+	  }
+	else
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects double argument.  Otherwise, expect
+	       long long int argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEDPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEDP;
+	  }
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above.  */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if ((fcode == P10_BUILTIN_VEC_XXEVAL)
+	    || (fcode == P10V_BUILTIN_VXXPERMX))
+      {
+	signed char op3_type;
+
+	/* Need to special case P10_BUILTIN_VEC_XXEVAL and
+	   P10V_BUILTIN_VXXPERMX because they take 4 arguments and the
+	   existing infrastructure only handles three.  */
+	if (nargs != 4)
+	  {
+	    const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ?
+	      "__builtin_vec_xxeval":"__builtin_vec_xxpermx";
+
+	    error ("builtin %qs requires 4 arguments", name);
+	    return error_mark_node;
+	  }
+
+	for ( ; desc->code == fcode; desc++)
+	  {
+	    if (fcode == P10_BUILTIN_VEC_XXEVAL)
+	      op3_type = desc->op3;
+	    else  /* P10V_BUILTIN_VXXPERMX */
+	      op3_type = RS6000_BTI_V16QI;
+
+	    if (rs6000_builtin_type_compatible (types[0], desc->op1)
+		&& rs6000_builtin_type_compatible (types[1], desc->op2)
+		&& rs6000_builtin_type_compatible (types[2], desc->op3)
+		&& rs6000_builtin_type_compatible (types[2], op3_type)
+		&& rs6000_builtin_type_compatible (types[3],
+						   RS6000_BTI_UINTSI))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] == NULL_TREE)
+		  unsupported_builtin = true;
+		else
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      return result;
+		    /* Allow loop to continue in case a different
+		       definition is supported.  */
+		    overloaded_code = desc->overloaded_code;
+		    unsupported_builtin = true;
+		  }
+	      }
+	  }
+      }
+    else
+      {
+	/* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in
+	   the opX fields.  */
+	for (; desc->code == fcode; desc++)
+	  {
+	    if ((desc->op1 == RS6000_BTI_NOT_OPAQUE
+		 || rs6000_builtin_type_compatible (types[0], desc->op1))
+		&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[1], desc->op2))
+		&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[2], desc->op3)))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (!rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      {
+			/* Allow loop to continue in case a different
+			   definition is supported.  */
+			overloaded_code = desc->overloaded_code;
+			unsupported_builtin = true;
+		      }
+		    else
+		      return result;
+		  }
+		else
+		  unsupported_builtin = true;
+	      }
+	  }
+      }
+
+    if (unsupported_builtin)
+      {
+	const char *name = rs6000_overloaded_builtin_name (fcode);
+	if (result != NULL)
+	  {
+	    const char *internal_name
+	      = rs6000_overloaded_builtin_name (overloaded_code);
+	    /* An error message making reference to the name of the
+	       non-overloaded function has already been issued.  Add
+	       clarification of the previous message.  */
+	    rich_location richloc (line_table, input_location);
+	    inform (&richloc, "builtin %qs requires builtin %qs",
+		    name, internal_name);
+	  }
+	else
+	  error ("%qs is not supported in this compiler configuration", name);
+	/* If an error-representing  result tree was returned from
+	   altivec_build_resolved_builtin above, use it.  */
+	return (result != NULL) ? result : error_mark_node;
+      }
+  }
+ bad:
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
+    return error_mark_node;
+  }
+}
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+   The overloaded builtin that matched the types and args is described
+   by DESC.  The N arguments are given in ARGS.
+
+   Actually the only thing it does is call fully_fold_convert on ARGS, with
+   a small exception for the vec_{all,any}_{ge,le} predicates.  */
+
+static tree
+altivec_build_new_resolved_builtin (tree *args, int n,
+				    const struct altivec_builtin_types *desc)
+{
+  tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
+  tree ret_type = rs6000_builtin_type (desc->ret_type);
+  tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
+  tree arg_type[4];
+  tree call;
+
+  int i;
+  for (i = 0; i < n; i++)
+    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+  /* The AltiVec overloading implementation is overall gross, but this
+     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
+     are completely different for floating-point vs. integer vector
+     types, because the former has vcmpgefp, but the latter should use
+     vcmpgtXX.
+
+     In practice, the second and third arguments are swapped, and the
+     condition (LT vs. EQ, which is recognizable by bit 1 of the first
+     argument) is reversed.  Patch the arguments here before building
+     the resolved CALL_EXPR.  */
+  if (n == 3
+      && desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P
+      && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P
+      && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P)
+    {
+      std::swap (args[1], args[2]);
+      std::swap (arg_type[1], arg_type[2]);
+
+      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+			     build_int_cst (NULL_TREE, 2));
+    }
+
+  switch (n)
+    {
+    case 0:
+      call = build_call_expr (impl_fndecl, 0);
+      break;
+    case 1:
+      call = build_call_expr (impl_fndecl, 1,
+			      fully_fold_convert (arg_type[0], args[0]));
+      break;
+    case 2:
+      call = build_call_expr (impl_fndecl, 2,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]));
+      break;
+    case 3:
+      call = build_call_expr (impl_fndecl, 3,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]));
+      break;
+    case 4:
+      call = build_call_expr (impl_fndecl, 4,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]),
+			      fully_fold_convert (arg_type[3], args[3]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return fold_convert (ret_type, call);
+}
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+   support Altivec's overloaded builtins.  */
+
+static tree
+altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
+					void *passed_arglist)
 {
   vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
   unsigned int nargs = vec_safe_length (arglist);
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 533e6d0877f..ae9ae6f1c3d 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14216,7 +14216,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
       /* We're disassembling an accumulator into a different type, so we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
+      new_decl = rs6000_builtin_decls_x[RS6000_BIF_XXMFACC_INTERNAL];
       new_call = gimple_build_call (new_decl, 1, src);
       src = make_ssa_name (vector_quad_type_node);
       gimple_call_set_lhs (new_call, src);
@@ -14245,7 +14245,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
-  new_decl = rs6000_builtin_decls[fncode + 1];
+  new_decl = rs6000_builtin_decls_x[fncode + 1];
   tree lhs, op[MAX_MMA_OPERANDS];
   tree acc = gimple_call_arg (stmt, 0);
   push_gimplify_context (true);
@@ -16023,6 +16023,7 @@ rs6000_init_builtins (void)
 }
 
 /* Returns the rs6000 builtin decl for CODE.  */
+/* #### TODO: Rewrite this.  */
 
 tree
 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index 9e5adbae7c7..c7f405e33fd 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2114,6 +2114,9 @@ write_decls ()
     fprintf (header_file, "  RS6000_OVLD_%s,\n", ovlds[i].ovld_id_name);
   fprintf (header_file, "  RS6000_OVLD_MAX\n};\n\n");
 
+  fprintf (header_file,
+	   "extern tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   fprintf (header_file, "enum restriction {\n");
   fprintf (header_file, "  RES_NONE,\n");
   fprintf (header_file, "  RES_BITS,\n");
@@ -2356,13 +2359,9 @@ static int
 write_header_file ()
 {
   write_autogenerated_header (header_file);
-  fprintf (header_file, "#include \"config.h\"\n");
-  fprintf (header_file, "#include \"system.h\"\n");
-  fprintf (header_file, "#include \"coretypes.h\"\n");
-  fprintf (header_file, "#include \"backend.h\"\n");
-  fprintf (header_file, "#include \"rtl.h\"\n");
-  fprintf (header_file, "#include \"tree.h\"\n");
-  fprintf (header_file, "\n");
+
+  fprintf (header_file, "#ifndef _RS6000_BUILTINS_H\n");
+  fprintf (header_file, "#define _RS6000_BUILTINS_H 1\n\n");
   fprintf (header_file, "extern int new_builtins_are_live;\n\n");
 
   write_decls ();
@@ -2370,6 +2369,7 @@ write_header_file ()
   /* Write function type list declarators to the header file.  */
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype);
   fprintf (header_file, "\n");
+  fprintf (header_file, "\n#endif\n");
 
   return 1;
 }
@@ -2527,7 +2527,7 @@ write_init_bif_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_BIF_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n",
 	       bifs[i].idname);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2601,7 +2601,7 @@ write_init_ovld_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_OVLD_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_OVLD_%s] = t\n",
 	       ovlds[i].ovld_id_name);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2643,6 +2643,8 @@ write_init_file ()
   fprintf (init_file, "int new_builtins_are_live = 0;\n\n");
 #endif
 
+  fprintf (init_file, "tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   write_bif_static_init ();
   write_ovld_static_init ();
 
@@ -2692,6 +2694,13 @@ write_init_file ()
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype_init);
   fprintf (init_file, "\n");
 
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_NONE] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_MAX] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_OVLD_NONE] = NULL_TREE;\n\n");
+
   write_init_bif_table ();
   write_init_ovld_table ();
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d8ac2f0cd2f..91ca4eae96c 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -77,6 +77,7 @@
 #include "case-cfn-macros.h"
 #include "ppc-auxv.h"
 #include "rs6000-internal.h"
+#include "rs6000-builtins.h"
 #include "opts.h"
 
 /* This file should be included last.  */
@@ -5398,6 +5399,198 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   return nunroll;
 }
 
+/* Returns a function decl for a vectorized version of the builtin function
+   with builtin function code FN, result vector type TYPE_OUT, and input
+   vector type TYPE_IN, or NULL_TREE if it is not available.  */
+
+static tree
+rs6000_new_builtin_vectorized_function (unsigned int fn, tree type_out,
+					tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "rs6000_new_builtin_vectorized_function (%s, %s, %s)\n",
+	     combined_fn_name (combined_fn (fn)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  switch (fn)
+    {
+    CASE_CFN_COPYSIGN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_COPYSIGN_V4SF];
+      break;
+    CASE_CFN_CEIL:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIP];
+      break;
+    CASE_CFN_FLOOR:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIM];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIM];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIM];
+      break;
+    CASE_CFN_FMA:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VMADDFP];
+      break;
+    CASE_CFN_TRUNC:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIZ];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIZ];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIZ];
+      break;
+    CASE_CFN_NEARBYINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPI];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPI];
+      break;
+    CASE_CFN_RINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && !flag_trapping_math
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIC];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && !flag_trapping_math
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIC];
+      break;
+    default:
+      break;
+    }
+
+  /* Generate calls to libmass if appropriate.  */
+  if (rs6000_veclib_handler)
+    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
+
+  return NULL_TREE;
+}
+
+/* Implement TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
+
+static tree
+rs6000_new_builtin_md_vectorized_function (tree fndecl, tree type_out,
+					   tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr,
+	     "rs6000_new_builtin_md_vectorized_function (%s, %s, %s)\n",
+	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  enum rs6000_builtins fn
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  switch (fn)
+    {
+    case RS6000_BUILTIN_RSQRTF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRSQRTFP];
+      break;
+    case RS6000_BUILTIN_RSQRT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
+      break;
+    case RS6000_BUILTIN_RECIPF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRECIPFP];
+      break;
+    case RS6000_BUILTIN_RECIP:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RECIP_V2DF];
+      break;
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
@@ -5517,6 +5710,9 @@ rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_vectorized_function (fn, type_out, type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
 	     combined_fn_name (combined_fn (fn)),
@@ -5648,6 +5844,10 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_md_vectorized_function (fndecl, type_out,
+						      type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
 	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
@@ -22136,12 +22336,16 @@ rs6000_builtin_reciprocal (tree fndecl)
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
 
     case VSX_BUILTIN_XVSQRTSP:
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_4SF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
 
     default:
@@ -24693,7 +24897,10 @@ add_condition_to_bb (tree function_decl, tree version_decl,
 
   tree bool_zero = build_int_cst (bool_int_type_node, 0);
   tree cond_var = create_tmp_var (bool_int_type_node);
-  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
+  tree predicate_decl
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[(int) RS6000_BIF_CPU_SUPPORTS]
+       : rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS]);
   const char *arg_str = rs6000_clone_map[clone_isa].name;
   tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
   gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
@@ -26809,8 +27016,14 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
       return;
     }
 
-  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
-  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree mffs
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MFFS]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MFFS]);
+  tree mtfsf
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MTFSF]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]);
   tree call_mffs = build_call_expr (mffs, 0);
 
   /* Generates the equivalent of feholdexcept (&fenv_var)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 2ed15485f4b..ebcefc443cc 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,10 +23,6 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
-rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
-	$(COMPILE) $<
-	$(POSTCOMPILE)
-
 rs6000-string.o: $(srcdir)/config/rs6000/rs6000-string.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -79,6 +75,20 @@ rs6000-call.o: $(srcdir)/config/rs6000/rs6000-call.c rs6000-builtins.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c rs6000-builtins.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+# We need a dependency from rs6000.c on rs6000-builtins.h, but
+# apparently rs6000.c is automatically generated, so not sure
+# what to do about this.  #### TODO.  The following "works" but
+# generates a warning about ignoring the old recipe for rs6000.o.
+# Better to try to figure out what that recipe is...
+#rs6000.o: $(srcdir)/config/rs6000/rs6000.c rs6000-builtins.c
+#	$(COMPILE) $<
+#	$(POSTCOMPILE)
+
+
 $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
   $(srcdir)/config/rs6000/rs6000-cpus.def
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins4)] rs6000: Introduce rs6000_builtin_decls_x
@ 2021-02-07 18:15 William Schmidt
  0 siblings, 0 replies; 4+ messages in thread
From: William Schmidt @ 2021-02-07 18:15 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:cb80effa34fbeb737dcad59fc862c58e7d4328ee

commit cb80effa34fbeb737dcad59fc862c58e7d4328ee
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Fri Nov 20 13:35:44 2020 -0600

    rs6000: Introduce rs6000_builtin_decls_x
    
    2020-11-20  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-c.c (rs6000-builtins.h): New #include.
            (altivec_build_new_resolved_builtin): New forward decl.
            (altivec_resolve_new_overloaded_builtin): New forward decl.
            (altivec_build_resolved_builtin): Call
            altivec_build_new_resolved_builtin.
            (altivec_resolve_overloaded_builtin): Call
            altivec_resolve_new_overloaded_builtin.
            (altivec_build_new_resolved_builtin): New function.
            (altivec_resolve_new_overloaded_builtin): Likewise.
            * config/rs6000/rs6000-call.c
            (rs6000_gimple_fold_new_mma_builtin): Use rs6000_builtin_decls_x.
            (rs6000_builtin_decl): Flag for later rewrite.
            * config/rs6000/rs6000-gen-builtins.c (write_decls): Generate decl
            for rs6000_builtin_decls_x.
            (write_header_file): Don't generate includes; add logic to avoid
            double-includes.
            (write_init_bif_table): Generate definition for
            rs6000_builtin_decls_x and preinitialize unused slots.
            * config/rs6000/rs6000.c (rs6000-builtins.h): New #include.
            (rs6000_new_builtin_vectorized_function): New function.
            (rs6000_new_builtin_md_vectorized_function): Likewise.
            (rs6000_builtin_vectorized_function): Call
            rs6000_new_builtin_vectorized_function.
            (rs6000_builtin_md_vectorized_function): Call
            rs6000_new_builtin_md_vectorized_function.
            (rs6000_builtin_reciprocal): Use rs6000_builtin_decls_x.
            (add_condition_to_bb): Likewise.
            (rs6000_atomic_assign_expand_fenv): Likewise.
            * config/rs6000/t-rs6000 (rs6000-c.o): Add dependency on
            rs6000-builtins.c.
            (rs6000.o): Add note about problem to be solved later.

Diff:
---
 gcc/config/rs6000/rs6000-c.c            | 1086 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-call.c         |    5 +-
 gcc/config/rs6000/rs6000-gen-builtins.c |   27 +-
 gcc/config/rs6000/rs6000.c              |  219 ++++++-
 gcc/config/rs6000/t-rs6000              |   17 +-
 5 files changed, 1336 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 06b3bc0df33..5c61536c9fb 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -35,6 +35,14 @@
 #include "langhooks.h"
 #include "c/c-tree.h"
 
+#include "rs6000-builtins.h"
+
+
+static tree
+altivec_build_new_resolved_builtin (tree *, int,
+				    const struct altivec_builtin_types *);
+static tree
+altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
 
 
 /* Handle the machine specific pragma longcall.  Its syntax is
@@ -852,6 +860,9 @@ static tree
 altivec_build_resolved_builtin (tree *args, int n,
 				const struct altivec_builtin_types *desc)
 {
+  if (new_builtins_are_live)
+    return altivec_build_new_resolved_builtin (args, n, desc);
+
   tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
   tree ret_type = rs6000_builtin_type (desc->ret_type);
   tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
@@ -923,6 +934,1081 @@ altivec_build_resolved_builtin (tree *args, int n,
 tree
 altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				    void *passed_arglist)
+{
+  if (new_builtins_are_live)
+    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
+						   passed_arglist);
+
+  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
+  unsigned int nargs = vec_safe_length (arglist);
+  enum rs6000_builtins fcode
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree types[4], args[4];
+  const struct altivec_builtin_types *desc;
+  unsigned int n;
+
+  if (!rs6000_overloaded_builtin_p (fcode))
+    return NULL_TREE;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
+	     (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsl%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+  else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsr%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_MUL)
+    {
+      /* vec_mul needs to be special cased because there are no instructions
+	 for it for the {un}signed char, {un}signed short, and {un}signed int
+	 types.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  case E_QImode:
+	  case E_HImode:
+	  case E_SImode:
+	  case E_DImode:
+	  case E_TImode:
+	    {
+	      /* For scalar types just use a multiply expression.  */
+	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
+				      fold_convert (TREE_TYPE (arg0), arg1));
+	    }
+	  case E_SFmode:
+	    {
+	      /* For floats use the xvmulsp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULSP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  case E_DFmode:
+	    {
+	      /* For doubles use the xvmuldp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULDP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  /* Other types are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE)
+    {
+      /* vec_cmpne needs to be special cased because there are no instructions
+	 for it (prior to power 9).  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      /* Power9 instructions provide the most efficient implementation of
+	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
+	 or SFmode or DFmode.  */
+      if (!TARGET_P9_VECTOR
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
+	{
+	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	    {
+	      /* vec_cmpne (va, vb) == vec_nor (vec_cmpeq (va, vb),
+		 vec_cmpeq (va, vb)).  */
+	      /* Note:  vec_nand also works but opt changes vec_nand's
+		 to vec_nor's anyway.  */
+	    case E_QImode:
+	    case E_HImode:
+	    case E_SImode:
+	    case E_DImode:
+	    case E_TImode:
+	    case E_SFmode:
+	    case E_DFmode:
+	      {
+		/* call = vec_cmpeq (va, vb)
+		   result = vec_nor (call, call).  */
+		vec<tree, va_gc> *params = make_tree_vector ();
+		vec_safe_push (params, arg0);
+		vec_safe_push (params, arg1);
+		tree call = altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ],
+		   params);
+		/* Use save_expr to ensure that operands used more than once
+		   that may have side effects (like calls) are only evaluated
+		   once.  */
+		call = save_expr (call);
+		params = make_tree_vector ();
+		vec_safe_push (params, call);
+		vec_safe_push (params, call);
+		return altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params);
+	      }
+	      /* Other types are errors.  */
+	    default:
+	      goto bad;
+	    }
+	}
+      /* else, fall through and process the Power9 alternative below */
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBE)
+    {
+      /* vec_adde and vec_sube need to be special cased because there is
+	  no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDE ?
+	    "vec_adde": "vec_sube";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
+						   vec_and (carryv, 1)).  */
+	  case E_SImode:
+	    {
+	      tree add_sub_builtin;
+
+	      vec<tree, va_gc> *params = make_tree_vector ();
+	      vec_safe_push (params, arg0);
+	      vec_safe_push (params, arg1);
+
+	      if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	      tree call = altivec_resolve_overloaded_builtin (loc,
+							      add_sub_builtin,
+							      params);
+	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	      tree ones_vector = build_vector_from_val (arg0_type, const1);
+	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					       arg2, ones_vector);
+	      params = make_tree_vector ();
+	      vec_safe_push (params, call);
+	      vec_safe_push (params, and_expr);
+	      return altivec_resolve_overloaded_builtin (loc, add_sub_builtin,
+							 params);
+	    }
+	  /* For {un}signed __int128s use the vaddeuqm/vsubeuqm
+		instructions directly.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBEUQM];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBEC)
+    {
+      /* vec_addec and vec_subec need to be special cased because there is
+	 no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDEC ?
+	    "vec_addec": "vec_subec";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	      vec_addec (va, vb, carryv) ==
+				vec_or (vec_addc (va, vb),
+					vec_addc (vec_add (va, vb),
+						  vec_and (carryv, 0x1))).  */
+	  case E_SImode:
+	    {
+	    /* Use save_expr to ensure that operands used more than once
+		that may have side effects (like calls) are only evaluated
+		once.  */
+	    tree as_builtin;
+	    tree as_c_builtin;
+
+	    arg0 = save_expr (arg0);
+	    arg1 = save_expr (arg1);
+	    vec<tree, va_gc> *params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
+	    else
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							     params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	    else
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	    tree call2 = altivec_resolve_overloaded_builtin (loc, as_builtin,
+							     params);
+	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	    tree ones_vector = build_vector_from_val (arg0_type, const1);
+	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					     arg2, ones_vector);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call2);
+	    vec_safe_push (params, and_expr);
+	    call2 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call1);
+	    vec_safe_push (params, call2);
+	    tree or_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_OR];
+	    return altivec_resolve_overloaded_builtin (loc, or_builtin,
+						       params);
+	    }
+	  /* For {un}signed __int128s use the vaddecuq/vsubecuq
+	     instructions.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBECUQ];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  /* For now treat vec_splats and vec_promote as the same.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
+      || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
+    {
+      tree type, arg;
+      int size;
+      int i;
+      bool unsigned_p;
+      vec<constructor_elt, va_gc> *vec;
+      const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats": "vec_promote";
+
+      if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1)
+	{
+	  error ("builtin %qs only accepts 1 argument", name);
+	  return error_mark_node;
+	}
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", name);
+	  return error_mark_node;
+	}
+      /* Ignore promote's element argument.  */
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE
+	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
+	goto bad;
+
+      arg = (*arglist)[0];
+      type = TREE_TYPE (arg);
+      if (!SCALAR_FLOAT_TYPE_P (type)
+	  && !INTEGRAL_TYPE_P (type))
+	goto bad;
+      unsigned_p = TYPE_UNSIGNED (type);
+      switch (TYPE_MODE (type))
+	{
+	  case E_TImode:
+	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+	    size = 1;
+	    break;
+	  case E_DImode:
+	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+	    size = 2;
+	    break;
+	  case E_SImode:
+	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+	    size = 4;
+	    break;
+	  case E_HImode:
+	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+	    size = 8;
+	    break;
+	  case E_QImode:
+	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+	    size = 16;
+	    break;
+	  case E_SFmode: type = V4SF_type_node; size = 4; break;
+	  case E_DFmode: type = V2DF_type_node; size = 2; break;
+	  default:
+	    goto bad;
+	}
+      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+      vec_alloc (vec, size);
+      for(i = 0; i < size; i++)
+	{
+	  constructor_elt elt = {NULL_TREE, arg};
+	  vec->quick_push (elt);
+	}
+	return build_constructor (type, vec);
+    }
+
+  /* For now use pointer tricks to do the extraction, unless we are on VSX
+     extracting a double from a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
+    {
+      tree arg1;
+      tree arg1_type;
+      tree arg2;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second argument. */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
+	  return error_mark_node;
+	}
+
+      arg2 = (*arglist)[1];
+      arg1 = (*arglist)[0];
+      arg1_type = TREE_TYPE (arg1);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* See if we can optimize vec_extracts with the current VSX instruction
+	 set.  */
+      mode = TYPE_MODE (arg1_type);
+      if (VECTOR_MEM_VSX_P (mode))
+
+	{
+	  tree call = NULL_TREE;
+	  int nunits = GET_MODE_NUNITS (mode);
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the second argument is an integer constant, generate
+	     the built-in code if we can.  We need 64-bit and direct
+	     move to extract the small integer vectors.  */
+	  if (TREE_CODE (arg2) == INTEGER_CST)
+	    {
+	      wide_int selector = wi::to_wide (arg2);
+	      selector = wi::umod_trunc (selector, nunits);
+	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V1TImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  /* If the second argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      tree result = build_call_expr (call, 2, arg1, arg2);
+	      /* Coerce the result to vector element type.  May be no-op.  */
+	      arg1_inner_type = TREE_TYPE (arg1_type);
+	      result = fold_convert (arg1_inner_type, result);
+	      return result;
+	    }
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+			      build_int_cst (TREE_TYPE (arg2),
+					     TYPE_VECTOR_SUBPARTS (arg1_type)
+					     - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+      /* PR83660: We mark this as having side effects so that
+	 downstream in fold_build_cleanup_point_expr () it will get a
+	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
+	 later in gimplify_cleanup_point_expr ().  Potentially this
+	 causes missed optimization because there actually is no side
+	 effect.  */
+      if (c_dialect_cxx ())
+	TREE_SIDE_EFFECTS (stmt) = 1;
+
+      return stmt;
+    }
+
+  /* For now use pointer tricks to do the insertion, unless we are on VSX
+     inserting a double to a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
+    {
+      tree arg0;
+      tree arg1;
+      tree arg2;
+      tree arg1_type;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second or third arguments. */
+      if (nargs != 3)
+	{
+	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
+	  return error_mark_node;
+	}
+
+      arg0 = (*arglist)[0];
+      arg1 = (*arglist)[1];
+      arg1_type = TREE_TYPE (arg1);
+      arg2 = fold_for_warn ((*arglist)[2]);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
+      mode = TYPE_MODE (arg1_type);
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+	  && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  wide_int selector = wi::to_wide (arg2);
+	  selector = wi::umod_trunc (selector, 2);
+	  tree call = NULL_TREE;
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  if (mode == V2DFmode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+	  else if (mode == V2DImode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  if (call)
+	    return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+	       && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+	  wide_int selector = wi::zero(32);
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+      else
+	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+				build_int_cst (TREE_TYPE (arg2),
+					       TYPE_VECTOR_SUBPARTS (arg1_type)
+					       - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+		     convert (TREE_TYPE (stmt), arg0));
+      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+      return stmt;
+    }
+
+  for (n = 0;
+       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+       fnargs = TREE_CHAIN (fnargs), n++)
+    {
+      tree decl_type = TREE_VALUE (fnargs);
+      tree arg = (*arglist)[n];
+      tree type;
+
+      if (arg == error_mark_node)
+	return error_mark_node;
+
+      if (n >= 4)
+	abort ();
+
+      arg = default_conversion (arg);
+
+      /* The C++ front-end converts float * to const void * using
+	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
+      type = TREE_TYPE (arg);
+      if (POINTER_TYPE_P (type)
+	  && TREE_CODE (arg) == NOP_EXPR
+	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+					    const_ptr_type_node)
+	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+					    ptr_type_node))
+	{
+	  arg = TREE_OPERAND (arg, 0);
+	  type = TREE_TYPE (arg);
+	}
+
+      /* Remove the const from the pointers to simplify the overload
+	 matching further down.  */
+      if (POINTER_TYPE_P (decl_type)
+	  && POINTER_TYPE_P (type)
+	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
+	{
+	  if (TYPE_READONLY (TREE_TYPE (type))
+	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
+	    warning (0, "passing argument %d of %qE discards qualifiers from "
+		     "pointer target type", n + 1, fndecl);
+	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+							   0));
+	  arg = fold_convert (type, arg);
+	}
+
+      /* For P9V_BUILTIN_VEC_LXVL, convert any const * to its non constant
+	 equivalent to simplify the overload matching below.  */
+      if (fcode == P9V_BUILTIN_VEC_LXVL)
+	{
+	  if (POINTER_TYPE_P (type)
+	      && TYPE_READONLY (TREE_TYPE (type)))
+	    {
+	      type = build_pointer_type (build_qualified_type (
+						TREE_TYPE (type),0));
+	      arg = fold_convert (type, arg);
+	    }
+	}
+
+      args[n] = arg;
+      types[n] = type;
+    }
+
+  /* If the number of arguments did not match the prototype, return NULL
+     and the generic code will issue the appropriate error message.  */
+  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+    return NULL;
+
+  if (n == 0)
+    abort ();
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_STEP)
+    {
+      if (TREE_CODE (types[0]) != VECTOR_TYPE)
+	goto bad;
+
+      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+    }
+
+  {
+    bool unsupported_builtin = false;
+    enum rs6000_builtins overloaded_code;
+    tree result = NULL;
+    for (desc = altivec_overloaded_builtins;
+	 desc->code && desc->code != fcode; desc++)
+      continue;
+
+    /* Need to special case __builtin_cmpb because the overloaded forms
+       of this function take (unsigned int, unsigned int) or (unsigned
+       long long int, unsigned long long int).  Since C conventions
+       allow the respective argument types to be implicitly coerced into
+       each other, the default handling does not provide adequate
+       discrimination between the desired forms of the function.  */
+    if (fcode == P6_OV_BUILTIN_CMPB)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+	machine_mode arg2_mode = TYPE_MODE (types[1]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
+	    return error_mark_node;
+	  }
+
+	/* If any supplied arguments are wider than 32 bits, resolve to
+	   64-bit variant of built-in function.  */
+	if ((GET_MODE_PRECISION (arg1_mode) > 32)
+	    || (GET_MODE_PRECISION (arg2_mode) > 32))
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB.  */
+	    overloaded_code = P6_BUILTIN_CMPB;
+	  }
+	else
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB_32.  */
+	    overloaded_code = P6_BUILTIN_CMPB_32;
+	  }
+
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if (fcode == P9V_BUILTIN_VEC_VSIEDP)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments",
+		   "scalar_insert_exp");
+	    return error_mark_node;
+	  }
+
+	/* If supplied first argument is wider than 64 bits, resolve to
+	   128-bit variant of built-in function.  */
+	if (GET_MODE_PRECISION (arg1_mode) > 64)
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects __ieee128 argument.  Otherwise, expect
+	       __int128 argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEQPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEQP;
+	  }
+	else
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects double argument.  Otherwise, expect
+	       long long int argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEDPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEDP;
+	  }
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above.  */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if ((fcode == P10_BUILTIN_VEC_XXEVAL)
+	    || (fcode == P10V_BUILTIN_VXXPERMX))
+      {
+	signed char op3_type;
+
+	/* Need to special case P10_BUILTIN_VEC_XXEVAL and
+	   P10V_BUILTIN_VXXPERMX because they take 4 arguments and the
+	   existing infrastructure only handles three.  */
+	if (nargs != 4)
+	  {
+	    const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ?
+	      "__builtin_vec_xxeval":"__builtin_vec_xxpermx";
+
+	    error ("builtin %qs requires 4 arguments", name);
+	    return error_mark_node;
+	  }
+
+	for ( ; desc->code == fcode; desc++)
+	  {
+	    if (fcode == P10_BUILTIN_VEC_XXEVAL)
+	      op3_type = desc->op3;
+	    else  /* P10V_BUILTIN_VXXPERMX */
+	      op3_type = RS6000_BTI_V16QI;
+
+	    if (rs6000_builtin_type_compatible (types[0], desc->op1)
+		&& rs6000_builtin_type_compatible (types[1], desc->op2)
+		&& rs6000_builtin_type_compatible (types[2], desc->op3)
+		&& rs6000_builtin_type_compatible (types[2], op3_type)
+		&& rs6000_builtin_type_compatible (types[3],
+						   RS6000_BTI_UINTSI))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] == NULL_TREE)
+		  unsupported_builtin = true;
+		else
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      return result;
+		    /* Allow loop to continue in case a different
+		       definition is supported.  */
+		    overloaded_code = desc->overloaded_code;
+		    unsupported_builtin = true;
+		  }
+	      }
+	  }
+      }
+    else
+      {
+	/* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in
+	   the opX fields.  */
+	for (; desc->code == fcode; desc++)
+	  {
+	    if ((desc->op1 == RS6000_BTI_NOT_OPAQUE
+		 || rs6000_builtin_type_compatible (types[0], desc->op1))
+		&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[1], desc->op2))
+		&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[2], desc->op3)))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (!rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      {
+			/* Allow loop to continue in case a different
+			   definition is supported.  */
+			overloaded_code = desc->overloaded_code;
+			unsupported_builtin = true;
+		      }
+		    else
+		      return result;
+		  }
+		else
+		  unsupported_builtin = true;
+	      }
+	  }
+      }
+
+    if (unsupported_builtin)
+      {
+	const char *name = rs6000_overloaded_builtin_name (fcode);
+	if (result != NULL)
+	  {
+	    const char *internal_name
+	      = rs6000_overloaded_builtin_name (overloaded_code);
+	    /* An error message making reference to the name of the
+	       non-overloaded function has already been issued.  Add
+	       clarification of the previous message.  */
+	    rich_location richloc (line_table, input_location);
+	    inform (&richloc, "builtin %qs requires builtin %qs",
+		    name, internal_name);
+	  }
+	else
+	  error ("%qs is not supported in this compiler configuration", name);
+	/* If an error-representing result tree was returned from
+	   altivec_build_resolved_builtin above, use it.  */
+	return (result != NULL) ? result : error_mark_node;
+      }
+  }
+ bad:
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
+    return error_mark_node;
+  }
+}
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+   The overloaded builtin that matched the types and args is described
+   by DESC.  The N arguments are given in ARGS, respectively.
+
+   Actually the only thing it does is calling fold_convert on ARGS, with
+   a small exception for vec_{all,any}_{ge,le} predicates. */
+
+static tree
+altivec_build_new_resolved_builtin (tree *args, int n,
+				    const struct altivec_builtin_types *desc)
+{
+  tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
+  tree ret_type = rs6000_builtin_type (desc->ret_type);
+  tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
+  tree arg_type[4];
+  tree call;
+
+  int i;
+  for (i = 0; i < n; i++)
+    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+  /* The AltiVec overloading implementation is overall gross, but this
+     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
+     are completely different for floating-point vs. integer vector
+     types, because the former has vcmpgefp, but the latter should use
+     vcmpgtXX.
+
+     In practice, the second and third arguments are swapped, and the
+     condition (LT vs. EQ, which is recognizable by bit 1 of the first
+     argument) is reversed.  Patch the arguments here before building
+     the resolved CALL_EXPR.  */
+  if (n == 3
+      && desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P
+      && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P
+      && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P)
+    {
+      std::swap (args[1], args[2]);
+      std::swap (arg_type[1], arg_type[2]);
+
+      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+			     build_int_cst (NULL_TREE, 2));
+    }
+
+  switch (n)
+    {
+    case 0:
+      call = build_call_expr (impl_fndecl, 0);
+      break;
+    case 1:
+      call = build_call_expr (impl_fndecl, 1,
+			      fully_fold_convert (arg_type[0], args[0]));
+      break;
+    case 2:
+      call = build_call_expr (impl_fndecl, 2,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]));
+      break;
+    case 3:
+      call = build_call_expr (impl_fndecl, 3,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]));
+      break;
+    case 4:
+      call = build_call_expr (impl_fndecl, 4,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]),
+			      fully_fold_convert (arg_type[3], args[3]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return fold_convert (ret_type, call);
+}
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+   support Altivec's overloaded builtins.  */
+
+static tree
+altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
+					void *passed_arglist)
 {
   vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
   unsigned int nargs = vec_safe_length (arglist);
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 9be5997cf8f..84a5f03de57 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14263,7 +14263,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
       /* We're disassembling an accumulator into a different type, so we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
+      new_decl = rs6000_builtin_decls_x[RS6000_BIF_XXMFACC_INTERNAL];
       new_call = gimple_build_call (new_decl, 1, src);
       src = make_ssa_name (vector_quad_type_node);
       gimple_call_set_lhs (new_call, src);
@@ -14292,7 +14292,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
-  new_decl = rs6000_builtin_decls[fncode + 1];
+  new_decl = rs6000_builtin_decls_x[fncode + 1];
   tree lhs, op[MAX_MMA_OPERANDS];
   tree acc = gimple_call_arg (stmt, 0);
   push_gimplify_context (true);
@@ -16070,6 +16070,7 @@ rs6000_init_builtins (void)
 }
 
 /* Returns the rs6000 builtin decl for CODE.  */
+/* #### TODO: Rewrite this.  */
 
 tree
 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index 9e5adbae7c7..c7f405e33fd 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2114,6 +2114,9 @@ write_decls ()
     fprintf (header_file, "  RS6000_OVLD_%s,\n", ovlds[i].ovld_id_name);
   fprintf (header_file, "  RS6000_OVLD_MAX\n};\n\n");
 
+  fprintf (header_file,
+	   "extern tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   fprintf (header_file, "enum restriction {\n");
   fprintf (header_file, "  RES_NONE,\n");
   fprintf (header_file, "  RES_BITS,\n");
@@ -2356,13 +2359,9 @@ static int
 write_header_file ()
 {
   write_autogenerated_header (header_file);
-  fprintf (header_file, "#include \"config.h\"\n");
-  fprintf (header_file, "#include \"system.h\"\n");
-  fprintf (header_file, "#include \"coretypes.h\"\n");
-  fprintf (header_file, "#include \"backend.h\"\n");
-  fprintf (header_file, "#include \"rtl.h\"\n");
-  fprintf (header_file, "#include \"tree.h\"\n");
-  fprintf (header_file, "\n");
+
+  fprintf (header_file, "#ifndef _RS6000_BUILTINS_H\n");
+  fprintf (header_file, "#define _RS6000_BUILTINS_H 1\n\n");
   fprintf (header_file, "extern int new_builtins_are_live;\n\n");
 
   write_decls ();
@@ -2370,6 +2369,7 @@ write_header_file ()
   /* Write function type list declarators to the header file.  */
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype);
   fprintf (header_file, "\n");
+  fprintf (header_file, "\n#endif\n");
 
   return 1;
 }
@@ -2527,7 +2527,7 @@ write_init_bif_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_BIF_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n",
 	       bifs[i].idname);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2601,7 +2601,7 @@ write_init_ovld_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_OVLD_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_OVLD_%s] = t\n",
 	       ovlds[i].ovld_id_name);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2643,6 +2643,8 @@ write_init_file ()
   fprintf (init_file, "int new_builtins_are_live = 0;\n\n");
 #endif
 
+  fprintf (init_file, "tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   write_bif_static_init ();
   write_ovld_static_init ();
 
@@ -2692,6 +2694,13 @@ write_init_file ()
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype_init);
   fprintf (init_file, "\n");
 
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_NONE] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_MAX] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_OVLD_NONE] = NULL_TREE;\n\n");
+
   write_init_bif_table ();
   write_init_ovld_table ();
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index ec068c58aa5..fb512fd9f0f 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -77,6 +77,7 @@
 #include "case-cfn-macros.h"
 #include "ppc-auxv.h"
 #include "rs6000-internal.h"
+#include "rs6000-builtins.h"
 #include "opts.h"
 
 /* This file should be included last.  */
@@ -5424,6 +5425,198 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   return nunroll;
 }
 
+/* Returns a function decl for a vectorized version of the combined function
+   FN with result vector type TYPE_OUT and argument vector type TYPE_IN, or
+   NULL_TREE if none is available.  Looks in rs6000_builtin_decls_x first.  */
+
+static tree
+rs6000_new_builtin_vectorized_function (unsigned int fn, tree type_out,
+					tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "rs6000_new_builtin_vectorized_function (%s, %s, %s)\n",
+	     combined_fn_name (combined_fn (fn)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));  /* Element mode.  */
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);  /* Element count.  */
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  switch (fn)
+    {
+    CASE_CFN_COPYSIGN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_COPYSIGN_V4SF];
+      break;
+    CASE_CFN_CEIL:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIP];
+      break;
+    CASE_CFN_FLOOR:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIM];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIM];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIM];
+      break;
+    CASE_CFN_FMA:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VMADDFP];
+      break;
+    CASE_CFN_TRUNC:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIZ];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIZ];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIZ];
+      break;
+    CASE_CFN_NEARBYINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPI];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPI];
+      break;
+    CASE_CFN_RINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && !flag_trapping_math
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIC];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && !flag_trapping_math
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIC];
+      break;
+    default:
+      break;
+    }
+
+  /* No direct vector builtin matched; try libmass if a handler is set.  */
+  if (rs6000_veclib_handler)
+    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
+
+  return NULL_TREE;
+}
+
+/* TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION for the new builtins.  */
+
+static tree
+rs6000_new_builtin_md_vectorized_function (tree fndecl, tree type_out,
+					   tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr,
+	     "rs6000_new_builtin_md_vectorized_function (%s, %s, %s)\n",
+	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));  /* Element mode.  */
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);  /* Element count.  */
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  enum rs6000_builtins fn
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  switch (fn)  /* NOTE(review): old-table codes; confirm for new decls.  */
+    {
+    case RS6000_BUILTIN_RSQRTF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRSQRTFP];
+      break;
+    case RS6000_BUILTIN_RSQRT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
+      break;
+    case RS6000_BUILTIN_RECIPF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRECIPFP];
+      break;
+    case RS6000_BUILTIN_RECIP:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RECIP_V2DF];
+      break;
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
@@ -5543,6 +5736,9 @@ rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_vectorized_function (fn, type_out, type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
 	     combined_fn_name (combined_fn (fn)),
@@ -5674,6 +5870,10 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_md_vectorized_function (fndecl, type_out,
+						      type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
 	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
@@ -22316,12 +22516,16 @@ rs6000_builtin_reciprocal (tree fndecl)
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
 
     case VSX_BUILTIN_XVSQRTSP:
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_4SF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
 
     default:
@@ -24884,7 +25088,10 @@ add_condition_to_bb (tree function_decl, tree version_decl,
 
   tree bool_zero = build_int_cst (bool_int_type_node, 0);
   tree cond_var = create_tmp_var (bool_int_type_node);
-  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
+  tree predicate_decl
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[(int) RS6000_BIF_CPU_SUPPORTS]
+       : rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS]);
   const char *arg_str = rs6000_clone_map[clone_isa].name;
   tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
   gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
@@ -27044,8 +27251,14 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
       return;
     }
 
-  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
-  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree mffs
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MFFS]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MFFS]);
+  tree mtfsf
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MTFSF]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]);
   tree call_mffs = build_call_expr (mffs, 0);
 
   /* Generates the equivalent of feholdexcept (&fenv_var)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index da1630175e3..6634b531ead 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,10 +23,6 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
-rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
-	$(COMPILE) $<
-	$(POSTCOMPILE)
-
 rs6000-string.o: $(srcdir)/config/rs6000/rs6000-string.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -79,6 +75,19 @@ rs6000-call.o: $(srcdir)/config/rs6000/rs6000-call.c rs6000-builtins.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c rs6000-builtins.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+# We need a dependency from rs6000.o on rs6000-builtins.h, but
+# apparently the rule for rs6000.o is automatically generated, so
+# it is not clear how to add one.  #### TODO.  The following "works"
+# but generates a warning about ignoring the old recipe for rs6000.o.
+# Better to figure out where that recipe comes from...
+#rs6000.o: $(srcdir)/config/rs6000/rs6000.c rs6000-builtins.c
+#	$(COMPILE) $<
+#	$(POSTCOMPILE)
+
 #$(srcdir)/config/rs6000/fusion.md: $(srcdir)/config/rs6000/genfusion.pl
 #	$(srcdir)/config/rs6000/genfusion.pl > $(srcdir)/config/rs6000/fusion.md


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins4)] rs6000: Introduce rs6000_builtin_decls_x
@ 2020-12-16 18:09 William Schmidt
  0 siblings, 0 replies; 4+ messages in thread
From: William Schmidt @ 2020-12-16 18:09 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e79cb1f037d50b29ec63fc34a946b9a29e7e8cb4

commit e79cb1f037d50b29ec63fc34a946b9a29e7e8cb4
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Fri Nov 20 13:35:44 2020 -0600

    rs6000: Introduce rs6000_builtin_decls_x
    
    2020-11-20  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-c.c (rs6000-builtins.h): New #include.
            (altivec_build_new_resolved_builtin): New forward decl.
            (altivec_resolve_new_overloaded_builtin): New forward decl.
            (altivec_build_resolved_builtin): Call
            altivec_build_new_resolved_builtin.
            (altivec_resolve_overloaded_builtin): Call
            altivec_resolve_new_overloaded_builtin.
            (altivec_build_new_resolved_builtin): New function.
            (altivec_resolve_new_overloaded_builtin): Likewise.
            * config/rs6000/rs6000-call.c
            (rs6000_gimple_fold_new_mma_builtin): Use rs6000_builtin_decls_x.
            (rs6000_builtin_decl): Flag for later rewrite.
            * config/rs6000/rs6000-gen-builtins.c (write_decls): Generate decl
            for rs6000_builtin_decls_x.
            (write_header_file): Don't generate includes; add logic to avoid
            double-includes.
            (write_init_bif_table): Use rs6000_builtin_decls_x.
            (write_init_ovld_table): Likewise.
            (write_init_file): Generate definition for rs6000_builtin_decls_x
            and preinitialize unused slots.
            * config/rs6000/rs6000.c (rs6000-builtins.h): New #include.
            (rs6000_new_builtin_vectorized_function): New function.
            (rs6000_new_builtin_md_vectorized_function): Likewise.
            (rs6000_builtin_vectorized_function): Call
            rs6000_new_builtin_vectorized_function.
            (rs6000_builtin_md_vectorized_function): Call
            rs6000_new_builtin_md_vectorized_function.
            (rs6000_builtin_reciprocal): Use rs6000_builtin_decls_x.
            (add_condition_to_bb): Likewise.
            (rs6000_atomic_assign_expand_fenv): Likewise.
            * config/rs6000/t-rs6000 (rs6000-c.o): Add dependency on
            rs6000-builtins.c.
            (rs6000.o): Add note about problem to be solved later.

Diff:
---
 gcc/config/rs6000/rs6000-c.c            | 1086 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-call.c         |    5 +-
 gcc/config/rs6000/rs6000-gen-builtins.c |   27 +-
 gcc/config/rs6000/rs6000.c              |  219 ++++++-
 gcc/config/rs6000/t-rs6000              |   18 +-
 5 files changed, 1337 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cc1e997524e..c6f0b88ac16 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -35,6 +35,14 @@
 #include "langhooks.h"
 #include "c/c-tree.h"
 
+#include "rs6000-builtins.h"
+
+
+static tree
+altivec_build_new_resolved_builtin (tree *, int,
+				    const struct altivec_builtin_types *);
+static tree
+altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
 
 
 /* Handle the machine specific pragma longcall.  Its syntax is
@@ -850,6 +858,9 @@ static tree
 altivec_build_resolved_builtin (tree *args, int n,
 				const struct altivec_builtin_types *desc)
 {
+  if (new_builtins_are_live)
+    return altivec_build_new_resolved_builtin (args, n, desc);
+
   tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
   tree ret_type = rs6000_builtin_type (desc->ret_type);
   tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
@@ -921,6 +932,1081 @@ altivec_build_resolved_builtin (tree *args, int n,
 tree
 altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				    void *passed_arglist)
+{
+  if (new_builtins_are_live)
+    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
+						   passed_arglist);
+
+  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
+  unsigned int nargs = vec_safe_length (arglist);
+  enum rs6000_builtins fcode
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree types[4], args[4];
+  const struct altivec_builtin_types *desc;
+  unsigned int n;
+
+  if (!rs6000_overloaded_builtin_p (fcode))
+    return NULL_TREE;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
+	     (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsl%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+  else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsr%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_MUL)
+    {
+      /* vec_mul needs to be special cased because there are no instructions
+	 for it for the {un}signed char, {un}signed short, and {un}signed int
+	 types.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  case E_QImode:
+	  case E_HImode:
+	  case E_SImode:
+	  case E_DImode:
+	  case E_TImode:
+	    {
+	      /* For scalar types just use a multiply expression.  */
+	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
+				      fold_convert (TREE_TYPE (arg0), arg1));
+	    }
+	  case E_SFmode:
+	    {
+	      /* For floats use the xvmulsp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULSP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  case E_DFmode:
+	    {
+	      /* For doubles use the xvmuldp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULDP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  /* Other types are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE)
+    {
+      /* vec_cmpne needs to be special cased because there are no instructions
+	 for it (prior to power 9).  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      /* Power9 instructions provide the most efficient implementation of
+	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
+	 or SFmode or DFmode.  */
+      if (!TARGET_P9_VECTOR
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
+	{
+	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	    {
+	      /* vec_cmpneq (va, vb) == vec_nor (vec_cmpeq (va, vb),
+		 vec_cmpeq (va, vb)).  */
+	      /* Note:  vec_nand also works but opt changes vec_nand's
+		 to vec_nor's anyway.  */
+	    case E_QImode:
+	    case E_HImode:
+	    case E_SImode:
+	    case E_DImode:
+	    case E_TImode:
+	    case E_SFmode:
+	    case E_DFmode:
+	      {
+		/* call = vec_cmpeq (va, vb)
+		   result = vec_nor (call, call).  */
+		vec<tree, va_gc> *params = make_tree_vector ();
+		vec_safe_push (params, arg0);
+		vec_safe_push (params, arg1);
+		tree call = altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ],
+		   params);
+		/* Use save_expr to ensure that operands used more than once
+		   that may have side effects (like calls) are only evaluated
+		   once.  */
+		call = save_expr (call);
+		params = make_tree_vector ();
+		vec_safe_push (params, call);
+		vec_safe_push (params, call);
+		return altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params);
+	      }
+	      /* Other types are errors.  */
+	    default:
+	      goto bad;
+	    }
+	}
+      /* else, fall through and process the Power9 alternative below */
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBE)
+    {
+      /* vec_adde needs to be special cased because there is no instruction
+	  for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDE ?
+	    "vec_adde": "vec_sube";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
+						   vec_and (carryv, 1)).  */
+	  case E_SImode:
+	    {
+	      tree add_sub_builtin;
+
+	      vec<tree, va_gc> *params = make_tree_vector ();
+	      vec_safe_push (params, arg0);
+	      vec_safe_push (params, arg1);
+
+	      if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	      tree call = altivec_resolve_overloaded_builtin (loc,
+							      add_sub_builtin,
+							      params);
+	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	      tree ones_vector = build_vector_from_val (arg0_type, const1);
+	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					       arg2, ones_vector);
+	      params = make_tree_vector ();
+	      vec_safe_push (params, call);
+	      vec_safe_push (params, and_expr);
+	      return altivec_resolve_overloaded_builtin (loc, add_sub_builtin,
+							 params);
+	    }
+	  /* For {un}signed __int128s use the vaddeuqm instruction
+		directly.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBEUQM];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBEC)
+    {
+      /* vec_addec and vec_subec needs to be special cased because there is
+	 no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDEC ?
+	    "vec_addec": "vec_subec";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	      vec_addec (va, vb, carryv) ==
+				vec_or (vec_addc (va, vb),
+					vec_addc (vec_add (va, vb),
+						  vec_and (carryv, 0x1))).  */
+	  case E_SImode:
+	    {
+	    /* Use save_expr to ensure that operands used more than once
+		that may have side effects (like calls) are only evaluated
+		once.  */
+	    tree as_builtin;
+	    tree as_c_builtin;
+
+	    arg0 = save_expr (arg0);
+	    arg1 = save_expr (arg1);
+	    vec<tree, va_gc> *params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
+	    else
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							     params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	    else
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	    tree call2 = altivec_resolve_overloaded_builtin (loc, as_builtin,
+							     params);
+	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	    tree ones_vector = build_vector_from_val (arg0_type, const1);
+	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					     arg2, ones_vector);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call2);
+	    vec_safe_push (params, and_expr);
+	    call2 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call1);
+	    vec_safe_push (params, call2);
+	    tree or_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_OR];
+	    return altivec_resolve_overloaded_builtin (loc, or_builtin,
+						       params);
+	    }
+	  /* For {un}signed __int128s use the vaddecuq/vsubbecuq
+	     instructions.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBECUQ];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  /* For now treat vec_splats and vec_promote as the same.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
+      || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
+    {
+      tree type, arg;
+      int size;
+      int i;
+      bool unsigned_p;
+      vec<constructor_elt, va_gc> *vec;
+      const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats": "vec_promote";
+
+      if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1)
+	{
+	  error ("builtin %qs only accepts 1 argument", name);
+	  return error_mark_node;
+	}
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", name);
+	  return error_mark_node;
+	}
+      /* Ignore promote's element argument.  */
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE
+	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
+	goto bad;
+
+      arg = (*arglist)[0];
+      type = TREE_TYPE (arg);
+      if (!SCALAR_FLOAT_TYPE_P (type)
+	  && !INTEGRAL_TYPE_P (type))
+	goto bad;
+      unsigned_p = TYPE_UNSIGNED (type);
+      switch (TYPE_MODE (type))
+	{
+	  case E_TImode:
+	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+	    size = 1;
+	    break;
+	  case E_DImode:
+	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+	    size = 2;
+	    break;
+	  case E_SImode:
+	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+	    size = 4;
+	    break;
+	  case E_HImode:
+	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+	    size = 8;
+	    break;
+	  case E_QImode:
+	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+	    size = 16;
+	    break;
+	  case E_SFmode: type = V4SF_type_node; size = 4; break;
+	  case E_DFmode: type = V2DF_type_node; size = 2; break;
+	  default:
+	    goto bad;
+	}
+      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+      vec_alloc (vec, size);
+      for(i = 0; i < size; i++)
+	{
+	  constructor_elt elt = {NULL_TREE, arg};
+	  vec->quick_push (elt);
+	}
+	return build_constructor (type, vec);
+    }
+
+  /* For now use pointer tricks to do the extraction, unless we are on VSX
+     extracting a double from a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
+    {
+      tree arg1;
+      tree arg1_type;
+      tree arg2;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second argument. */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
+	  return error_mark_node;
+	}
+
+      arg2 = (*arglist)[1];
+      arg1 = (*arglist)[0];
+      arg1_type = TREE_TYPE (arg1);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* See if we can optimize vec_extracts with the current VSX instruction
+	 set.  */
+      mode = TYPE_MODE (arg1_type);
+      if (VECTOR_MEM_VSX_P (mode))
+
+	{
+	  tree call = NULL_TREE;
+	  int nunits = GET_MODE_NUNITS (mode);
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the second argument is an integer constant, generate
+	     the built-in code if we can.  We need 64-bit and direct
+	     move to extract the small integer vectors.  */
+	  if (TREE_CODE (arg2) == INTEGER_CST)
+	    {
+	      wide_int selector = wi::to_wide (arg2);
+	      selector = wi::umod_trunc (selector, nunits);
+	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V1TImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  /* If the second argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      tree result = build_call_expr (call, 2, arg1, arg2);
+	      /* Coerce the result to vector element type.  May be no-op.  */
+	      arg1_inner_type = TREE_TYPE (arg1_type);
+	      result = fold_convert (arg1_inner_type, result);
+	      return result;
+	    }
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+			      build_int_cst (TREE_TYPE (arg2),
+					     TYPE_VECTOR_SUBPARTS (arg1_type)
+					     - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+      /* PR83660: We mark this as having side effects so that
+	 downstream in fold_build_cleanup_point_expr () it will get a
+	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
+	 later in gimplify_cleanup_point_expr ().  Potentially this
+	 causes missed optimization because there actually is no side
+	 effect.  */
+      if (c_dialect_cxx ())
+	TREE_SIDE_EFFECTS (stmt) = 1;
+
+      return stmt;
+    }
+
+  /* For now use pointer tricks to do the insertion, unless we are on VSX
+     inserting a double to a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
+    {
+      tree arg0;
+      tree arg1;
+      tree arg2;
+      tree arg1_type;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second or third arguments. */
+      if (nargs != 3)
+	{
+	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
+	  return error_mark_node;
+	}
+
+      arg0 = (*arglist)[0];
+      arg1 = (*arglist)[1];
+      arg1_type = TREE_TYPE (arg1);
+      arg2 = fold_for_warn ((*arglist)[2]);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
+      mode = TYPE_MODE (arg1_type);
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+	  && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  wide_int selector = wi::to_wide (arg2);
+	  selector = wi::umod_trunc (selector, 2);
+	  tree call = NULL_TREE;
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  if (mode == V2DFmode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+	  else if (mode == V2DImode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  if (call)
+	    return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+	       && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+	  wide_int selector = wi::zero(32);
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+      else
+	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+				build_int_cst (TREE_TYPE (arg2),
+					       TYPE_VECTOR_SUBPARTS (arg1_type)
+					       - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+		     convert (TREE_TYPE (stmt), arg0));
+      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+      return stmt;
+    }
+
+  for (n = 0;
+       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+       fnargs = TREE_CHAIN (fnargs), n++)
+    {
+      tree decl_type = TREE_VALUE (fnargs);
+      tree arg = (*arglist)[n];
+      tree type;
+
+      if (arg == error_mark_node)
+	return error_mark_node;
+
+      if (n >= 4)
+	abort ();
+
+      arg = default_conversion (arg);
+
+      /* The C++ front-end converts float * to const void * using
+	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
+      type = TREE_TYPE (arg);
+      if (POINTER_TYPE_P (type)
+	  && TREE_CODE (arg) == NOP_EXPR
+	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+					    const_ptr_type_node)
+	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+					    ptr_type_node))
+	{
+	  arg = TREE_OPERAND (arg, 0);
+	  type = TREE_TYPE (arg);
+	}
+
+      /* Remove the const from the pointers to simplify the overload
+	 matching further down.  */
+      if (POINTER_TYPE_P (decl_type)
+	  && POINTER_TYPE_P (type)
+	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
+	{
+	  if (TYPE_READONLY (TREE_TYPE (type))
+	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
+	    warning (0, "passing argument %d of %qE discards qualifiers from "
+		     "pointer target type", n + 1, fndecl);
+	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+							   0));
+	  arg = fold_convert (type, arg);
+	}
+
+      /* For P9V_BUILTIN_VEC_LXVL, convert any const * to its non constant
+	 equivalent to simplify the overload matching below.  */
+      if (fcode == P9V_BUILTIN_VEC_LXVL)
+	{
+	  if (POINTER_TYPE_P (type)
+	      && TYPE_READONLY (TREE_TYPE (type)))
+	    {
+	      type = build_pointer_type (build_qualified_type (
+						TREE_TYPE (type),0));
+	      arg = fold_convert (type, arg);
+	    }
+	}
+
+      args[n] = arg;
+      types[n] = type;
+    }
+
+  /* If the number of arguments did not match the prototype, return NULL
+     and the generic code will issue the appropriate error message.  */
+  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+    return NULL;
+
+  if (n == 0)
+    abort ();
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_STEP)
+    {
+      if (TREE_CODE (types[0]) != VECTOR_TYPE)
+	goto bad;
+
+      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+    }
+
+  {
+    bool unsupported_builtin = false;
+    enum rs6000_builtins overloaded_code;
+    tree result = NULL;
+    for (desc = altivec_overloaded_builtins;
+	 desc->code && desc->code != fcode; desc++)
+      continue;
+
+    /* Need to special case __builtin_cmp because the overloaded forms
+       of this function take (unsigned int, unsigned int) or (unsigned
+       long long int, unsigned long long int).  Since C conventions
+       allow the respective argument types to be implicitly coerced into
+       each other, the default handling does not provide adequate
+       discrimination between the desired forms of the function.  */
+    if (fcode == P6_OV_BUILTIN_CMPB)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+	machine_mode arg2_mode = TYPE_MODE (types[1]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
+	    return error_mark_node;
+	  }
+
+	/* If any supplied arguments are wider than 32 bits, resolve to
+	   64-bit variant of built-in function.  */
+	if ((GET_MODE_PRECISION (arg1_mode) > 32)
+	    || (GET_MODE_PRECISION (arg2_mode) > 32))
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB.  */
+	    overloaded_code = P6_BUILTIN_CMPB;
+	  }
+	else
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB_32.  */
+	    overloaded_code = P6_BUILTIN_CMPB_32;
+	  }
+
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if (fcode == P9V_BUILTIN_VEC_VSIEDP)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments",
+		   "scalar_insert_exp");
+	    return error_mark_node;
+	  }
+
+	/* If supplied first argument is wider than 64 bits, resolve to
+	   128-bit variant of built-in function.  */
+	if (GET_MODE_PRECISION (arg1_mode) > 64)
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects __ieee128 argument.  Otherwise, expect
+	       __int128 argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEQPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEQP;
+	  }
+	else
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects double argument.  Otherwise, expect
+	       long long int argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEDPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEDP;
+	  }
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above.  */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if ((fcode == P10_BUILTIN_VEC_XXEVAL)
+	    || (fcode == P10V_BUILTIN_VXXPERMX))
+      {
+	signed char op3_type;
+
+	/* Need to special case P10_BUILTIN_VEC_XXEVAL and
+	   P10V_BUILTIN_VXXPERMX because they take 4 arguments and the
+	   existing infrastructure only handles three.  */
+	if (nargs != 4)
+	  {
+	    const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ?
+	      "__builtin_vec_xxeval":"__builtin_vec_xxpermx";
+
+	    error ("builtin %qs requires 4 arguments", name);
+	    return error_mark_node;
+	  }
+
+	for ( ; desc->code == fcode; desc++)
+	  {
+	    if (fcode == P10_BUILTIN_VEC_XXEVAL)
+	      op3_type = desc->op3;
+	    else  /* P10V_BUILTIN_VXXPERMX */
+	      op3_type = RS6000_BTI_V16QI;
+
+	    if (rs6000_builtin_type_compatible (types[0], desc->op1)
+		&& rs6000_builtin_type_compatible (types[1], desc->op2)
+		&& rs6000_builtin_type_compatible (types[2], desc->op3)
+		&& rs6000_builtin_type_compatible (types[2], op3_type)
+		&& rs6000_builtin_type_compatible (types[3],
+						   RS6000_BTI_UINTSI))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] == NULL_TREE)
+		  unsupported_builtin = true;
+		else
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      return result;
+		    /* Allow loop to continue in case a different
+		       definition is supported.  */
+		    overloaded_code = desc->overloaded_code;
+		    unsupported_builtin = true;
+		  }
+	      }
+	  }
+      }
+    else
+      {
+	/* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in
+	   the opX fields.  */
+	for (; desc->code == fcode; desc++)
+	  {
+	    if ((desc->op1 == RS6000_BTI_NOT_OPAQUE
+		 || rs6000_builtin_type_compatible (types[0], desc->op1))
+		&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[1], desc->op2))
+		&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[2], desc->op3)))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (!rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      {
+			/* Allow loop to continue in case a different
+			   definition is supported.  */
+			overloaded_code = desc->overloaded_code;
+			unsupported_builtin = true;
+		      }
+		    else
+		      return result;
+		  }
+		else
+		  unsupported_builtin = true;
+	      }
+	  }
+      }
+
+    if (unsupported_builtin)
+      {
+	const char *name = rs6000_overloaded_builtin_name (fcode);
+	if (result != NULL)
+	  {
+	    const char *internal_name
+	      = rs6000_overloaded_builtin_name (overloaded_code);
+	    /* An error message making reference to the name of the
+	       non-overloaded function has already been issued.  Add
+	       clarification of the previous message.  */
+	    rich_location richloc (line_table, input_location);
+	    inform (&richloc, "builtin %qs requires builtin %qs",
+		    name, internal_name);
+	  }
+	else
+	  error ("%qs is not supported in this compiler configuration", name);
+	/* If an error-representing  result tree was returned from
+	   altivec_build_resolved_builtin above, use it.  */
+	return (result != NULL) ? result : error_mark_node;
+      }
+  }
+ bad:
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
+    return error_mark_node;
+  }
+}
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+   The overloaded builtin that matched the types and args is described
+   by DESC.  The N arguments (at most four) are given in ARGS.
+   Each argument is converted with fully_fold_convert to the matching
+   parameter type of the resolved decl and the call to DESC's return type,
+   with a small exception for vec_{all,any}_{ge,le} predicates.  */
+
+static tree
+altivec_build_new_resolved_builtin (tree *args, int n,
+				    const struct altivec_builtin_types *desc)
+{
+  tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
+  tree ret_type = rs6000_builtin_type (desc->ret_type);
+  tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
+  tree arg_type[4];
+  tree call;
+  /* NOTE(review): impl_fndecl uses rs6000_builtin_decls, not *_x; confirm.  */
+  int i;
+  for (i = 0; i < n; i++)
+    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+  /* The AltiVec overloading implementation is overall gross, but this
+     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
+     are completely different for floating-point vs. integer vector
+     types, because the former has vcmpgefp, but the latter should use
+     vcmpgtXX.
+
+     In practice, the second and third arguments are swapped, and the
+     condition (LT vs. EQ, which is recognizable by bit 1 of the first
+     argument) is reversed.  Patch the arguments here before building
+     the resolved CALL_EXPR.  */
+  if (n == 3
+      && desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P
+      && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P
+      && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P)
+    {
+      std::swap (args[1], args[2]);
+      std::swap (arg_type[1], arg_type[2]);
+      /* XOR with 2 toggles the condition bit described above.  */
+      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+			     build_int_cst (NULL_TREE, 2));
+    }
+  /* Emit the call with exactly N converted arguments.  */
+  switch (n)
+    {
+    case 0:
+      call = build_call_expr (impl_fndecl, 0);
+      break;
+    case 1:
+      call = build_call_expr (impl_fndecl, 1,
+			      fully_fold_convert (arg_type[0], args[0]));
+      break;
+    case 2:
+      call = build_call_expr (impl_fndecl, 2,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]));
+      break;
+    case 3:
+      call = build_call_expr (impl_fndecl, 3,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]));
+      break;
+    case 4:
+      call = build_call_expr (impl_fndecl, 4,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]),
+			      fully_fold_convert (arg_type[3], args[3]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return fold_convert (ret_type, call);
+}
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+   support Altivec's overloaded builtins.  */
+
+static tree
+altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
+					void *passed_arglist)
 {
   vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
   unsigned int nargs = vec_safe_length (arglist);
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 58d52ff4ddc..4c0fd5406f6 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14216,7 +14216,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
       /* We're disassembling an accumulator into a different type, so we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
+      new_decl = rs6000_builtin_decls_x[RS6000_BIF_XXMFACC_INTERNAL];
       new_call = gimple_build_call (new_decl, 1, src);
       src = make_ssa_name (vector_quad_type_node);
       gimple_call_set_lhs (new_call, src);
@@ -14245,7 +14245,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
-  new_decl = rs6000_builtin_decls[fncode + 1];
+  new_decl = rs6000_builtin_decls_x[fncode + 1];
   tree lhs, op[MAX_MMA_OPERANDS];
   tree acc = gimple_call_arg (stmt, 0);
   push_gimplify_context (true);
@@ -16023,6 +16023,7 @@ rs6000_init_builtins (void)
 }
 
 /* Returns the rs6000 builtin decl for CODE.  */
+/* #### TODO: Rewrite this.  */
 
 tree
 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index 9e5adbae7c7..c7f405e33fd 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2114,6 +2114,9 @@ write_decls ()
     fprintf (header_file, "  RS6000_OVLD_%s,\n", ovlds[i].ovld_id_name);
   fprintf (header_file, "  RS6000_OVLD_MAX\n};\n\n");
 
+  fprintf (header_file,
+	   "extern tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   fprintf (header_file, "enum restriction {\n");
   fprintf (header_file, "  RES_NONE,\n");
   fprintf (header_file, "  RES_BITS,\n");
@@ -2356,13 +2359,9 @@ static int
 write_header_file ()
 {
   write_autogenerated_header (header_file);
-  fprintf (header_file, "#include \"config.h\"\n");
-  fprintf (header_file, "#include \"system.h\"\n");
-  fprintf (header_file, "#include \"coretypes.h\"\n");
-  fprintf (header_file, "#include \"backend.h\"\n");
-  fprintf (header_file, "#include \"rtl.h\"\n");
-  fprintf (header_file, "#include \"tree.h\"\n");
-  fprintf (header_file, "\n");
+
+  fprintf (header_file, "#ifndef _RS6000_BUILTINS_H\n");
+  fprintf (header_file, "#define _RS6000_BUILTINS_H 1\n\n");
   fprintf (header_file, "extern int new_builtins_are_live;\n\n");
 
   write_decls ();
@@ -2370,6 +2369,7 @@ write_header_file ()
   /* Write function type list declarators to the header file.  */
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype);
   fprintf (header_file, "\n");
+  fprintf (header_file, "\n#endif\n");
 
   return 1;
 }
@@ -2527,7 +2527,7 @@ write_init_bif_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_BIF_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n",
 	       bifs[i].idname);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2601,7 +2601,7 @@ write_init_ovld_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_OVLD_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_OVLD_%s] = t\n",
 	       ovlds[i].ovld_id_name);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2643,6 +2643,8 @@ write_init_file ()
   fprintf (init_file, "int new_builtins_are_live = 0;\n\n");
 #endif
 
+  fprintf (init_file, "tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   write_bif_static_init ();
   write_ovld_static_init ();
 
@@ -2692,6 +2694,13 @@ write_init_file ()
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype_init);
   fprintf (init_file, "\n");
 
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_NONE] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_MAX] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_OVLD_NONE] = NULL_TREE;\n\n");
+
   write_init_bif_table ();
   write_init_ovld_table ();
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cf172138c2d..8b7b7820f42 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -77,6 +77,7 @@
 #include "case-cfn-macros.h"
 #include "ppc-auxv.h"
 #include "rs6000-internal.h"
+#include "rs6000-builtins.h"
 #include "opts.h"
 
 /* This file should be included last.  */
@@ -5411,6 +5412,198 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   return nunroll;
 }
 
+/* Returns a function decl for a vectorized version of the combined function
+   FN with result vector type TYPE_OUT and input vector type TYPE_IN, or
+   NULL_TREE if it is not available.  */
+
+static tree
+rs6000_new_builtin_vectorized_function (unsigned int fn, tree type_out,
+					tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "rs6000_new_builtin_vectorized_function (%s, %s, %s)\n",
+	     combined_fn_name (combined_fn (fn)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+  /* Only vector-to-vector mappings are handled here.  */
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+  /* Element mode and element count of the output and input vectors.  */
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  /* Within each case the VSX forms are tried before the AltiVec one.  */
+  switch (fn)
+    {
+    CASE_CFN_COPYSIGN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_COPYSIGN_V4SF];
+      break;
+    CASE_CFN_CEIL:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIP];
+      break;
+    CASE_CFN_FLOOR:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIM];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIM];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIM];
+      break;
+    CASE_CFN_FMA:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VMADDFP];
+      break;
+    CASE_CFN_TRUNC:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIZ];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIZ];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIZ];
+      break;
+    CASE_CFN_NEARBYINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPI];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPI];
+      break;
+    CASE_CFN_RINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && !flag_trapping_math
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIC];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && !flag_trapping_math
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIC];
+      break;
+    default:
+      break;
+    }
+
+  /* Generate calls to libmass if appropriate.  */
+  if (rs6000_veclib_handler)
+    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
+
+  return NULL_TREE;
+}
+
+/* New-builtins path of TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION.  */
+
+static tree
+rs6000_new_builtin_md_vectorized_function (tree fndecl, tree type_out,
+					   tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr,
+	     "rs6000_new_builtin_md_vectorized_function (%s, %s, %s)\n",
+	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+  /* Only vector-to-vector mappings are handled here.  */
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+  /* Element mode and element count of the output and input vectors.  */
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+  /* NOTE(review): switch keys are old RS6000_BUILTIN_* codes -- confirm.  */
+  enum rs6000_builtins fn
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  switch (fn)
+    {
+    case RS6000_BUILTIN_RSQRTF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRSQRTFP];
+      break;
+    case RS6000_BUILTIN_RSQRT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
+      break;
+    case RS6000_BUILTIN_RECIPF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRECIPFP];
+      break;
+    case RS6000_BUILTIN_RECIP:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RECIP_V2DF];
+      break;
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
@@ -5530,6 +5723,9 @@ rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_vectorized_function (fn, type_out, type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
 	     combined_fn_name (combined_fn (fn)),
@@ -5661,6 +5857,10 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_md_vectorized_function (fndecl, type_out,
+						      type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
 	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
@@ -22143,12 +22343,16 @@ rs6000_builtin_reciprocal (tree fndecl)
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
 
     case VSX_BUILTIN_XVSQRTSP:
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_4SF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
 
     default:
@@ -24700,7 +24904,10 @@ add_condition_to_bb (tree function_decl, tree version_decl,
 
   tree bool_zero = build_int_cst (bool_int_type_node, 0);
   tree cond_var = create_tmp_var (bool_int_type_node);
-  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
+  tree predicate_decl
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[(int) RS6000_BIF_CPU_SUPPORTS]
+       : rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS]);
   const char *arg_str = rs6000_clone_map[clone_isa].name;
   tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
   gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
@@ -26816,8 +27023,14 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
       return;
     }
 
-  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
-  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree mffs
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MFFS]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MFFS]);
+  tree mtfsf
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MTFSF]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]);
   tree call_mffs = build_call_expr (mffs, 0);
 
   /* Generates the equivalent of feholdexcept (&fenv_var)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 2ed15485f4b..ebcefc443cc 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,10 +23,6 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
-rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
-	$(COMPILE) $<
-	$(POSTCOMPILE)
-
 rs6000-string.o: $(srcdir)/config/rs6000/rs6000-string.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -79,6 +75,20 @@ rs6000-call.o: $(srcdir)/config/rs6000/rs6000-call.c rs6000-builtins.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c rs6000-builtins.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+# We need a dependency from rs6000.o on rs6000-builtins.h, but
+# apparently the rule for rs6000.o is generated automatically, so
+# it is not clear how to add one.  #### TODO.  The following "works"
+# but generates a warning about ignoring the old recipe for rs6000.o.
+# Better to try to figure out what that recipe is...
+#rs6000.o: $(srcdir)/config/rs6000/rs6000.c rs6000-builtins.c
+#	$(COMPILE) $<
+#	$(POSTCOMPILE)
+
+
 $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
   $(srcdir)/config/rs6000/rs6000-cpus.def
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \


^ permalink raw reply	[flat|nested] 4+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins4)] rs6000: Introduce rs6000_builtin_decls_x
@ 2020-11-20 19:36 William Schmidt
  0 siblings, 0 replies; 4+ messages in thread
From: William Schmidt @ 2020-11-20 19:36 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b78db0ab06883f9771ca889339f84ed11e54cff6

commit b78db0ab06883f9771ca889339f84ed11e54cff6
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Fri Nov 20 13:35:44 2020 -0600

    rs6000: Introduce rs6000_builtin_decls_x
    
    2020-11-20  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-c.c (rs6000-builtins.h): New #include.
            (altivec_build_new_resolved_builtin): New forward decl.
            (altivec_resolve_new_overloaded_builtin): New forward decl.
            (altivec_build_resolved_builtin): Call
            altivec_build_new_resolved_builtin.
            (altivec_resolve_overloaded_builtin): Call
            altivec_resolve_new_overloaded_builtin.
            (altivec_build_new_resolved_builtin): New function.
            (altivec_resolve_new_overloaded_builtin): Likewise.
            * config/rs6000/rs6000-call.c
            (rs6000_gimple_fold_new_mma_builtin): Use rs6000_builtin_decls_x.
            (rs6000_builtin_decl): Flag for later rewrite.
            * config/rs6000/rs6000-gen-builtins.c (write_decls): Generate decl
            for rs6000_builtin_decls_x.
            (write_header_file): Don't generate includes; add logic to avoid
            double-includes.
            (write_init_bif_table): Generate definition for
            rs6000_builtin_decls_x and preinitialize unused slots.
            * config/rs6000/rs6000.c (rs6000-builtins.h): New #include.
            (rs6000_new_builtin_vectorized_function): New function.
            (rs6000_new_builtin_md_vectorized_function): Likewise.
            (rs6000_builtin_vectorized_function): Call
            rs6000_new_builtin_vectorized_function.
            (rs6000_builtin_md_vectorized_function): Call
            rs6000_new_builtin_md_vectorized_function.
            (rs6000_builtin_reciprocal): Use rs6000_builtin_decls_x.
            (add_condition_to_bb): Likewise.
            (rs6000_atomic_assign_expand_fenv): Likewise.
            * config/rs6000/t-rs6000 (rs6000-c.o): Add dependency on
            rs6000-builtins.c.
            (rs6000.o): Add note about problem to be solved later.

Diff:
---
 gcc/config/rs6000/rs6000-c.c            | 1086 +++++++++++++++++++++++++++++++
 gcc/config/rs6000/rs6000-call.c         |    5 +-
 gcc/config/rs6000/rs6000-gen-builtins.c |   27 +-
 gcc/config/rs6000/rs6000.c              |  219 ++++++-
 gcc/config/rs6000/t-rs6000              |   18 +-
 5 files changed, 1337 insertions(+), 18 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index cc1e997524e..c6f0b88ac16 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -35,6 +35,14 @@
 #include "langhooks.h"
 #include "c/c-tree.h"
 
+#include "rs6000-builtins.h"
+
+
+static tree
+altivec_build_new_resolved_builtin (tree *, int,
+				    const struct altivec_builtin_types *);
+static tree
+altivec_resolve_new_overloaded_builtin (location_t, tree, void *);
 
 
 /* Handle the machine specific pragma longcall.  Its syntax is
@@ -850,6 +858,9 @@ static tree
 altivec_build_resolved_builtin (tree *args, int n,
 				const struct altivec_builtin_types *desc)
 {
+  if (new_builtins_are_live)
+    return altivec_build_new_resolved_builtin (args, n, desc);
+
   tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
   tree ret_type = rs6000_builtin_type (desc->ret_type);
   tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
@@ -921,6 +932,1081 @@ altivec_build_resolved_builtin (tree *args, int n,
 tree
 altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
 				    void *passed_arglist)
+{
+  if (new_builtins_are_live)
+    return altivec_resolve_new_overloaded_builtin (loc, fndecl,
+						   passed_arglist);
+
+  vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
+  unsigned int nargs = vec_safe_length (arglist);
+  enum rs6000_builtins fcode
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  tree fnargs = TYPE_ARG_TYPES (TREE_TYPE (fndecl));
+  tree types[4], args[4];
+  const struct altivec_builtin_types *desc;
+  unsigned int n;
+
+  if (!rs6000_overloaded_builtin_p (fcode))
+    return NULL_TREE;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "altivec_resolve_overloaded_builtin, code = %4d, %s\n",
+	     (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)));
+
+  /* vec_lvsl and vec_lvsr are deprecated for use with LE element order.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_LVSL && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsl%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+  else if (fcode == ALTIVEC_BUILTIN_VEC_LVSR && !BYTES_BIG_ENDIAN)
+    warning (OPT_Wdeprecated,
+	     "%<vec_lvsr%> is deprecated for little endian; use "
+	     "assignment for unaligned loads and stores");
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_MUL)
+    {
+      /* vec_mul needs to be special cased because there are no instructions
+	 for it for the {un}signed char, {un}signed short, and {un}signed int
+	 types.  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_mul");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  case E_QImode:
+	  case E_HImode:
+	  case E_SImode:
+	  case E_DImode:
+	  case E_TImode:
+	    {
+	      /* For scalar types just use a multiply expression.  */
+	      return fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg0), arg0,
+				      fold_convert (TREE_TYPE (arg0), arg1));
+	    }
+	  case E_SFmode:
+	    {
+	      /* For floats use the xvmulsp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULSP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  case E_DFmode:
+	    {
+	      /* For doubles use the xvmuldp instruction directly.  */
+	      tree call = rs6000_builtin_decls[VSX_BUILTIN_XVMULDP];
+	      return build_call_expr (call, 2, arg0, arg1);
+	    }
+	  /* Other types are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_CMPNE)
+    {
+      /* vec_cmpne needs to be special cased because there are no instructions
+	 for it (prior to power 9).  */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_cmpne");
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+
+      /* Both arguments must be vectors and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type))
+	goto bad;
+
+      /* Power9 instructions provide the most efficient implementation of
+	 ALTIVEC_BUILTIN_VEC_CMPNE if the mode is not DImode or TImode
+	 or SFmode or DFmode.  */
+      if (!TARGET_P9_VECTOR
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == TImode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == SFmode)
+	  || (TYPE_MODE (TREE_TYPE (arg0_type)) == DFmode))
+	{
+	  switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	    {
+	      /* vec_cmpne (va, vb) == vec_nor (vec_cmpeq (va, vb),
+		 vec_cmpeq (va, vb)).  */
+	      /* Note:  vec_nand also works but opt changes vec_nand's
+		 to vec_nor's anyway.  */
+	    case E_QImode:
+	    case E_HImode:
+	    case E_SImode:
+	    case E_DImode:
+	    case E_TImode:
+	    case E_SFmode:
+	    case E_DFmode:
+	      {
+		/* call = vec_cmpeq (va, vb)
+		   result = vec_nor (call, call).  */
+		vec<tree, va_gc> *params = make_tree_vector ();
+		vec_safe_push (params, arg0);
+		vec_safe_push (params, arg1);
+		tree call = altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_CMPEQ],
+		   params);
+		/* Use save_expr to ensure that operands used more than once
+		   that may have side effects (like calls) are only evaluated
+		   once.  */
+		call = save_expr (call);
+		params = make_tree_vector ();
+		vec_safe_push (params, call);
+		vec_safe_push (params, call);
+		return altivec_resolve_overloaded_builtin
+		  (loc, rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_NOR], params);
+	      }
+	      /* Other types are errors.  */
+	    default:
+	      goto bad;
+	    }
+	}
+      /* else, fall through and process the Power9 alternative below */
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDE
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBE)
+    {
+      /* vec_adde needs to be special cased because there is no instruction
+	  for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDE ?
+	    "vec_adde": "vec_sube";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	     vec_adde (va, vb, carryv) == vec_add (vec_add (va, vb),
+						   vec_and (carryv, 1)).
+	     vec_sube (va, vb, carryv) == vec_sub (vec_sub (va, vb),
+						   vec_and (carryv, 1)).  */
+	  case E_SImode:
+	    {
+	      tree add_sub_builtin;
+
+	      vec<tree, va_gc> *params = make_tree_vector ();
+	      vec_safe_push (params, arg0);
+	      vec_safe_push (params, arg1);
+
+	      if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	      else
+		add_sub_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	      tree call = altivec_resolve_overloaded_builtin (loc,
+							      add_sub_builtin,
+							      params);
+	      tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	      tree ones_vector = build_vector_from_val (arg0_type, const1);
+	      tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					       arg2, ones_vector);
+	      params = make_tree_vector ();
+	      vec_safe_push (params, call);
+	      vec_safe_push (params, and_expr);
+	      return altivec_resolve_overloaded_builtin (loc, add_sub_builtin,
+							 params);
+	    }
+	  /* For {un}signed __int128s use the vaddeuqm instruction
+		directly.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDE)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDEUQM];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBEUQM];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC
+      || fcode == ALTIVEC_BUILTIN_VEC_SUBEC)
+    {
+      /* vec_addec and vec_subec need to be special cased because there is
+	 no instruction for the {un}signed int version.  */
+      if (nargs != 3)
+	{
+	  const char *name = fcode == ALTIVEC_BUILTIN_VEC_ADDEC ?
+	    "vec_addec": "vec_subec";
+	  error ("builtin %qs only accepts 3 arguments", name);
+	  return error_mark_node;
+	}
+
+      tree arg0 = (*arglist)[0];
+      tree arg0_type = TREE_TYPE (arg0);
+      tree arg1 = (*arglist)[1];
+      tree arg1_type = TREE_TYPE (arg1);
+      tree arg2 = (*arglist)[2];
+      tree arg2_type = TREE_TYPE (arg2);
+
+      /* All 3 arguments must be vectors of (signed or unsigned) (int or
+	 __int128) and the types must be compatible.  */
+      if (TREE_CODE (arg0_type) != VECTOR_TYPE)
+	goto bad;
+      if (!lang_hooks.types_compatible_p (arg0_type, arg1_type)
+	  || !lang_hooks.types_compatible_p (arg1_type, arg2_type))
+	goto bad;
+
+      switch (TYPE_MODE (TREE_TYPE (arg0_type)))
+	{
+	  /* For {un}signed ints,
+	      vec_addec (va, vb, carryv) ==
+				vec_or (vec_addc (va, vb),
+					vec_addc (vec_add (va, vb),
+						  vec_and (carryv, 0x1))).  */
+	  case E_SImode:
+	    {
+	    /* Use save_expr to ensure that operands used more than once
+		that may have side effects (like calls) are only evaluated
+		once.  */
+	    tree as_builtin;
+	    tree as_c_builtin;
+
+	    arg0 = save_expr (arg0);
+	    arg1 = save_expr (arg1);
+	    vec<tree, va_gc> *params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADDC];
+	    else
+	      as_c_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUBC];
+
+	    tree call1 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							     params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, arg0);
+	    vec_safe_push (params, arg1);
+
+
+	    if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_ADD];
+	    else
+	      as_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_SUB];
+
+	    tree call2 = altivec_resolve_overloaded_builtin (loc, as_builtin,
+							     params);
+	    tree const1 = build_int_cstu (TREE_TYPE (arg0_type), 1);
+	    tree ones_vector = build_vector_from_val (arg0_type, const1);
+	    tree and_expr = fold_build2_loc (loc, BIT_AND_EXPR, arg0_type,
+					     arg2, ones_vector);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call2);
+	    vec_safe_push (params, and_expr);
+	    call2 = altivec_resolve_overloaded_builtin (loc, as_c_builtin,
+							params);
+	    params = make_tree_vector ();
+	    vec_safe_push (params, call1);
+	    vec_safe_push (params, call2);
+	    tree or_builtin = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_OR];
+	    return altivec_resolve_overloaded_builtin (loc, or_builtin,
+						       params);
+	    }
+	  /* For {un}signed __int128s use the vaddecuq/vsubecuq
+	     instructions.  */
+	  case E_TImode:
+	    {
+	       tree bii;
+
+	       if (fcode == ALTIVEC_BUILTIN_VEC_ADDEC)
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VADDECUQ];
+
+	       else
+		 bii = rs6000_builtin_decls[P8V_BUILTIN_VEC_VSUBECUQ];
+
+	       return altivec_resolve_overloaded_builtin (loc, bii, arglist);
+	    }
+	  /* Types other than {un}signed int and {un}signed __int128
+		are errors.  */
+	  default:
+	    goto bad;
+	}
+    }
+
+  /* For now treat vec_splats and vec_promote as the same.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS
+      || fcode == ALTIVEC_BUILTIN_VEC_PROMOTE)
+    {
+      tree type, arg;
+      int size;
+      int i;
+      bool unsigned_p;
+      vec<constructor_elt, va_gc> *vec;
+      const char *name = fcode == ALTIVEC_BUILTIN_VEC_SPLATS ? "vec_splats": "vec_promote";
+
+      if (fcode == ALTIVEC_BUILTIN_VEC_SPLATS && nargs != 1)
+	{
+	  error ("builtin %qs only accepts 1 argument", name);
+	  return error_mark_node;
+	}
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE && nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", name);
+	  return error_mark_node;
+	}
+      /* Ignore promote's element argument.  */
+      if (fcode == ALTIVEC_BUILTIN_VEC_PROMOTE
+	  && !INTEGRAL_TYPE_P (TREE_TYPE ((*arglist)[1])))
+	goto bad;
+
+      arg = (*arglist)[0];
+      type = TREE_TYPE (arg);
+      if (!SCALAR_FLOAT_TYPE_P (type)
+	  && !INTEGRAL_TYPE_P (type))
+	goto bad;
+      unsigned_p = TYPE_UNSIGNED (type);
+      switch (TYPE_MODE (type))
+	{
+	  case E_TImode:
+	    type = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
+	    size = 1;
+	    break;
+	  case E_DImode:
+	    type = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
+	    size = 2;
+	    break;
+	  case E_SImode:
+	    type = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
+	    size = 4;
+	    break;
+	  case E_HImode:
+	    type = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
+	    size = 8;
+	    break;
+	  case E_QImode:
+	    type = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
+	    size = 16;
+	    break;
+	  case E_SFmode: type = V4SF_type_node; size = 4; break;
+	  case E_DFmode: type = V2DF_type_node; size = 2; break;
+	  default:
+	    goto bad;
+	}
+      arg = save_expr (fold_convert (TREE_TYPE (type), arg));
+      vec_alloc (vec, size);
+      for(i = 0; i < size; i++)
+	{
+	  constructor_elt elt = {NULL_TREE, arg};
+	  vec->quick_push (elt);
+	}
+	return build_constructor (type, vec);
+    }
+
+  /* For now use pointer tricks to do the extraction, unless we are on VSX
+     extracting a double from a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_EXTRACT)
+    {
+      tree arg1;
+      tree arg1_type;
+      tree arg2;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second argument. */
+      if (nargs != 2)
+	{
+	  error ("builtin %qs only accepts 2 arguments", "vec_extract");
+	  return error_mark_node;
+	}
+
+      arg2 = (*arglist)[1];
+      arg1 = (*arglist)[0];
+      arg1_type = TREE_TYPE (arg1);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* See if we can optimize vec_extracts with the current VSX instruction
+	 set.  */
+      mode = TYPE_MODE (arg1_type);
+      if (VECTOR_MEM_VSX_P (mode))
+
+	{
+	  tree call = NULL_TREE;
+	  int nunits = GET_MODE_NUNITS (mode);
+
+	  arg2 = fold_for_warn (arg2);
+
+	  /* If the second argument is an integer constant, generate
+	     the built-in code if we can.  We need 64-bit and direct
+	     move to extract the small integer vectors.  */
+	  if (TREE_CODE (arg2) == INTEGER_CST)
+	    {
+	      wide_int selector = wi::to_wide (arg2);
+	      selector = wi::umod_trunc (selector, nunits);
+	      arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V1TImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V1TI];
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  if (TARGET_DIRECT_MOVE_64BIT)
+		    call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  /* If the second argument is variable, we can optimize it if we are
+	     generating 64-bit code on a machine with direct move.  */
+	  else if (TREE_CODE (arg2) != INTEGER_CST && TARGET_DIRECT_MOVE_64BIT)
+	    {
+	      switch (mode)
+		{
+		default:
+		  break;
+
+		case E_V2DFmode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DF];
+		  break;
+
+		case E_V2DImode:
+		  call = rs6000_builtin_decls[VSX_BUILTIN_VEC_EXT_V2DI];
+		  break;
+
+		case E_V4SFmode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SF];
+		  break;
+
+		case E_V4SImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V4SI];
+		  break;
+
+		case E_V8HImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V8HI];
+		  break;
+
+		case E_V16QImode:
+		  call = rs6000_builtin_decls[ALTIVEC_BUILTIN_VEC_EXT_V16QI];
+		  break;
+		}
+	    }
+
+	  if (call)
+	    {
+	      tree result = build_call_expr (call, 2, arg1, arg2);
+	      /* Coerce the result to vector element type.  May be no-op.  */
+	      arg1_inner_type = TREE_TYPE (arg1_type);
+	      result = fold_convert (arg1_inner_type, result);
+	      return result;
+	    }
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2). */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+			      build_int_cst (TREE_TYPE (arg2),
+					     TYPE_VECTOR_SUBPARTS (arg1_type)
+					     - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+
+      /* PR83660: We mark this as having side effects so that
+	 downstream in fold_build_cleanup_point_expr () it will get a
+	 CLEANUP_POINT_EXPR.  If it does not we can run into an ICE
+	 later in gimplify_cleanup_point_expr ().  Potentially this
+	 causes missed optimization because there actually is no side
+	 effect.  */
+      if (c_dialect_cxx ())
+	TREE_SIDE_EFFECTS (stmt) = 1;
+
+      return stmt;
+    }
+
+  /* For now use pointer tricks to do the insertion, unless we are on VSX
+     inserting a double to a constant offset.  */
+  if (fcode == ALTIVEC_BUILTIN_VEC_INSERT)
+    {
+      tree arg0;
+      tree arg1;
+      tree arg2;
+      tree arg1_type;
+      tree arg1_inner_type;
+      tree decl, stmt;
+      tree innerptrtype;
+      machine_mode mode;
+
+      /* No second or third arguments. */
+      if (nargs != 3)
+	{
+	  error ("builtin %qs only accepts 3 arguments", "vec_insert");
+	  return error_mark_node;
+	}
+
+      arg0 = (*arglist)[0];
+      arg1 = (*arglist)[1];
+      arg1_type = TREE_TYPE (arg1);
+      arg2 = fold_for_warn ((*arglist)[2]);
+
+      if (TREE_CODE (arg1_type) != VECTOR_TYPE)
+	goto bad;
+      if (!INTEGRAL_TYPE_P (TREE_TYPE (arg2)))
+	goto bad;
+
+      /* If we can use the VSX xxpermdi instruction, use that for insert.  */
+      mode = TYPE_MODE (arg1_type);
+      if ((mode == V2DFmode || mode == V2DImode) && VECTOR_UNIT_VSX_P (mode)
+	  && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  wide_int selector = wi::to_wide (arg2);
+	  selector = wi::umod_trunc (selector, 2);
+	  tree call = NULL_TREE;
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  if (mode == V2DFmode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DF];
+	  else if (mode == V2DImode)
+	    call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V2DI];
+
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  if (call)
+	    return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+      else if (mode == V1TImode && VECTOR_UNIT_VSX_P (mode)
+	       && TREE_CODE (arg2) == INTEGER_CST)
+	{
+	  tree call = rs6000_builtin_decls[VSX_BUILTIN_VEC_SET_V1TI];
+	  wide_int selector = wi::zero(32);
+
+	  arg2 = wide_int_to_tree (TREE_TYPE (arg2), selector);
+	  /* Note, __builtin_vec_insert_<xxx> has vector and scalar types
+	     reversed.  */
+	  return build_call_expr (call, 3, arg1, arg0, arg2);
+	}
+
+      /* Build *(((arg1_inner_type*)&(vector type){arg1})+arg2) = arg0. */
+      arg1_inner_type = TREE_TYPE (arg1_type);
+      if (TYPE_VECTOR_SUBPARTS (arg1_type) == 1)
+	arg2 = build_int_cst (TREE_TYPE (arg2), 0);
+      else
+	arg2 = build_binary_op (loc, BIT_AND_EXPR, arg2,
+				build_int_cst (TREE_TYPE (arg2),
+					       TYPE_VECTOR_SUBPARTS (arg1_type)
+					       - 1), 0);
+      decl = build_decl (loc, VAR_DECL, NULL_TREE, arg1_type);
+      DECL_EXTERNAL (decl) = 0;
+      TREE_PUBLIC (decl) = 0;
+      DECL_CONTEXT (decl) = current_function_decl;
+      TREE_USED (decl) = 1;
+      TREE_TYPE (decl) = arg1_type;
+      TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
+      if (c_dialect_cxx ())
+	{
+	  stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+			 NULL_TREE, NULL_TREE);
+	  SET_EXPR_LOCATION (stmt, loc);
+	}
+      else
+	{
+	  DECL_INITIAL (decl) = arg1;
+	  stmt = build1 (DECL_EXPR, arg1_type, decl);
+	  TREE_ADDRESSABLE (decl) = 1;
+	  SET_EXPR_LOCATION (stmt, loc);
+	  stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+	}
+
+      innerptrtype = build_pointer_type (arg1_inner_type);
+
+      stmt = build_unary_op (loc, ADDR_EXPR, stmt, 0);
+      stmt = convert (innerptrtype, stmt);
+      stmt = build_binary_op (loc, PLUS_EXPR, stmt, arg2, 1);
+      stmt = build_indirect_ref (loc, stmt, RO_NULL);
+      stmt = build2 (MODIFY_EXPR, TREE_TYPE (stmt), stmt,
+		     convert (TREE_TYPE (stmt), arg0));
+      stmt = build2 (COMPOUND_EXPR, arg1_type, stmt, decl);
+      return stmt;
+    }
+
+  for (n = 0;
+       !VOID_TYPE_P (TREE_VALUE (fnargs)) && n < nargs;
+       fnargs = TREE_CHAIN (fnargs), n++)
+    {
+      tree decl_type = TREE_VALUE (fnargs);
+      tree arg = (*arglist)[n];
+      tree type;
+
+      if (arg == error_mark_node)
+	return error_mark_node;
+
+      if (n >= 4)
+	abort ();
+
+      arg = default_conversion (arg);
+
+      /* The C++ front-end converts float * to const void * using
+	 NOP_EXPR<const void *> (NOP_EXPR<void *> (x)).  */
+      type = TREE_TYPE (arg);
+      if (POINTER_TYPE_P (type)
+	  && TREE_CODE (arg) == NOP_EXPR
+	  && lang_hooks.types_compatible_p (TREE_TYPE (arg),
+					    const_ptr_type_node)
+	  && lang_hooks.types_compatible_p (TREE_TYPE (TREE_OPERAND (arg, 0)),
+					    ptr_type_node))
+	{
+	  arg = TREE_OPERAND (arg, 0);
+	  type = TREE_TYPE (arg);
+	}
+
+      /* Remove the const from the pointers to simplify the overload
+	 matching further down.  */
+      if (POINTER_TYPE_P (decl_type)
+	  && POINTER_TYPE_P (type)
+	  && TYPE_QUALS (TREE_TYPE (type)) != 0)
+	{
+	  if (TYPE_READONLY (TREE_TYPE (type))
+	      && !TYPE_READONLY (TREE_TYPE (decl_type)))
+	    warning (0, "passing argument %d of %qE discards qualifiers from "
+		     "pointer target type", n + 1, fndecl);
+	  type = build_pointer_type (build_qualified_type (TREE_TYPE (type),
+							   0));
+	  arg = fold_convert (type, arg);
+	}
+
+      /* For P9V_BUILTIN_VEC_LXVL, convert any const * to its non constant
+	 equivalent to simplify the overload matching below.  */
+      if (fcode == P9V_BUILTIN_VEC_LXVL)
+	{
+	  if (POINTER_TYPE_P (type)
+	      && TYPE_READONLY (TREE_TYPE (type)))
+	    {
+	      type = build_pointer_type (build_qualified_type (
+						TREE_TYPE (type),0));
+	      arg = fold_convert (type, arg);
+	    }
+	}
+
+      args[n] = arg;
+      types[n] = type;
+    }
+
+  /* If the number of arguments did not match the prototype, return NULL
+     and the generic code will issue the appropriate error message.  */
+  if (!VOID_TYPE_P (TREE_VALUE (fnargs)) || n < nargs)
+    return NULL;
+
+  if (n == 0)
+    abort ();
+
+  if (fcode == ALTIVEC_BUILTIN_VEC_STEP)
+    {
+      if (TREE_CODE (types[0]) != VECTOR_TYPE)
+	goto bad;
+
+      return build_int_cst (NULL_TREE, TYPE_VECTOR_SUBPARTS (types[0]));
+    }
+
+  {
+    bool unsupported_builtin = false;
+    enum rs6000_builtins overloaded_code;
+    tree result = NULL;
+    for (desc = altivec_overloaded_builtins;
+	 desc->code && desc->code != fcode; desc++)
+      continue;
+
+    /* Need to special case __builtin_cmp because the overloaded forms
+       of this function take (unsigned int, unsigned int) or (unsigned
+       long long int, unsigned long long int).  Since C conventions
+       allow the respective argument types to be implicitly coerced into
+       each other, the default handling does not provide adequate
+       discrimination between the desired forms of the function.  */
+    if (fcode == P6_OV_BUILTIN_CMPB)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+	machine_mode arg2_mode = TYPE_MODE (types[1]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments", "__builtin_cmpb");
+	    return error_mark_node;
+	  }
+
+	/* If any supplied arguments are wider than 32 bits, resolve to
+	   64-bit variant of built-in function.  */
+	if ((GET_MODE_PRECISION (arg1_mode) > 32)
+	    || (GET_MODE_PRECISION (arg2_mode) > 32))
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB.  */
+	    overloaded_code = P6_BUILTIN_CMPB;
+	  }
+	else
+	  {
+	    /* Assure all argument and result types are compatible with
+	       the built-in function represented by P6_BUILTIN_CMPB_32.  */
+	    overloaded_code = P6_BUILTIN_CMPB_32;
+	  }
+
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if (fcode == P9V_BUILTIN_VEC_VSIEDP)
+      {
+	machine_mode arg1_mode = TYPE_MODE (types[0]);
+
+	if (nargs != 2)
+	  {
+	    error ("builtin %qs only accepts 2 arguments",
+		   "scalar_insert_exp");
+	    return error_mark_node;
+	  }
+
+	/* If supplied first argument is wider than 64 bits, resolve to
+	   128-bit variant of built-in function.  */
+	if (GET_MODE_PRECISION (arg1_mode) > 64)
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects __ieee128 argument.  Otherwise, expect
+	       __int128 argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEQPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEQP;
+	  }
+	else
+	  {
+	    /* If first argument is of float variety, choose variant
+	       that expects double argument.  Otherwise, expect
+	       long long int argument.  */
+	    if (GET_MODE_CLASS (arg1_mode) == MODE_FLOAT)
+	      overloaded_code = P9V_BUILTIN_VSIEDPF;
+	    else
+	      overloaded_code = P9V_BUILTIN_VSIEDP;
+	  }
+	while (desc->code && desc->code == fcode
+	       && desc->overloaded_code != overloaded_code)
+	  desc++;
+
+	if (desc->code && (desc->code == fcode)
+	    && rs6000_builtin_type_compatible (types[0], desc->op1)
+	    && rs6000_builtin_type_compatible (types[1], desc->op2))
+	  {
+	    if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+	      {
+		result = altivec_build_resolved_builtin (args, n, desc);
+		/* overloaded_code is set above.  */
+		if (!rs6000_builtin_is_supported_p (overloaded_code))
+		  unsupported_builtin = true;
+		else
+		  return result;
+	      }
+	    else
+	      unsupported_builtin = true;
+	  }
+      }
+    else if ((fcode == P10_BUILTIN_VEC_XXEVAL)
+	    || (fcode == P10V_BUILTIN_VXXPERMX))
+      {
+	signed char op3_type;
+
+	/* Need to special case P10_BUILTIN_VEC_XXEVAL and
+	   P10V_BUILTIN_VXXPERMX because they take 4 arguments and the
+	   existing infrastructure only handles three.  */
+	if (nargs != 4)
+	  {
+	    const char *name = fcode == P10_BUILTIN_VEC_XXEVAL ?
+	      "__builtin_vec_xxeval":"__builtin_vec_xxpermx";
+
+	    error ("builtin %qs requires 4 arguments", name);
+	    return error_mark_node;
+	  }
+
+	for ( ; desc->code == fcode; desc++)
+	  {
+	    if (fcode == P10_BUILTIN_VEC_XXEVAL)
+	      op3_type = desc->op3;
+	    else  /* P10V_BUILTIN_VXXPERMX */
+	      op3_type = RS6000_BTI_V16QI;
+
+	    if (rs6000_builtin_type_compatible (types[0], desc->op1)
+		&& rs6000_builtin_type_compatible (types[1], desc->op2)
+		&& rs6000_builtin_type_compatible (types[2], desc->op3)
+		&& rs6000_builtin_type_compatible (types[2], op3_type)
+		&& rs6000_builtin_type_compatible (types[3],
+						   RS6000_BTI_UINTSI))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] == NULL_TREE)
+		  unsupported_builtin = true;
+		else
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      return result;
+		    /* Allow loop to continue in case a different
+		       definition is supported.  */
+		    overloaded_code = desc->overloaded_code;
+		    unsupported_builtin = true;
+		  }
+	      }
+	  }
+      }
+    else
+      {
+	/* For arguments after the last, we have RS6000_BTI_NOT_OPAQUE in
+	   the opX fields.  */
+	for (; desc->code == fcode; desc++)
+	  {
+	    if ((desc->op1 == RS6000_BTI_NOT_OPAQUE
+		 || rs6000_builtin_type_compatible (types[0], desc->op1))
+		&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[1], desc->op2))
+		&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
+		    || rs6000_builtin_type_compatible (types[2], desc->op3)))
+	      {
+		if (rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
+		  {
+		    result = altivec_build_resolved_builtin (args, n, desc);
+		    if (!rs6000_builtin_is_supported_p (desc->overloaded_code))
+		      {
+			/* Allow loop to continue in case a different
+			   definition is supported.  */
+			overloaded_code = desc->overloaded_code;
+			unsupported_builtin = true;
+		      }
+		    else
+		      return result;
+		  }
+		else
+		  unsupported_builtin = true;
+	      }
+	  }
+      }
+
+    if (unsupported_builtin)
+      {
+	const char *name = rs6000_overloaded_builtin_name (fcode);
+	if (result != NULL)
+	  {
+	    const char *internal_name
+	      = rs6000_overloaded_builtin_name (overloaded_code);
+	    /* An error message making reference to the name of the
+	       non-overloaded function has already been issued.  Add
+	       clarification of the previous message.  */
+	    rich_location richloc (line_table, input_location);
+	    inform (&richloc, "builtin %qs requires builtin %qs",
+		    name, internal_name);
+	  }
+	else
+	  error ("%qs is not supported in this compiler configuration", name);
+	/* If an error-representing  result tree was returned from
+	   altivec_build_resolved_builtin above, use it.  */
+	return (result != NULL) ? result : error_mark_node;
+      }
+  }
+ bad:
+  {
+    const char *name = rs6000_overloaded_builtin_name (fcode);
+    error ("invalid parameter combination for AltiVec intrinsic %qs", name);
+    return error_mark_node;
+  }
+}
+
+/* Build a tree for a function call to an Altivec non-overloaded builtin.
+   DESC describes the overloaded builtin that matched the types and args;
+   ARGS holds the N (0 to 4) call arguments, in order.
+
+   Converts ARGS to the implementation's parameter types and the call to
+   DESC's return type, first patching vec_{all,any}_{ge,le} predicates.  */
+
+static tree
+altivec_build_new_resolved_builtin (tree *args, int n,
+				    const struct altivec_builtin_types *desc)
+{
+  tree impl_fndecl = rs6000_builtin_decls[desc->overloaded_code];
+  tree ret_type = rs6000_builtin_type (desc->ret_type);
+  tree argtypes = TYPE_ARG_TYPES (TREE_TYPE (impl_fndecl));
+  tree arg_type[4];
+  tree call;
+
+  int i;
+  for (i = 0; i < n; i++)
+    arg_type[i] = TREE_VALUE (argtypes), argtypes = TREE_CHAIN (argtypes);
+
+  /* The AltiVec overloading implementation is overall gross, but this
+     is particularly disgusting.  The vec_{all,any}_{ge,le} builtins
+     are completely different for floating-point vs. integer vector
+     types, because the former has vcmpgefp, but the latter should use
+     vcmpgtXX.
+
+     In practice, the second and third arguments are swapped, and the
+     condition (LT vs. EQ, which is recognizable by bit 1 of the first
+     argument) is reversed by XORing that argument with 2.  Patch the
+     arguments here before building the resolved CALL_EXPR.  */
+  if (n == 3
+      && desc->code == ALTIVEC_BUILTIN_VEC_VCMPGE_P
+      && desc->overloaded_code != ALTIVEC_BUILTIN_VCMPGEFP_P
+      && desc->overloaded_code != VSX_BUILTIN_XVCMPGEDP_P)
+    {
+      std::swap (args[1], args[2]);
+      std::swap (arg_type[1], arg_type[2]);
+
+      args[0] = fold_build2 (BIT_XOR_EXPR, TREE_TYPE (args[0]), args[0],
+			     build_int_cst (NULL_TREE, 2));
+    }
+
+  switch (n)
+    {
+    case 0:
+      call = build_call_expr (impl_fndecl, 0);
+      break;
+    case 1:
+      call = build_call_expr (impl_fndecl, 1,
+			      fully_fold_convert (arg_type[0], args[0]));
+      break;
+    case 2:
+      call = build_call_expr (impl_fndecl, 2,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]));
+      break;
+    case 3:
+      call = build_call_expr (impl_fndecl, 3,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]));
+      break;
+    case 4:
+      call = build_call_expr (impl_fndecl, 4,
+			      fully_fold_convert (arg_type[0], args[0]),
+			      fully_fold_convert (arg_type[1], args[1]),
+			      fully_fold_convert (arg_type[2], args[2]),
+			      fully_fold_convert (arg_type[3], args[3]));
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return fold_convert (ret_type, call);
+}
+
+/* Implementation of the resolve_overloaded_builtin target hook, to
+   support Altivec's overloaded builtins.  */
+
+static tree
+altivec_resolve_new_overloaded_builtin (location_t loc, tree fndecl,
+					void *passed_arglist)
 {
   vec<tree, va_gc> *arglist = static_cast<vec<tree, va_gc> *> (passed_arglist);
   unsigned int nargs = vec_safe_length (arglist);
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 21304197603..6b682d733ae 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14115,7 +14115,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
       /* We're disassembling an accumulator into a different type, so we need
 	 to emit a xxmfacc instruction now, since we cannot do it later.  */
-      new_decl = rs6000_builtin_decls[RS6000_BIF_XXMFACC_INTERNAL];
+      new_decl = rs6000_builtin_decls_x[RS6000_BIF_XXMFACC_INTERNAL];
       new_call = gimple_build_call (new_decl, 1, src);
       src = make_ssa_name (vector_quad_type_node);
       gimple_call_set_lhs (new_call, src);
@@ -14144,7 +14144,7 @@ rs6000_gimple_fold_new_mma_builtin (gimple_stmt_iterator *gsi,
 
   /* Convert this built-in into an internal version that uses pass-by-value
      arguments.  The internal built-in follows immediately after this one.  */
-  new_decl = rs6000_builtin_decls[fncode + 1];
+  new_decl = rs6000_builtin_decls_x[fncode + 1];
   tree lhs, op[MAX_MMA_OPERANDS];
   tree acc = gimple_call_arg (stmt, 0);
   push_gimplify_context (true);
@@ -15914,6 +15914,7 @@ rs6000_init_builtins (void)
 }
 
 /* Returns the rs6000 builtin decl for CODE.  */
+/* #### TODO: Rewrite this.  */
 
 tree
 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index 9e5adbae7c7..c7f405e33fd 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -2114,6 +2114,9 @@ write_decls ()
     fprintf (header_file, "  RS6000_OVLD_%s,\n", ovlds[i].ovld_id_name);
   fprintf (header_file, "  RS6000_OVLD_MAX\n};\n\n");
 
+  fprintf (header_file,
+	   "extern tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   fprintf (header_file, "enum restriction {\n");
   fprintf (header_file, "  RES_NONE,\n");
   fprintf (header_file, "  RES_BITS,\n");
@@ -2356,13 +2359,9 @@ static int
 write_header_file ()
 {
   write_autogenerated_header (header_file);
-  fprintf (header_file, "#include \"config.h\"\n");
-  fprintf (header_file, "#include \"system.h\"\n");
-  fprintf (header_file, "#include \"coretypes.h\"\n");
-  fprintf (header_file, "#include \"backend.h\"\n");
-  fprintf (header_file, "#include \"rtl.h\"\n");
-  fprintf (header_file, "#include \"tree.h\"\n");
-  fprintf (header_file, "\n");
+
+  fprintf (header_file, "#ifndef _RS6000_BUILTINS_H\n");
+  fprintf (header_file, "#define _RS6000_BUILTINS_H 1\n\n");
   fprintf (header_file, "extern int new_builtins_are_live;\n\n");
 
   write_decls ();
@@ -2370,6 +2369,7 @@ write_header_file ()
   /* Write function type list declarators to the header file.  */
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_extern_fntype);
   fprintf (header_file, "\n");
+  fprintf (header_file, "\n#endif\n");
 
   return 1;
 }
@@ -2527,7 +2527,7 @@ write_init_bif_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_BIF_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_BIF_%s] = t\n",
 	       bifs[i].idname);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2601,7 +2601,7 @@ write_init_ovld_table ()
 	       "  if (new_builtins_are_live)\n");
       fprintf (init_file, "    {\n");
       fprintf (init_file,
-	       "      rs6000_builtin_decls[(int)RS6000_OVLD_%s] = t\n",
+	       "      rs6000_builtin_decls_x[(int)RS6000_OVLD_%s] = t\n",
 	       ovlds[i].ovld_id_name);
       fprintf (init_file,
 	       "        = add_builtin_function (\"%s\",\n",
@@ -2643,6 +2643,8 @@ write_init_file ()
   fprintf (init_file, "int new_builtins_are_live = 0;\n\n");
 #endif
 
+  fprintf (init_file, "tree rs6000_builtin_decls_x[RS6000_OVLD_MAX];\n\n");
+
   write_bif_static_init ();
   write_ovld_static_init ();
 
@@ -2692,6 +2694,13 @@ write_init_file ()
   rbt_inorder_callback (&fntype_rbt, fntype_rbt.rbt_root, write_fntype_init);
   fprintf (init_file, "\n");
 
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_NONE] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_BIF_MAX] = NULL_TREE;\n");
+  fprintf (init_file,
+	   "  rs6000_builtin_decls_x[RS6000_OVLD_NONE] = NULL_TREE;\n\n");
+
   write_init_bif_table ();
   write_init_ovld_table ();
 
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index bcd4c4a82b3..2e0f39ac9d6 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -77,6 +77,7 @@
 #include "case-cfn-macros.h"
 #include "ppc-auxv.h"
 #include "rs6000-internal.h"
+#include "rs6000-builtins.h"
 #include "opts.h"
 
 /* This file should be included last.  */
@@ -5391,6 +5392,198 @@ rs6000_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
   return nunroll;
 }
 
+/* Return a decl for a vectorized version of combined function FN with result
+   vector type TYPE_OUT and argument vector type TYPE_IN: an entry of
+   rs6000_builtin_decls_x, else the veclib handler's result or NULL_TREE.  */
+
+static tree
+rs6000_new_builtin_vectorized_function (unsigned int fn, tree type_out,
+					tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr, "rs6000_new_builtin_vectorized_function (%s, %s, %s)\n",
+	     combined_fn_name (combined_fn (fn)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  switch (fn)
+    {
+    CASE_CFN_COPYSIGN:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_CPSGNSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_COPYSIGN_V4SF];
+      break;
+    CASE_CFN_CEIL:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIP];
+      break;
+    CASE_CFN_FLOOR:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIM];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIM];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIM];
+      break;
+    CASE_CFN_FMA:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDDP];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVMADDSP];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VMADDFP];
+      break;
+    CASE_CFN_TRUNC:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIZ];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIZ];
+      if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRFIZ];
+      break;
+    CASE_CFN_NEARBYINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPI];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && flag_unsafe_math_optimizations
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPI];
+      break;
+    CASE_CFN_RINT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && !flag_trapping_math
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRDPIC];
+      if (VECTOR_UNIT_VSX_P (V4SFmode)
+	  && !flag_trapping_math
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_XVRSPIC];
+      break;
+    default:
+      break;
+    }
+
+  /* Generate calls to libmass if appropriate.  */
+  if (rs6000_veclib_handler)
+    return rs6000_veclib_handler (combined_fn (fn), type_out, type_in);
+
+  return NULL_TREE;
+}
+
+/* New-builtin TARGET_VECTORIZE_BUILTIN_MD_VECTORIZED_FUNCTION helper.  */
+
+static tree
+rs6000_new_builtin_md_vectorized_function (tree fndecl, tree type_out,
+					   tree type_in)
+{
+  machine_mode in_mode, out_mode;
+  int in_n, out_n;
+
+  if (TARGET_DEBUG_BUILTIN)
+    fprintf (stderr,
+	     "rs6000_new_builtin_md_vectorized_function (%s, %s, %s)\n",
+	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
+	     GET_MODE_NAME (TYPE_MODE (type_out)),
+	     GET_MODE_NAME (TYPE_MODE (type_in)));
+
+  if (TREE_CODE (type_out) != VECTOR_TYPE
+      || TREE_CODE (type_in) != VECTOR_TYPE)
+    return NULL_TREE;
+
+  out_mode = TYPE_MODE (TREE_TYPE (type_out));
+  out_n = TYPE_VECTOR_SUBPARTS (type_out);
+  in_mode = TYPE_MODE (TREE_TYPE (type_in));
+  in_n = TYPE_VECTOR_SUBPARTS (type_in);
+
+  enum rs6000_builtins fn
+    = (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
+  switch (fn)
+    {
+    case RS6000_BUILTIN_RSQRTF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRSQRTFP];
+      break;
+    case RS6000_BUILTIN_RSQRT:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
+      break;
+    case RS6000_BUILTIN_RECIPF:
+      if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
+	  && out_mode == SFmode && out_n == 4
+	  && in_mode == SFmode && in_n == 4)
+	return rs6000_builtin_decls_x[RS6000_BIF_VRECIPFP];
+      break;
+    case RS6000_BUILTIN_RECIP:
+      if (VECTOR_UNIT_VSX_P (V2DFmode)
+	  && out_mode == DFmode && out_n == 2
+	  && in_mode == DFmode && in_n == 2)
+	return rs6000_builtin_decls_x[RS6000_BIF_RECIP_V2DF];
+      break;
+    default:
+      break;
+    }
+  return NULL_TREE;
+}
+
 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
    library with vectorized intrinsics.  */
 
@@ -5510,6 +5703,9 @@ rs6000_builtin_vectorized_function (unsigned int fn, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_vectorized_function (fn, type_out, type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
 	     combined_fn_name (combined_fn (fn)),
@@ -5641,6 +5837,10 @@ rs6000_builtin_md_vectorized_function (tree fndecl, tree type_out,
   machine_mode in_mode, out_mode;
   int in_n, out_n;
 
+  if (new_builtins_are_live)
+    return rs6000_new_builtin_md_vectorized_function (fndecl, type_out,
+						      type_in);
+
   if (TARGET_DEBUG_BUILTIN)
     fprintf (stderr, "rs6000_builtin_md_vectorized_function (%s, %s, %s)\n",
 	     IDENTIFIER_POINTER (DECL_NAME (fndecl)),
@@ -21916,12 +22116,16 @@ rs6000_builtin_reciprocal (tree fndecl)
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_2DF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
 
     case VSX_BUILTIN_XVSQRTSP:
       if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
 	return NULL_TREE;
 
+      if (new_builtins_are_live)
+	return rs6000_builtin_decls_x[RS6000_BIF_RSQRT_4SF];
       return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
 
     default:
@@ -24473,7 +24677,10 @@ add_condition_to_bb (tree function_decl, tree version_decl,
 
   tree bool_zero = build_int_cst (bool_int_type_node, 0);
   tree cond_var = create_tmp_var (bool_int_type_node);
-  tree predicate_decl = rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS];
+  tree predicate_decl
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[(int) RS6000_BIF_CPU_SUPPORTS]
+       : rs6000_builtin_decls [(int) RS6000_BUILTIN_CPU_SUPPORTS]);
   const char *arg_str = rs6000_clone_map[clone_isa].name;
   tree predicate_arg = build_string_literal (strlen (arg_str) + 1, arg_str);
   gimple *call_cond_stmt = gimple_build_call (predicate_decl, 1, predicate_arg);
@@ -26589,8 +26796,14 @@ rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
       return;
     }
 
-  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
-  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
+  tree mffs
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MFFS]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MFFS]);
+  tree mtfsf
+    = (new_builtins_are_live
+       ? rs6000_builtin_decls_x[RS6000_BIF_MTFSF]
+       : rs6000_builtin_decls[RS6000_BUILTIN_MTFSF]);
   tree call_mffs = build_call_expr (mffs, 0);
 
   /* Generates the equivalent of feholdexcept (&fenv_var)
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 2ed15485f4b..ebcefc443cc 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -23,10 +23,6 @@ TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def
 TM_H += $(srcdir)/config/rs6000/rs6000-modes.h
 PASSES_EXTRA += $(srcdir)/config/rs6000/rs6000-passes.def
 
-rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c
-	$(COMPILE) $<
-	$(POSTCOMPILE)
-
 rs6000-string.o: $(srcdir)/config/rs6000/rs6000-string.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
@@ -79,6 +75,20 @@ rs6000-call.o: $(srcdir)/config/rs6000/rs6000-call.c rs6000-builtins.c
 	$(COMPILE) $<
 	$(POSTCOMPILE)
 
+rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c rs6000-builtins.c
+	$(COMPILE) $<
+	$(POSTCOMPILE)
+
+# We need a dependency from rs6000.c on rs6000-builtins.h, but
+# apparently the rule for rs6000.o is automatically generated, so not
+# sure what to do about this.  #### TODO.  The following "works" but
+# generates a warning about ignoring the old recipe for rs6000.o.
+# Better to try to figure out what that recipe is...
+#rs6000.o: $(srcdir)/config/rs6000/rs6000.c rs6000-builtins.c
+#	$(COMPILE) $<
+#	$(POSTCOMPILE)
+
+
 $(srcdir)/config/rs6000/rs6000-tables.opt: $(srcdir)/config/rs6000/genopt.sh \
   $(srcdir)/config/rs6000/rs6000-cpus.def
 	$(SHELL) $(srcdir)/config/rs6000/genopt.sh $(srcdir)/config/rs6000 > \


^ permalink raw reply	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2021-02-07 18:15 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-11-24 16:46 [gcc(refs/users/wschmidt/heads/builtins4)] rs6000: Introduce rs6000_builtin_decls_x William Schmidt
  -- strict thread matches above, loose matches on Subject: below --
2021-02-07 18:15 William Schmidt
2020-12-16 18:09 William Schmidt
2020-11-20 19:36 William Schmidt

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).