public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-16 21:32 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-16 21:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:b3364b3bed5658d341ca5abc42a950b1706d28a0

commit b3364b3bed5658d341ca5abc42a950b1706d28a0
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Tue Sep 15 11:53:33 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-15  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-gen-builtins.c (type_map): Change
            "long_double" to "float128".

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 135 +++++++++++++++----------------
 gcc/config/rs6000/rs6000-gen-builtins.c  |   2 +-
 2 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 6b66908d7c7..5e41e9e3316 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -169,6 +169,7 @@
   void __builtin_cpu_init ();
     CPU_INIT nothing {cpu}
 
+  ; TODO: Following two should return bool?
   unsigned int __builtin_cpu_is (const char *);
     CPU_IS nothing {cpu}
 
@@ -234,7 +235,7 @@
 
 ; Power6 builtins.
 [power6]
-  const signed int __builtin_p6_cmpb (signed int, signed int);
+  const signed long long __builtin_p6_cmpb (signed long long, signed long long);
     CMPB cmpbdi3 {}
 
   const signed int __builtin_p6_cmpb_32 (signed int, signed int);
@@ -810,7 +811,7 @@
   const vuc __builtin_altivec_vpkshus (vss, vss);
     VPKSHUS altivec_vpkshus {}
 
-  const vsi __builtin_altivec_vpkswss (vsi, vsi);
+  const vss __builtin_altivec_vpkswss (vsi, vsi);
     VPKSWSS altivec_vpkswss {}
 
   const vus __builtin_altivec_vpkswus (vsi, vsi);
@@ -1539,10 +1540,10 @@
   const vsi __builtin_vsx_vsigned_v4sf (vf);
     VEC_VSIGNED_V4SF vsx_xvcvspsxws {}
 
-  const vsll __builtin_vsx_vsignede_v2df (vd);
+  const vsi __builtin_vsx_vsignede_v2df (vd);
     VEC_VSIGNEDE_V2DF vsignede_v2df {}
 
-  const vsll __builtin_vsx_vsignedo_v2df (vd);
+  const vsi __builtin_vsx_vsignedo_v2df (vd);
     VEC_VSIGNEDO_V2DF vsignedo_v2df {}
 
   const vull __builtin_vsx_vunsigned_v2df (vd);
@@ -1560,7 +1561,7 @@
   const vf __builtin_vsx_xscvdpsp (vd);
     XSCVDPSP vsx_xscvdpsp {}
 
-  const vd __builtin_vsx_xscvspdp (vf);
+  const double __builtin_vsx_xscvspdp (float);
     XSCVSPDP vsx_xscvspdp {}
 
   const double __builtin_vsx_xsmaxdp (double, double);
@@ -1569,19 +1570,19 @@
   const double __builtin_vsx_xsmindp (double, double);
     XSMINDP smindf3 {}
 
-  const vd __builtin_vsx_xsrdpi (vd);
+  const double __builtin_vsx_xsrdpi (double);
     XSRDPI vsx_xsrdpi {}
 
-  const vd __builtin_vsx_xsrdpic (vd);
+  const double __builtin_vsx_xsrdpic (double);
     XSRDPIC vsx_xsrdpic {}
 
-  const vd __builtin_vsx_xsrdpim (vd);
+  const double __builtin_vsx_xsrdpim (double);
     XSRDPIM floordf2 {}
 
-  const vd __builtin_vsx_xsrdpip (vd);
+  const double __builtin_vsx_xsrdpip (double);
     XSRDPIP ceildf2 {}
 
-  const vd __builtin_vsx_xsrdpiz (vd);
+  const double __builtin_vsx_xsrdpiz (double);
     XSRDPIZ btruncdf2 {}
 
   const unsigned int __builtin_vsx_xstdivdp_fe (vd, vd);
@@ -1611,49 +1612,37 @@
   const vbll __builtin_vsx_xvcmpeqdp (vd, vd);
     XVCMPEQDP vector_eqv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpeqdp_p (vd);
+  const signed int __builtin_vsx_xvcmpeqdp_p (vd);
     XVCMPEQDP_P vector_eq_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpeqsp (vf, vf);
     XVCMPEQSP vector_eqv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpeqsp_p (vf);
+  const signed int __builtin_vsx_xvcmpeqsp_p (vf);
     XVCMPEQSP_P vector_eq_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgedp (vd, vd);
     XVCMPGEDP vector_gev2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgedp_p (vd);
+  const signed int __builtin_vsx_xvcmpgedp_p (vd);
     XVCMPGEDP_P vector_ge_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgesp (vf, vf);
     XVCMPGESP vector_gev4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgesp_p (vf);
+  const signed int __builtin_vsx_xvcmpgesp_p (vf);
     XVCMPGESP_P vector_ge_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgtdp (vd, vd);
     XVCMPGTDP vector_gtv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgtdp_p (vd);
+  const signed int __builtin_vsx_xvcmpgtdp_p (vd);
     XVCMPGTDP_P vector_gt_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgtsp (vf, vf);
     XVCMPGTSP vector_gtv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
+  const signed int __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
   const vf __builtin_vsx_xvcvdpsp (vd);
@@ -1665,7 +1654,7 @@
   const vsll __builtin_vsx_xvcvdpsxds_scale (vd, const int);
     XVCVDPSXDS_SCALE vsx_xvcvdpsxds_scale {}
 
-  const vsll __builtin_vsx_xvcvdpsxws (vd);
+  const vsi __builtin_vsx_xvcvdpsxws (vd);
     XVCVDPSXWS vsx_xvcvdpsxws {}
 
   const vull __builtin_vsx_xvcvdpuxds (vd);
@@ -1678,7 +1667,7 @@
   const vull __builtin_vsx_xvcvdpuxds_uns (vd);
     XVCVDPUXDS_UNS vsx_fixuns_truncv2dfv2di2 {}
 
-  const vull __builtin_vsx_xvcvdpuxws (vd);
+  const vui __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
   const vd __builtin_vsx_xvcvspdp (vf);
@@ -1736,7 +1725,7 @@
   const vf __builtin_vsx_xvcvuxwsp (vui);
     XVCVUXWSP_V4SF vsx_xvcvuxwsp {}
 
-  fpmath vf __builtin_vsx_xvdivdp (vf, vf);
+  fpmath vd __builtin_vsx_xvdivdp (vd, vd);
     XVDIVDP divv2df3 {}
 
   fpmath vf __builtin_vsx_xvdivsp (vf, vf);
@@ -1895,7 +1884,7 @@
   const vf __builtin_vsx_xxmrglw (vf, vf);
     XXMRGLW_4SF vsx_xxmrglw_v4sf {}
 
-  const vss __builtin_vsx_xxmrglw_4si (vsi, vsi);
+  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
     XXMRGLW_4SI vsx_xxmrglw_v4si {}
 
   const vsc __builtin_vsx_xxpermdi_16qi (vsc, vsc, const int<1>);
@@ -2268,7 +2257,8 @@
     VPKUDUS altivec_vpkudus {}
 
 ; #### Following are duplicates of __builtin_crypto_vpmsum*.  This
-; can't have ever worked properly!
+; can't have ever worked properly!  However, these have the right
+; result types, and the others have wrong ones!!  Whatwhatwhat...
 ;
 ;  const vus __builtin_altivec_vpmsumb (vuc, vuc);
 ;    VPMSUMB crypto_vpmsumb {}
@@ -2282,13 +2272,13 @@
 ;  const vull __builtin_altivec_vpmsumw (vui, vui);
 ;    VPMSUMW crypto_vpmsumw {}
 
-  const vuc __builtin_altivec_vpopcntb (vsc);
+  const vsc __builtin_altivec_vpopcntb (vsc);
     VPOPCNTB popcountv16qi2 {}
 
-  const vull __builtin_altivec_vpopcntd (vsll);
+  const vsll __builtin_altivec_vpopcntd (vsll);
     VPOPCNTD popcountv2di2 {}
 
-  const vus __builtin_altivec_vpopcnth (vss);
+  const vss __builtin_altivec_vpopcnth (vss);
     VPOPCNTH popcountv8hi2 {}
 
   const vuc __builtin_altivec_vpopcntub (vuc);
@@ -2303,7 +2293,7 @@
   const vui __builtin_altivec_vpopcntuw (vui);
     VPOPCNTUW popcountv4si2 {}
 
-  const vui __builtin_altivec_vpopcntw (vsi);
+  const vsi __builtin_altivec_vpopcntw (vsi);
     VPOPCNTW popcountv4si2 {}
 
   const vsll __builtin_altivec_vrld (vsll, vull);
@@ -2342,31 +2332,31 @@
   const vsq __builtin_bcdadd (vsq, vsq, const int<1>);
     BCDADD bcdadd {}
 
-  const unsigned int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
     BCDADD_EQ bcdadd_eq {}
 
-  const unsigned int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
     BCDADD_GT bcdadd_gt {}
 
-  const unsigned int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
     BCDADD_LT bcdadd_lt {}
 
-  const unsigned int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
     BCDADD_OV bcdadd_unordered {}
 
   const vsq __builtin_bcdsub (vsq, vsq, const int<1>);
     BCDSUB bcdsub {}
 
-  const unsigned int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
     BCDSUB_EQ bcdsub_eq {}
 
-  const unsigned int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
     BCDSUB_GT bcdsub_gt {}
 
-  const unsigned int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
     BCDSUB_LT bcdsub_lt {}
 
-  const unsigned int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
     BCDSUB_OV bcdsub_unordered {}
 
   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
@@ -2381,16 +2371,19 @@
   const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
     VPERMXOR_V8HI crypto_vpermxor_v8hi {}
 
-  const vus __builtin_crypto_vpmsumb (vuc, vuc);
+; Note: these four have incorrect return types per the way the
+; instructions work, but this matches the old signatures that
+; have been around for too long.
+  const vuc __builtin_crypto_vpmsumb (vuc, vuc);
     VPMSUMB crypto_vpmsumb {}
 
-  const vuq __builtin_crypto_vpmsumd (vull, vull);
+  const vull __builtin_crypto_vpmsumd (vull, vull);
     VPMSUMD crypto_vpmsumd {}
 
-  const vui __builtin_crypto_vpmsumh (vus, vus);
+  const vus __builtin_crypto_vpmsumh (vus, vus);
     VPMSUMH crypto_vpmsumh {}
 
-  const vull __builtin_crypto_vpmsumw (vui, vui);
+  const vui __builtin_crypto_vpmsumw (vui, vui);
     VPMSUMW crypto_vpmsumw {}
 
   const vf __builtin_vsx_float2_v2df (vd, vd);
@@ -2615,16 +2608,16 @@
   const vsi __builtin_altivec_vprtybw (vsi);
     VPRTYBW parityv4si2 {}
 
-  const vull __builtin_altivec_vrldmi (vull, vull, vull);
+  const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
 
-  const vull __builtin_altivec_vrldnm (vull, vull);
+  const vsll __builtin_altivec_vrldnm (vsll, vsll);
     VRLDNM altivec_vrldnm {}
 
-  const vui __builtin_altivec_vrlwmi (vui, vui, vui);
+  const vsi __builtin_altivec_vrlwmi (vsi, vsi, vsi);
     VRLWMI altivec_vrlwmi {}
 
-  const vui __builtin_altivec_vrlwnm (vui, vui);
+  const vsi __builtin_altivec_vrlwnm (vsi, vsi);
     VRLWNM altivec_vrlwnm {}
 
   const vuc __builtin_altivec_vslv (vuc, vuc);
@@ -2687,10 +2680,10 @@
   const unsigned int __builtin_vsx_scalar_test_neg_sp (float);
     VSTDCNSP xststdcnegsp {}
 
-  const unsigned long long __builtin_vsx_test_data_class_dp (vd, signed int);
+  const vbll __builtin_vsx_test_data_class_dp (vd, signed int);
     VTDCDP xvtstdcdp {}
 
-  const unsigned int __builtin_vsx_test_data_class_sp (vf, signed int);
+  const vbi __builtin_vsx_test_data_class_sp (vf, signed int);
     VTDCSP xvtstdcsp {}
 
   const vf __builtin_vsx_vextract_fp_from_shorth (vus);
@@ -2735,28 +2728,28 @@
   double __builtin_mffsl ();
     MFFSL rs6000_mffsl {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -3219,25 +3212,25 @@
   const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
     VSRDB_V8HI vsrdb_v8hi {}
 
-  const vuc __builtin_altivec_vstribl (vuc);
+  const vsc __builtin_altivec_vstribl (vsc);
     VSTRIBL vstril_v16qi {}
 
   const signed int __builtin_altivec_vstribl_p (vuc);
     VSTRIBL_P vstril_p_v16qi {}
 
-  const vuc __builtin_altivec_vstribr (vuc);
+  const vsc __builtin_altivec_vstribr (vsc);
     VSTRIBR vstrir_v16qi {}
 
   const signed int __builtin_altivec_vstribr_p (vuc);
     VSTRIBR_P vstrir_p_v16qi {}
 
-  const vus __builtin_altivec_vstrihl (vus);
+  const vss __builtin_altivec_vstrihl (vss);
     VSTRIHL vstril_v8hi {}
 
   const signed int __builtin_altivec_vstrihl_p (vus);
     VSTRIHL_P vstril_p_v8hi {}
 
-  const vus __builtin_altivec_vstrihr (vus);
+  const vss __builtin_altivec_vstrihr (vss);
     VSTRIHR vstrir_v8hi {}
 
   const signed int __builtin_altivec_vstrihr_p (vus);
@@ -3270,22 +3263,22 @@
   const vuc __builtin_vsx_xvcvspbf16 (vuc);
     XVCVSPBF16 vsx_xvcvspbf16 {}
 
-  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+  const vuc __builtin_vsx_xxblend_v16qi (vuc, vuc, vuc);
     VXXBLEND_V16QI xxblend_v16qi {}
 
   const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
     VXXBLEND_V2DF xxblend_v2df {}
 
-  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+  const vull __builtin_vsx_xxblend_v2di (vull, vull, vull);
     VXXBLEND_V2DI xxblend_v2di {}
 
   const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
     VXXBLEND_V4SF xxblend_v4sf {}
 
-  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+  const vui __builtin_vsx_xxblend_v4si (vui, vui, vui);
     VXXBLEND_V4SI xxblend_v4si {}
 
-  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+  const vus __builtin_vsx_xxblend_v8hi (vus, vus, vus);
     VXXBLEND_V8HI xxblend_v8hi {}
 
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index ba64d90cda3..2f317d761d7 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -458,7 +458,7 @@ static typemap type_map[TYPE_MAP_SIZE] =
     { "sf",	"float" },
     { "si",	"intSI" },
     { "td",	"dfloat128" },
-    { "tf",	"long_double" },
+    { "tf",	"float128" },
     { "ti",	"intTI" },
     { "udi",	"unsigned_intDI" },
     { "uhi",	"unsigned_intHI" },


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-29 19:53 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-29 19:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8ce934371ea840b69e26b043f965ebd7a71bc1ab

commit 8ce934371ea840b69e26b043f965ebd7a71bc1ab
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Tue Sep 15 11:53:33 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-15  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-gen-builtins.c (type_map): Change
            "long_double" to "float128".

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 135 +++++++++++++++----------------
 gcc/config/rs6000/rs6000-gen-builtins.c  |   2 +-
 2 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 6b66908d7c7..5e41e9e3316 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -169,6 +169,7 @@
   void __builtin_cpu_init ();
     CPU_INIT nothing {cpu}
 
+  ; TODO: Following two should return bool?
   unsigned int __builtin_cpu_is (const char *);
     CPU_IS nothing {cpu}
 
@@ -234,7 +235,7 @@
 
 ; Power6 builtins.
 [power6]
-  const signed int __builtin_p6_cmpb (signed int, signed int);
+  const signed long long __builtin_p6_cmpb (signed long long, signed long long);
     CMPB cmpbdi3 {}
 
   const signed int __builtin_p6_cmpb_32 (signed int, signed int);
@@ -810,7 +811,7 @@
   const vuc __builtin_altivec_vpkshus (vss, vss);
     VPKSHUS altivec_vpkshus {}
 
-  const vsi __builtin_altivec_vpkswss (vsi, vsi);
+  const vss __builtin_altivec_vpkswss (vsi, vsi);
     VPKSWSS altivec_vpkswss {}
 
   const vus __builtin_altivec_vpkswus (vsi, vsi);
@@ -1539,10 +1540,10 @@
   const vsi __builtin_vsx_vsigned_v4sf (vf);
     VEC_VSIGNED_V4SF vsx_xvcvspsxws {}
 
-  const vsll __builtin_vsx_vsignede_v2df (vd);
+  const vsi __builtin_vsx_vsignede_v2df (vd);
     VEC_VSIGNEDE_V2DF vsignede_v2df {}
 
-  const vsll __builtin_vsx_vsignedo_v2df (vd);
+  const vsi __builtin_vsx_vsignedo_v2df (vd);
     VEC_VSIGNEDO_V2DF vsignedo_v2df {}
 
   const vull __builtin_vsx_vunsigned_v2df (vd);
@@ -1560,7 +1561,7 @@
   const vf __builtin_vsx_xscvdpsp (vd);
     XSCVDPSP vsx_xscvdpsp {}
 
-  const vd __builtin_vsx_xscvspdp (vf);
+  const double __builtin_vsx_xscvspdp (float);
     XSCVSPDP vsx_xscvspdp {}
 
   const double __builtin_vsx_xsmaxdp (double, double);
@@ -1569,19 +1570,19 @@
   const double __builtin_vsx_xsmindp (double, double);
     XSMINDP smindf3 {}
 
-  const vd __builtin_vsx_xsrdpi (vd);
+  const double __builtin_vsx_xsrdpi (double);
     XSRDPI vsx_xsrdpi {}
 
-  const vd __builtin_vsx_xsrdpic (vd);
+  const double __builtin_vsx_xsrdpic (double);
     XSRDPIC vsx_xsrdpic {}
 
-  const vd __builtin_vsx_xsrdpim (vd);
+  const double __builtin_vsx_xsrdpim (double);
     XSRDPIM floordf2 {}
 
-  const vd __builtin_vsx_xsrdpip (vd);
+  const double __builtin_vsx_xsrdpip (double);
     XSRDPIP ceildf2 {}
 
-  const vd __builtin_vsx_xsrdpiz (vd);
+  const double __builtin_vsx_xsrdpiz (double);
     XSRDPIZ btruncdf2 {}
 
   const unsigned int __builtin_vsx_xstdivdp_fe (vd, vd);
@@ -1611,49 +1612,37 @@
   const vbll __builtin_vsx_xvcmpeqdp (vd, vd);
     XVCMPEQDP vector_eqv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpeqdp_p (vd);
+  const signed int __builtin_vsx_xvcmpeqdp_p (vd);
     XVCMPEQDP_P vector_eq_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpeqsp (vf, vf);
     XVCMPEQSP vector_eqv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpeqsp_p (vf);
+  const signed int __builtin_vsx_xvcmpeqsp_p (vf);
     XVCMPEQSP_P vector_eq_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgedp (vd, vd);
     XVCMPGEDP vector_gev2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgedp_p (vd);
+  const signed int __builtin_vsx_xvcmpgedp_p (vd);
     XVCMPGEDP_P vector_ge_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgesp (vf, vf);
     XVCMPGESP vector_gev4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgesp_p (vf);
+  const signed int __builtin_vsx_xvcmpgesp_p (vf);
     XVCMPGESP_P vector_ge_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgtdp (vd, vd);
     XVCMPGTDP vector_gtv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgtdp_p (vd);
+  const signed int __builtin_vsx_xvcmpgtdp_p (vd);
     XVCMPGTDP_P vector_gt_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgtsp (vf, vf);
     XVCMPGTSP vector_gtv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
+  const signed int __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
   const vf __builtin_vsx_xvcvdpsp (vd);
@@ -1665,7 +1654,7 @@
   const vsll __builtin_vsx_xvcvdpsxds_scale (vd, const int);
     XVCVDPSXDS_SCALE vsx_xvcvdpsxds_scale {}
 
-  const vsll __builtin_vsx_xvcvdpsxws (vd);
+  const vsi __builtin_vsx_xvcvdpsxws (vd);
     XVCVDPSXWS vsx_xvcvdpsxws {}
 
   const vull __builtin_vsx_xvcvdpuxds (vd);
@@ -1678,7 +1667,7 @@
   const vull __builtin_vsx_xvcvdpuxds_uns (vd);
     XVCVDPUXDS_UNS vsx_fixuns_truncv2dfv2di2 {}
 
-  const vull __builtin_vsx_xvcvdpuxws (vd);
+  const vui __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
   const vd __builtin_vsx_xvcvspdp (vf);
@@ -1736,7 +1725,7 @@
   const vf __builtin_vsx_xvcvuxwsp (vui);
     XVCVUXWSP_V4SF vsx_xvcvuxwsp {}
 
-  fpmath vf __builtin_vsx_xvdivdp (vf, vf);
+  fpmath vd __builtin_vsx_xvdivdp (vd, vd);
     XVDIVDP divv2df3 {}
 
   fpmath vf __builtin_vsx_xvdivsp (vf, vf);
@@ -1895,7 +1884,7 @@
   const vf __builtin_vsx_xxmrglw (vf, vf);
     XXMRGLW_4SF vsx_xxmrglw_v4sf {}
 
-  const vss __builtin_vsx_xxmrglw_4si (vsi, vsi);
+  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
     XXMRGLW_4SI vsx_xxmrglw_v4si {}
 
   const vsc __builtin_vsx_xxpermdi_16qi (vsc, vsc, const int<1>);
@@ -2268,7 +2257,8 @@
     VPKUDUS altivec_vpkudus {}
 
 ; #### Following are duplicates of __builtin_crypto_vpmsum*.  This
-; can't have ever worked properly!
+; can't have ever worked properly!  However, these have the right
+; result types, and the others have wrong ones!!  Whatwhatwhat...
 ;
 ;  const vus __builtin_altivec_vpmsumb (vuc, vuc);
 ;    VPMSUMB crypto_vpmsumb {}
@@ -2282,13 +2272,13 @@
 ;  const vull __builtin_altivec_vpmsumw (vui, vui);
 ;    VPMSUMW crypto_vpmsumw {}
 
-  const vuc __builtin_altivec_vpopcntb (vsc);
+  const vsc __builtin_altivec_vpopcntb (vsc);
     VPOPCNTB popcountv16qi2 {}
 
-  const vull __builtin_altivec_vpopcntd (vsll);
+  const vsll __builtin_altivec_vpopcntd (vsll);
     VPOPCNTD popcountv2di2 {}
 
-  const vus __builtin_altivec_vpopcnth (vss);
+  const vss __builtin_altivec_vpopcnth (vss);
     VPOPCNTH popcountv8hi2 {}
 
   const vuc __builtin_altivec_vpopcntub (vuc);
@@ -2303,7 +2293,7 @@
   const vui __builtin_altivec_vpopcntuw (vui);
     VPOPCNTUW popcountv4si2 {}
 
-  const vui __builtin_altivec_vpopcntw (vsi);
+  const vsi __builtin_altivec_vpopcntw (vsi);
     VPOPCNTW popcountv4si2 {}
 
   const vsll __builtin_altivec_vrld (vsll, vull);
@@ -2342,31 +2332,31 @@
   const vsq __builtin_bcdadd (vsq, vsq, const int<1>);
     BCDADD bcdadd {}
 
-  const unsigned int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
     BCDADD_EQ bcdadd_eq {}
 
-  const unsigned int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
     BCDADD_GT bcdadd_gt {}
 
-  const unsigned int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
     BCDADD_LT bcdadd_lt {}
 
-  const unsigned int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
     BCDADD_OV bcdadd_unordered {}
 
   const vsq __builtin_bcdsub (vsq, vsq, const int<1>);
     BCDSUB bcdsub {}
 
-  const unsigned int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
     BCDSUB_EQ bcdsub_eq {}
 
-  const unsigned int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
     BCDSUB_GT bcdsub_gt {}
 
-  const unsigned int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
     BCDSUB_LT bcdsub_lt {}
 
-  const unsigned int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
     BCDSUB_OV bcdsub_unordered {}
 
   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
@@ -2381,16 +2371,19 @@
   const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
     VPERMXOR_V8HI crypto_vpermxor_v8hi {}
 
-  const vus __builtin_crypto_vpmsumb (vuc, vuc);
+; Note: these four have incorrect return types per the way the
+; instructions work, but this matches the old signatures that
+; have been around for too long.
+  const vuc __builtin_crypto_vpmsumb (vuc, vuc);
     VPMSUMB crypto_vpmsumb {}
 
-  const vuq __builtin_crypto_vpmsumd (vull, vull);
+  const vull __builtin_crypto_vpmsumd (vull, vull);
     VPMSUMD crypto_vpmsumd {}
 
-  const vui __builtin_crypto_vpmsumh (vus, vus);
+  const vus __builtin_crypto_vpmsumh (vus, vus);
     VPMSUMH crypto_vpmsumh {}
 
-  const vull __builtin_crypto_vpmsumw (vui, vui);
+  const vui __builtin_crypto_vpmsumw (vui, vui);
     VPMSUMW crypto_vpmsumw {}
 
   const vf __builtin_vsx_float2_v2df (vd, vd);
@@ -2615,16 +2608,16 @@
   const vsi __builtin_altivec_vprtybw (vsi);
     VPRTYBW parityv4si2 {}
 
-  const vull __builtin_altivec_vrldmi (vull, vull, vull);
+  const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
 
-  const vull __builtin_altivec_vrldnm (vull, vull);
+  const vsll __builtin_altivec_vrldnm (vsll, vsll);
     VRLDNM altivec_vrldnm {}
 
-  const vui __builtin_altivec_vrlwmi (vui, vui, vui);
+  const vsi __builtin_altivec_vrlwmi (vsi, vsi, vsi);
     VRLWMI altivec_vrlwmi {}
 
-  const vui __builtin_altivec_vrlwnm (vui, vui);
+  const vsi __builtin_altivec_vrlwnm (vsi, vsi);
     VRLWNM altivec_vrlwnm {}
 
   const vuc __builtin_altivec_vslv (vuc, vuc);
@@ -2687,10 +2680,10 @@
   const unsigned int __builtin_vsx_scalar_test_neg_sp (float);
     VSTDCNSP xststdcnegsp {}
 
-  const unsigned long long __builtin_vsx_test_data_class_dp (vd, signed int);
+  const vbll __builtin_vsx_test_data_class_dp (vd, signed int);
     VTDCDP xvtstdcdp {}
 
-  const unsigned int __builtin_vsx_test_data_class_sp (vf, signed int);
+  const vbi __builtin_vsx_test_data_class_sp (vf, signed int);
     VTDCSP xvtstdcsp {}
 
   const vf __builtin_vsx_vextract_fp_from_shorth (vus);
@@ -2735,28 +2728,28 @@
   double __builtin_mffsl ();
     MFFSL rs6000_mffsl {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -3219,25 +3212,25 @@
   const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
     VSRDB_V8HI vsrdb_v8hi {}
 
-  const vuc __builtin_altivec_vstribl (vuc);
+  const vsc __builtin_altivec_vstribl (vsc);
     VSTRIBL vstril_v16qi {}
 
   const signed int __builtin_altivec_vstribl_p (vuc);
     VSTRIBL_P vstril_p_v16qi {}
 
-  const vuc __builtin_altivec_vstribr (vuc);
+  const vsc __builtin_altivec_vstribr (vsc);
     VSTRIBR vstrir_v16qi {}
 
   const signed int __builtin_altivec_vstribr_p (vuc);
     VSTRIBR_P vstrir_p_v16qi {}
 
-  const vus __builtin_altivec_vstrihl (vus);
+  const vss __builtin_altivec_vstrihl (vss);
     VSTRIHL vstril_v8hi {}
 
   const signed int __builtin_altivec_vstrihl_p (vus);
     VSTRIHL_P vstril_p_v8hi {}
 
-  const vus __builtin_altivec_vstrihr (vus);
+  const vss __builtin_altivec_vstrihr (vss);
     VSTRIHR vstrir_v8hi {}
 
   const signed int __builtin_altivec_vstrihr_p (vus);
@@ -3270,22 +3263,22 @@
   const vuc __builtin_vsx_xvcvspbf16 (vuc);
     XVCVSPBF16 vsx_xvcvspbf16 {}
 
-  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+  const vuc __builtin_vsx_xxblend_v16qi (vuc, vuc, vuc);
     VXXBLEND_V16QI xxblend_v16qi {}
 
   const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
     VXXBLEND_V2DF xxblend_v2df {}
 
-  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+  const vull __builtin_vsx_xxblend_v2di (vull, vull, vull);
     VXXBLEND_V2DI xxblend_v2di {}
 
   const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
     VXXBLEND_V4SF xxblend_v4sf {}
 
-  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+  const vui __builtin_vsx_xxblend_v4si (vui, vui, vui);
     VXXBLEND_V4SI xxblend_v4si {}
 
-  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+  const vus __builtin_vsx_xxblend_v8hi (vus, vus, vus);
     VXXBLEND_V8HI xxblend_v8hi {}
 
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index ba64d90cda3..2f317d761d7 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -458,7 +458,7 @@ static typemap type_map[TYPE_MAP_SIZE] =
     { "sf",	"float" },
     { "si",	"intSI" },
     { "td",	"dfloat128" },
-    { "tf",	"long_double" },
+    { "tf",	"float128" },
     { "ti",	"intTI" },
     { "udi",	"unsigned_intDI" },
     { "uhi",	"unsigned_intHI" },


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-29 19:53 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-29 19:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:e193a86e9ccec41e3a7a5483ec25e285b39172f4

commit e193a86e9ccec41e3a7a5483ec25e285b39172f4
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 18:21:20 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 195 ++++++++++++++++---------------
 1 file changed, 99 insertions(+), 96 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 755399b1843..6b66908d7c7 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -531,73 +531,73 @@
   const int __builtin_altivec_vcmpbfp_p (int, vf, vf);
     VCMPBFP_P altivec_vcmpbfp_p {pred}
 
-  const vbi __builtin_altivec_vcmpeqfp (vf, vf);
+  const vf __builtin_altivec_vcmpeqfp (vf, vf);
     VCMPEQFP vector_eqv4sf {}
 
   const int __builtin_altivec_vcmpeqfp_p (int, vf, vf);
     VCMPEQFP_P vector_eq_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpequb (vuc, vuc);
+  const vsc __builtin_altivec_vcmpequb (vuc, vuc);
     VCMPEQUB vector_eqv16qi {}
 
   const int __builtin_altivec_vcmpequb_p (int, vuc, vuc);
     VCMPEQUB_P vector_eq_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpequh (vus, vus);
+  const vss __builtin_altivec_vcmpequh (vus, vus);
     VCMPEQUH vector_eqv8hi {}
 
   const int __builtin_altivec_vcmpequh_p (int, vus, vus);
     VCMPEQUH_P vector_eq_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpequw (vui, vui);
+  const vsi __builtin_altivec_vcmpequw (vui, vui);
     VCMPEQUW vector_eqv4si {}
 
   const int __builtin_altivec_vcmpequw_p (int, vui, vui);
     VCMPEQUW_P vector_eq_v4si_p {pred}
 
-  const vbi __builtin_altivec_vcmpgefp (vf, vf);
+  const vf __builtin_altivec_vcmpgefp (vf, vf);
     VCMPGEFP vector_gev4sf {}
 
   const int __builtin_altivec_vcmpgefp_p (int, vf, vf);
     VCMPGEFP_P vector_ge_v4sf_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtfp (vf, vf);
+  const vf __builtin_altivec_vcmpgtfp (vf, vf);
     VCMPGTFP vector_gtv4sf {}
 
   const int __builtin_altivec_vcmpgtfp_p (int, vf, vf);
     VCMPGTFP_P vector_gt_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtsb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpgtsb (vsc, vsc);
     VCMPGTSB vector_gtv16qi {}
 
   const int __builtin_altivec_vcmpgtsb_p (int, vsc, vsc);
     VCMPGTSB_P vector_gt_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtsh (vss, vss);
+  const vss __builtin_altivec_vcmpgtsh (vss, vss);
     VCMPGTSH vector_gtv8hi {}
 
   const int __builtin_altivec_vcmpgtsh_p (int, vss, vss);
     VCMPGTSH_P vector_gt_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtsw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpgtsw (vsi, vsi);
     VCMPGTSW vector_gtv4si {}
 
   const int __builtin_altivec_vcmpgtsw_p (int, vsi, vsi);
     VCMPGTSW_P vector_gt_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtub (vuc, vuc);
+  const vsc __builtin_altivec_vcmpgtub (vuc, vuc);
     VCMPGTUB vector_gtuv16qi {}
 
   const int __builtin_altivec_vcmpgtub_p (int, vuc, vuc);
     VCMPGTUB_P vector_gtu_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtuh (vus, vus);
+  const vss __builtin_altivec_vcmpgtuh (vus, vus);
     VCMPGTUH vector_gtuv8hi {}
 
   const int __builtin_altivec_vcmpgtuh_p (int, vus, vus);
     VCMPGTUH_P vector_gtu_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtuw (vui, vui);
+  const vsi __builtin_altivec_vcmpgtuw (vui, vui);
     VCMPGTUW vector_gtuv4si {}
 
   const int __builtin_altivec_vcmpgtuw_p (int, vui, vui);
@@ -2162,7 +2162,7 @@
   const vsi __builtin_altivec_vclzw (vsi);
     VCLZW clzv4si2 {}
 
-  const vsc __builtin_altivec_vgbbd (vsc);
+  const vuc __builtin_altivec_vgbbd (vuc);
     VGBBD p8v_vgbbd {}
 
   const vsq __builtin_altivec_vaddcuq (vsq, vsq);
@@ -2186,19 +2186,19 @@
   const vuc __builtin_altivec_vbpermq2 (vuc, vuc);
     VBPERMQ2 altivec_vbpermq2 {}
 
-  const vbll __builtin_altivec_vcmpequd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpequd (vull, vull);
     VCMPEQUD vector_eqv2di {}
 
   const int __builtin_altivec_vcmpequd_p (int, vsll, vsll);
     VCMPEQUD_P vector_eq_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtsd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpgtsd (vsll, vsll);
     VCMPGTSD vector_gtv2di {}
 
   const int __builtin_altivec_vcmpgtsd_p (int, vsll, vsll);
     VCMPGTSD_P vector_gt_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtud (vull, vull);
+  const vsll __builtin_altivec_vcmpgtud (vull, vull);
     VCMPGTUD vector_gtuv2di {}
 
   const int __builtin_altivec_vcmpgtud_p (vull, vull);
@@ -2540,7 +2540,7 @@
   const signed int __builtin_altivec_vcmpaew_p (vsi, vsi);
     VCMPAEW_P vector_ae_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpneb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpneb (vsc, vsc);
     CMPNEB vcmpneb {}
 
   const signed int __builtin_altivec_vcmpneb_p (vsc, vsc);
@@ -2555,31 +2555,31 @@
   const signed int __builtin_altivec_vcmpnefp_p (vf, vf);
     VCMPNEFP_P vector_ne_v4sf_p {pred}
 
-  const vbs __builtin_altivec_vcmpneh (vss, vss);
+  const vss __builtin_altivec_vcmpneh (vss, vss);
     CMPNEH vcmpneh {}
 
   const signed int __builtin_altivec_vcmpneh_p (vss, vss);
     VCMPNEH_P vector_ne_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnew (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnew (vsi, vsi);
     CMPNEW vcmpnew {}
 
   const signed int __builtin_altivec_vcmpnew_p (vsi, vsi);
     VCMPNEW_P vector_ne_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpnezb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpnezb (vsc, vsc);
     CMPNEZB vcmpnezb {}
 
   const signed int __builtin_altivec_vcmpnezb_p (signed int, vsc, vsc);
     VCMPNEZB_P vector_nez_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpnezh (vss, vss);
+  const vss __builtin_altivec_vcmpnezh (vss, vss);
     CMPNEZH vcmpnezh {}
 
   const signed int __builtin_altivec_vcmpnezh_p (signed int, vss, vss);
     VCMPNEZH_P vector_nez_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnezw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnezw (vsi, vsi);
     CMPNEZW vcmpnezw {}
 
   const signed int __builtin_altivec_vcmpnezw_p (vsi, vsi);
@@ -3039,7 +3039,7 @@
   const vui __builtin_altivec_mtvsrwm (unsigned long long);
     MTVSRWM vec_mtvsr_v4si {}
 
-  const vuc __builtin_altivec_vcfuged (vuc, vuc);
+  const vull __builtin_altivec_vcfuged (vull, vull);
     VCFUGED vcfuged {}
 
   const vsc __builtin_altivec_vclrlb (vsc, unsigned int);
@@ -3048,10 +3048,10 @@
   const vsc __builtin_altivec_vclrrb (vsc, unsigned int);
     VCLRRB vclrrb {}
 
-  const vuc __builtin_altivec_vclzdm (vuc, vuc);
+  const vull __builtin_altivec_vclzdm (vull, vull);
     VCLZDM vclzdm {}
 
-  const vuc __builtin_altivec_vctzdm (vuc, vuc);
+  const vull __builtin_altivec_vctzdm (vull, vull);
     VCTZDM vctzdm {}
 
   const vuc __builtin_altivec_vexpandmb (vuc);
@@ -3093,19 +3093,19 @@
   const vull __builtin_altivec_vextduwvlx (vui, vui, unsigned char);
     VEXTRACTWL vextractlv4si {}
 
-  const unsigned int __builtin_altivec_vextractmb (vuc);
+  const signed int __builtin_altivec_vextractmb (vuc);
     VEXTRACTMB vec_extract_v16qi {}
 
-  const unsigned int __builtin_altivec_vextractmd (vull);
+  const signed int __builtin_altivec_vextractmd (vull);
     VEXTRACTMD vec_extract_v2di {}
 
-  const unsigned int __builtin_altivec_vextractmh (vus);
+  const signed int __builtin_altivec_vextractmh (vus);
     VEXTRACTMH vec_extract_v8hi {}
 
-  const unsigned int __builtin_altivec_vextractmq (vuq);
+  const signed int __builtin_altivec_vextractmq (vuq);
     VEXTRACTMQ vec_extract_v1ti {}
 
-  const unsigned int __builtin_altivec_vextractmw (vui);
+  const signed int __builtin_altivec_vextractmw (vui);
     VEXTRACTMW vec_extract_v4si {}
 
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
@@ -3153,10 +3153,10 @@
   const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
     VINSERTVPRWR vinsertvr_v4si {}
 
-  const vuc __builtin_altivec_vpdepd (vuc, vuc);
+  const vull __builtin_altivec_vpdepd (vull, vull);
     VPDEPD vpdepd {}
 
-  const vuc __builtin_altivec_vpextd (vuc, vuc);
+  const vull __builtin_altivec_vpextd (vull, vull);
     VPEXTD vpextd {}
 
   const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
@@ -3360,18 +3360,21 @@
 ; external copy in the built-in table.  This is fragile.  For the
 ; new support, we should transition this to do a name lookup in
 ; the built-in hash table, but to start with we will honor the
-; positioning of the built-ins in the table.
+; positioning of the built-ins in the table.  Note that right now
+; __builtin_mma_disassemble_{acc,pair} are expected to break, since
+; each must be followed by a blank builtin with icode
+; CODE_FOR_nothing.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC nothing {mma}
 
-  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+  v512 __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
     ASSEMBLE_PAIR nothing {mma}
 
-  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+  v256 __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
     ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
@@ -3383,367 +3386,367 @@
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER nothing {mma}
 
-  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER nothing {mma,pair}
 
-  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8 nothing {mma}
 
-  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+  v512 __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4 nothing {mma}
 
-  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
     XVBF16GER2 nothing {mma}
 
-  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
     XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
     XVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
     XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
     XVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
     XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
     XVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
     XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
     XVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
     XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
     XVF16GER2 nothing {mma}
 
-  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
     XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
     XVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
     XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
     XVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
     XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
     XVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
     XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
     XVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
     XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
     XVF32GER nothing {mma}
 
-  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
     XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
     XVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
     XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
     XVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
     XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
     XVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
     XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
     XVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
     XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
     XVF64GER nothing {mma,pair}
 
-  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
     XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
     XVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
     XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
     XVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
     XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
     XVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
     XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
     XVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
     XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
     XVI16GER2 nothing {mma}
 
-  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
     XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
     XVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
     XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
     XVI16GER2S nothing {mma}
 
-  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
     XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
     XVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
     XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
     XVI4GER8 nothing {mma}
 
-  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
     XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
     XVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
     XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
     XVI8GER4 nothing {mma}
 
-  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
     XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
     XVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
     XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
     XVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
     XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
     XXMFACC nothing {mma,quad}
 
-  void __builtin_mma_xxmfacc_internal (v512 *);
+  v512 __builtin_mma_xxmfacc_internal (v512 *);
     XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
     XXMTACC nothing {mma,quad}
 
-  void __builtin_mma_xxmtacc_internal (v512 *);
+  v512 __builtin_mma_xxmtacc_internal (v512 *);
     XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
     XXSETACCZ nothing {mma}
 
-  void __builtin_mma_xxsetaccz_internal (v512 *);
+  v512 __builtin_mma_xxsetaccz_internal (v512 *);
     XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-29 19:53 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-29 19:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:fb0ee6e502648601dbb77f33946ef3ae817f2589

commit fb0ee6e502648601dbb77f33946ef3ae817f2589
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 11:06:36 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-call.c (altivec_init_builtins): Correct
            name of __builtin_altivec_xst_len_r.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 549 ++++++++++++++++++++++++++-----
 1 file changed, 475 insertions(+), 74 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 565c14f9f4c..755399b1843 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1494,6 +1494,45 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
+; It is unclear why we have __builtin_vsx_* duplicates of these when
+; the __builtin_altivec_* counterparts are already present.  They are
+; kept here for compatibility.
+  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
+    VPERM_16QI_X altivec_vperm_v16qi {}
+
+  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
+    VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
+
+  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vuc);
+    VPERM_1TI_X altivec_vperm_v1ti {}
+
+  const vuq __builtin_vsx_vperm_1ti_uns (vuq, vuq, vuc);
+    VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
+
+  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
+    VPERM_2DF_X altivec_vperm_v2df {}
+
+  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
+    VPERM_2DI_X altivec_vperm_v2di {}
+
+  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
+    VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
+
+  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
+    VPERM_4SF_X altivec_vperm_v4sf {}
+
+  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
+    VPERM_4SI_X altivec_vperm_v4si {}
+
+  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
+    VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
+
+  const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
+    VPERM_8HI_X altivec_vperm_v8hi {}
+
+  const vus __builtin_vsx_vperm_8hi_uns (vus, vus, vuc);
+    VPERM_8HI_UNS_X altivec_vperm_v8hi_uns {}
+
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -1617,6 +1656,9 @@
   const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
+  const vf __builtin_vsx_xvcvdpsp (vd);
+    XVCVDPSP vsx_xvcvdpsp {}
+
   const vsll __builtin_vsx_xvcvdpsxds (vd);
     XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1639,6 +1681,9 @@
   const vull __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
+  const vd __builtin_vsx_xvcvspdp (vf);
+    XVCVSPDP vsx_xvcvspdp {}
+
   const vsll __builtin_vsx_xvcvspsxds (vf);
     XVCVSPSXDS vsx_xvcvspsxds {}
 
@@ -2687,28 +2732,31 @@
   signed long long __builtin_darn_raw ();
     DARN_RAW darn_raw {}
 
-  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  double __builtin_mffsl ();
+    MFFSL rs6000_mffsl {}
+
+  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -2720,10 +2768,10 @@
 ; 32 bits, and the return value is DImode, so it seems that
 ; TARGET_64BIT (actually TARGET_POWERPC64) is justified.  TBD. ####
 [power9-64]
-; The following two are inexplicably named __builtin_{alti,}vec_* while
+; The following two are inexplicably named __builtin_altivec_* while
 ; their load counterparts are __builtin_vsx_*.  Need to deprecate
 ; these interfaces in favor of the other naming scheme (or vice versa).
-  void __builtin_vec_xst_len_r (vop, void *, unsigned long long);
+  void __builtin_altivec_xst_len_r (vop, void *, unsigned long long);
     XST_LEN_R xst_len_r {}
 
   void __builtin_altivec_stxvl (vop, void *, unsigned long long);
@@ -3063,12 +3111,114 @@
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
     VGNB vgnb {}
 
+  const vuc __builtin_altivec_vinsgubvlx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBL vinsertgl_v16qi {}
+
+  const vuc __builtin_altivec_vinsgubvrx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBR vinsertgr_v16qi {}
+
+  const vull __builtin_altivec_vinsgudvlx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDL vinsertgl_v2di {}
+
+  const vull __builtin_altivec_vinsgudvrx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDR vinsertgr_v2di {}
+
+  const vus __builtin_altivec_vinsguhvlx (unsigned short, vus, unsigned int);
+    VINSERTGPRHL vinsertgl_v8hi {}
+
+  const vus __builtin_altivec_vinsguhvrx (unsigned short, vus, unsigned int);
+    VINSERTGPRHR vinsertgr_v8hi {}
+
+  const vui __builtin_altivec_vinsguwvlx (unsigned int, vui, unsigned int);
+    VINSERTGPRWL vinsertgl_v4si {}
+
+  const vui __builtin_altivec_vinsguwvrx (unsigned int, vui, unsigned int);
+    VINSERTGPRWR vinsertgr_v4si {}
+
+  const vuc __builtin_altivec_vinsvubvlx (vuc, vuc, unsigned int);
+    VINSERTVPRBL vinsertvl_v16qi {}
+
+  const vuc __builtin_altivec_vinsvubvrx (vuc, vuc, unsigned int);
+    VINSERTVPRBR vinsertvr_v16qi {}
+
+  const vus __builtin_altivec_vinsvuhvlx (vus, vus, unsigned int);
+    VINSERTVPRHL vinsertvl_v8hi {}
+
+  const vus __builtin_altivec_vinsvuhvrx (vus, vus, unsigned int);
+    VINSERTVPRHR vinsertvr_v8hi {}
+
+  const vui __builtin_altivec_vinsvuwvlx (vui, vui, unsigned int);
+    VINSERTVPRWL vinsertvl_v4si {}
+
+  const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
+    VINSERTVPRWR vinsertvr_v4si {}
+
   const vuc __builtin_altivec_vpdepd (vuc, vuc);
     VPDEPD vpdepd {}
 
   const vuc __builtin_altivec_vpextd (vuc, vuc);
     VPEXTD vpextd {}
 
+  const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
+    VREPLACE_UN_UV2DI vreplace_un_v2di {}
+
+  const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, const int<4>);
+    VREPLACE_UN_UV4SI vreplace_un_v4si {}
+
+  const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>);
+    VREPLACE_UN_V2DF vreplace_un_v2df {}
+
+  const vsll __builtin_altivec_vreplace_un_v2di (vsll, unsigned long long, const int<4>);
+    VREPLACE_UN_V2DI vreplace_un_v2di {}
+
+  const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>);
+    VREPLACE_UN_V4SF vreplace_un_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>);
+    VREPLACE_UN_V4SI vreplace_un_v4si {}
+
+  const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, const int<1>);
+    VREPLACE_ELT_UV2DI vreplace_elt_v2di {}
+
+  const vui __builtin_altivec_vreplace_uv4si (vui, unsigned int, const int<2>);
+    VREPLACE_ELT_UV4SI vreplace_elt_v4si {}
+
+  const vd __builtin_altivec_vreplace_v2df (vd, double, const int<1>);
+    VREPLACE_ELT_V2DF vreplace_elt_v2df {}
+
+  const vsll __builtin_altivec_vreplace_v2di (vsll, unsigned long long, const int<1>);
+    VREPLACE_ELT_V2DI vreplace_elt_v2di {}
+
+  const vf __builtin_altivec_vreplace_v4sf (vf, float, const int<2>);
+    VREPLACE_ELT_V4SF vreplace_elt_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_v4si (vsi, signed int, const int<2>);
+    VREPLACE_ELT_V4SI vreplace_elt_v4si {}
+
+  const vsc __builtin_altivec_vsldb_v16qi (vsc, vsc, const int<3>);
+    VSLDB_V16QI vsldb_v16qi {}
+
+  const vsll __builtin_altivec_vsldb_v2di (vsll, vsll, const int<3>);
+    VSLDB_V2DI vsldb_v2di {}
+
+  const vsi __builtin_altivec_vsldb_v4si (vsi, vsi, const int<3>);
+    VSLDB_V4SI vsldb_v4si {}
+
+  const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
+    VSLDB_V8HI vsldb_v8hi {}
+
+  const vsc __builtin_altivec_vsrdb_v16qi (vsc, vsc, const int<3>);
+    VSRDB_V16QI vsrdb_v16qi {}
+
+  const vsll __builtin_altivec_vsrdb_v2di (vsll, vsll, const int<3>);
+    VSRDB_V2DI vsrdb_v2di {}
+
+  const vsi __builtin_altivec_vsrdb_v4si (vsi, vsi, const int<3>);
+    VSRDB_V4SI vsrdb_v4si {}
+
+  const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
+    VSRDB_V8HI vsrdb_v8hi {}
+
   const vuc __builtin_altivec_vstribl (vuc);
     VSTRIBL vstril_v16qi {}
 
@@ -3093,6 +3243,51 @@
   const signed int __builtin_altivec_vstrihr_p (vus);
     VSTRIHR_P vstrir_p_v8hi {}
 
+  const signed int __builtin_vsx_xvtlsbb_all_ones (vuc);
+    XVTLSBB_ONES xvtlsbbo {}
+
+  const signed int __builtin_vsx_xvtlsbb_all_zeros (vuc);
+    XVTLSBB_ZEROS xvtlsbbz {}
+
+  const vf __builtin_vsx_vxxsplti32dx_v4sf (vf, const int<1>, float);
+    VXXSPLTI32DX_V4SF xxsplti32dx_v4sf {}
+
+  const vsi __builtin_vsx_vxxsplti32dx_v4si (vsi, const int<1>, signed int);
+    VXXSPLTI32DX_V4SI xxsplti32dx_v4si {}
+
+  const vd __builtin_vsx_vxxspltidp (float);
+    VXXSPLTIDP xxspltidp_v2df {}
+
+  const vf __builtin_vsx_vxxspltiw_v4sf (float);
+    VXXSPLTIW_V4SF xxspltiw_v4sf {}
+
+  const vsi __builtin_vsx_vxxspltiw_v4si (signed int);
+    VXXSPLTIW_V4SI xxspltiw_v4si {}
+
+  const vuc __builtin_vsx_xvcvbf16spn (vuc);
+    XVCVBF16SPN vsx_xvcvbf16spn {}
+
+  const vuc __builtin_vsx_xvcvspbf16 (vuc);
+    XVCVSPBF16 vsx_xvcvspbf16 {}
+
+  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+    VXXBLEND_V16QI xxblend_v16qi {}
+
+  const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
+    VXXBLEND_V2DF xxblend_v2df {}
+
+  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+    VXXBLEND_V2DI xxblend_v2di {}
+
+  const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
+    VXXBLEND_V4SF xxblend_v4sf {}
+
+  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+    VXXBLEND_V4SI xxblend_v4si {}
+
+  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+    VXXBLEND_V8HI xxblend_v8hi {}
+
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
     XXEVAL xxeval {}
 
@@ -3108,11 +3303,37 @@
   const vus __builtin_vsx_xxgenpcvm_v8hi (vus, const int <2>);
     XXGENPCVM_V8HI xxgenpcvm_v8hi {}
 
-  const vuc __builtin_vsx_xvcvbf16spn (vuc);
-    XVCVBF16SPN vsx_xvcvbf16spn {}
+; TODO: This was quite hackish in the original code, and we may need to add
+; mode-specific expansions rather than using CODE_FOR_xxpermx throughout.
+  const vuc __builtin_vsx_xxpermx_uv16qi (vuc, vuc, vuc, const int<3>);
+    XXPERMX_UV16QI xxpermx {}
 
-  const vuc __builtin_vsx_xvcvspbf16 (vuc);
-    XVCVSPBF16 vsx_xvcvspbf16 {}
+  const vull __builtin_vsx_xxpermx_uv2di (vull, vull, vuc, const int<3>);
+    XXPERMX_UV2DI xxpermx {}
+
+  const vui __builtin_vsx_xxpermx_uv4si (vui, vui, vuc, const int<3>);
+    XXPERMX_UV4SI xxpermx {}
+
+  const vus __builtin_vsx_xxpermx_uv8hi (vus, vus, vuc, const int<3>);
+    XXPERMX_UV8HI xxpermx {}
+
+  const vsc __builtin_vsx_xxpermx_v16qi (vsc, vsc, vuc, const int<3>);
+    XXPERMX_V16QI xxpermx {}
+
+  const vd __builtin_vsx_xxpermx_v2df (vd, vd, vuc, const int<3>);
+    XXPERMX_V2DF xxpermx {}
+
+  const vsll __builtin_vsx_xxpermx_v2di (vsll, vsll, vuc, const int<3>);
+    XXPERMX_V2DI xxpermx {}
+
+  const vf __builtin_vsx_xxpermx_v4sf (vf, vf, vuc, const int<3>);
+    XXPERMX_V4SF xxpermx {}
+
+  const vsi __builtin_vsx_xxpermx_v4si (vsi, vsi, vuc, const int<3>);
+    XXPERMX_V4SI xxpermx {}
+
+  const vss __builtin_vsx_xxpermx_v8hi (vss, vss, vuc, const int<3>);
+    XXPERMX_V8HI xxpermx {}
 
 
 [power10-64]
@@ -3166,184 +3387,364 @@
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}
+    PMXVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NP mma_pmxvbf16ger2np {mma,quad}
+    PMXVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PN mma_pmxvbf16ger2pn {mma,quad}
+    PMXVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PP mma_pmxvbf16ger2pp {mma,quad}
+    PMXVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2 mma_pmxvf16ger2 {mma}
+    PMXVF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NN mma_pmxvf16ger2nn {mma,quad}
+    PMXVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NP mma_pmxvf16ger2np {mma,quad}
+    PMXVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PN mma_pmxvf16ger2pn {mma,quad}
+    PMXVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PP mma_pmxvf16ger2pp {mma,quad}
+    PMXVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GER mma_pmxvf32ger {mma}
+    PMXVF32GER nothing {mma}
+
+  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNN mma_pmxvf32gernn {mma,quad}
+    PMXVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNP mma_pmxvf32gernp {mma,quad}
+    PMXVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPN mma_pmxvf32gerpn {mma,quad}
+    PMXVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPP mma_pmxvf32gerpp {mma,quad}
+    PMXVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GER mma_pmxvf64ger {mma,pair}
+    PMXVF64GER nothing {mma,pair}
+
+  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNN mma_pmxvf64gernn {mma,pair,quad}
+    PMXVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNP mma_pmxvf64gernp {mma,pair,quad}
+    PMXVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPN mma_pmxvf64gerpn {mma,pair,quad}
+    PMXVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPP mma_pmxvf64gerpp {mma,pair,quad}
+    PMXVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2 mma_pmxvi16ger2 {mma}
+    PMXVI16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2PP mma_pmxvi16ger2pp {mma,quad}
+    PMXVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2S mma_pmxvi16ger2s {mma}
+    PMXVI16GER2S nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2SPP mma_pmxvi16ger2spp {mma,quad}
+    PMXVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
-    PMXVI4GER8 mma_pmxvi4ger8 {mma}
+    PMXVI4GER8 nothing {mma}
+
+  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+    PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI4GER8PP mma_pmxvi4ger8pp {mma,quad}
+    PMXVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4 mma_pmxvi8ger4 {mma}
+    PMXVI8GER4 nothing {mma}
+
+  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4PP mma_pmxvi8ger4pp {mma,quad}
+    PMXVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4SPP mma_pmxvi8ger4spp {mma,quad}
+    PMXVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
-    XVBF16GER2 mma_xvbf16ger2 {mma}
+    XVBF16GER2 nothing {mma}
+
+  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+    XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
-    XVBF16GER2NN mma_xvbf16ger2nn {mma,quad}
+    XVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+    XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
-    XVBF16GER2NP mma_xvbf16ger2np {mma,quad}
+    XVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+    XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
-    XVBF16GER2PN mma_xvbf16ger2pn {mma,quad}
+    XVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+    XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
-    XVBF16GER2PP mma_xvbf16ger2pp {mma,quad}
+    XVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+    XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
-    XVF16GER2 mma_xvf16ger2 {mma}
+    XVF16GER2 nothing {mma}
+
+  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+    XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
-    XVF16GER2NN mma_xvf16ger2nn {mma,quad}
+    XVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+    XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
-    XVF16GER2NP mma_xvf16ger2np {mma,quad}
+    XVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+    XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
-    XVF16GER2PN mma_xvf16ger2pn {mma,quad}
+    XVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+    XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
-    XVF16GER2PP mma_xvf16ger2pp {mma,quad}
+    XVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+    XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
-    XVF32GER mma_xvf32ger {mma}
+    XVF32GER nothing {mma}
+
+  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+    XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
-    XVF32GERNN mma_xvf32gernn {mma,quad}
+    XVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+    XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
-    XVF32GERNP mma_xvf32gernp {mma,quad}
+    XVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+    XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
-    XVF32GERPN mma_xvf32gerpn {mma,quad}
+    XVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+    XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
-    XVF32GERPP mma_xvf32gerpp {mma,quad}
+    XVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+    XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
-    XVF64GER mma_xvf64ger {mma,pair}
+    XVF64GER nothing {mma,pair}
+
+  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+    XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
-    XVF64GERNN mma_xvf64gernn {mma,pair,quad}
+    XVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+    XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
-    XVF64GERNP mma_xvf64gernp {mma,pair,quad}
+    XVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+    XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
-    XVF64GERPN mma_xvf64gerpn {mma,pair,quad}
+    XVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+    XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
-    XVF64GERPP mma_xvf64gerpp {mma,pair,quad}
+    XVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+    XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
-    XVI16GER2 mma_xvi16ger2 {mma}
+    XVI16GER2 nothing {mma}
+
+  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+    XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
-    XVI16GER2PP mma_xvi16ger2pp {mma,quad}
+    XVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+    XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
-    XVI16GER2S mma_xvi16ger2s {mma}
+    XVI16GER2S nothing {mma}
+
+  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+    XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
-    XVI16GER2SPP mma_xvi16ger2spp {mma,quad}
+    XVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+    XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
-    XVI4GER8 mma_xvi4ger8 {mma}
+    XVI4GER8 nothing {mma}
+
+  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+    XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
-    XVI4GER8PP mma_xvi4ger8pp {mma,quad}
+    XVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+    XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
-    XVI8GER4 mma_xvi8ger4 {mma}
+    XVI8GER4 nothing {mma}
+
+  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+    XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
-    XVI8GER4PP mma_xvi8ger4pp {mma,quad}
+    XVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+    XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
-    XVI8GER4SPP mma_xvi8ger4spp {mma,quad}
+    XVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+    XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
-    XXMFACC mma_xxmfacc {mma,quad}
+    XXMFACC nothing {mma,quad}
+
+  void __builtin_mma_xxmfacc_internal (v512 *);
+    XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
-    XXMTACC mma_xxmtacc {mma,quad}
+    XXMTACC nothing {mma,quad}
+
+  void __builtin_mma_xxmtacc_internal (v512 *);
+    XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
-    XXSETACCZ mma_xxsetaccz {mma}
+    XXSETACCZ nothing {mma}
+
+  void __builtin_mma_xxsetaccz_internal (v512 *);
+    XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-29 19:53 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-29 19:53 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8410cd2c5dd2148fc5a01fa701ce186f590cd708

commit 8410cd2c5dd2148fc5a01fa701ce186f590cd708
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Aug 30 10:28:28 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-08-30  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 127 ++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index c0f8821be64..565c14f9f4c 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -321,9 +321,7 @@
   pure vuc __builtin_altivec_lvsr (signed long long, void *);
     LVSR altivec_lvsr {ldvec}
 
-; Following LVX one is redundant, and I don't think we need to
-; keep it.  It only maps to LVX_V4SI.  Probably remove.
-  pure vop __builtin_altivec_lvx (signed long long, void *);
+  pure vsi __builtin_altivec_lvx (signed long long, void *);
     LVX altivec_lvx_v4si {ldvec}
 
   pure vsc __builtin_altivec_lvx_v16qi (signed long long, void *);
@@ -395,7 +393,8 @@
   void __builtin_altivec_stvrxl (vop, signed long long, void *);
     STVRXL altivec_stvrxl {stvec}
 
-; Skipping the STVX one that maps to STVX_V4SI (see above for LVX)
+  void __builtin_altivec_stvx (vsi, signed long long, void *);
+    STVX altivec_stvx_v4si {stvec}
 
   void __builtin_altivec_stvx_v16qi (vsc, signed long long, void *);
     STVX_V16QI altivec_stvx_v16qi {stvec}
@@ -409,7 +408,8 @@
   void __builtin_altivec_stvx_v8hi (vss, signed long long, void *);
     STVX_V8HI altivec_stvx_v8hi {stvec}
 
-; Skipping the STVXL one that maps to STVXL_V4SI (see above for LVX)
+  void __builtin_altivec_stvxl (vsi, signed long long, void *);
+    STVXL altivec_stvxl_v4si {stvec}
 
   void __builtin_altivec_stvxl_v16qi (vsc, signed long long, void *);
     STVXL_V16QI altivec_stvxl_v16qi {stvec}
@@ -1206,6 +1206,33 @@
   const vull __builtin_altivec_vxor_v2di_uns (vull, vull);
     VXOR_V2DI_UNS xorv2di3 {}
 
+  const signed __int128 __builtin_vec_ext_v1ti (vsq, signed int);
+    VEC_EXT_V1TI nothing {extract}
+
+  const double __builtin_vec_ext_v2df (vd, signed int);
+    VEC_EXT_V2DF nothing {extract}
+
+  const signed long long __builtin_vec_ext_v2di (vsll, signed int);
+    VEC_EXT_V2DI nothing {extract}
+
+  const vsq __builtin_vec_init_v1ti (signed __int128);
+    VEC_INIT_V1TI nothing {init}
+
+  const vd __builtin_vec_init_v2df (double, double);
+    VEC_INIT_V2DF nothing {init}
+
+  const vsll __builtin_vec_init_v2di (signed long long, signed long long);
+    VEC_INIT_V2DI nothing {init}
+
+  const vsq __builtin_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
+    VEC_SET_V1TI nothing {set}
+
+  const vd __builtin_vec_set_v2df (vd, double, const int<1>);
+    VEC_SET_V2DF nothing {set}
+
+  const vsll __builtin_vec_set_v2di (vsll, signed long long, const int<1>);
+    VEC_SET_V2DI nothing {set}
+
   const vbc __builtin_vsx_cmpge_16qi (vsc, vsc);
     CMPGE_16QI vector_nltv16qi {}
 
@@ -1348,7 +1375,7 @@
   pure vsll __builtin_vsx_lxvd2x_v2di (signed long long, void *);
     LXVD2X_V2DI vsx_load_v2di {ldvec}
 
-  pure vsc __builtin_vsx_lxvw4x_16qi (signed long long, void *);
+  pure vsc __builtin_vsx_lxvw4x_v16qi (signed long long, void *);
     LXVW4X_V16QI vsx_load_v16qi {ldvec}
 
   pure vf __builtin_vsx_lxvw4x_v4sf (signed long long, void *);
@@ -1467,33 +1494,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
-  const signed __int128 __builtin_vsx_vec_ext_v1ti (vsq, signed int);
-    VEC_EXT_V1TI nothing {extract}
-
-  const double __builtin_vsx_vec_ext_v2df (vd, signed int);
-    VEC_EXT_V2DF nothing {extract}
-
-  const signed long long __builtin_vsx_vec_ext_v2di (vsll, signed int);
-    VEC_EXT_V2DI nothing {extract}
-
-  const vsq __builtin_vsx_vec_init_v1ti (signed __int128);
-    VEC_INIT_V1TI nothing {init}
-
-  const vd __builtin_vsx_vec_init_v2df (double, double);
-    VEC_INIT_V2DF nothing {init}
-
-  const vsll __builtin_vsx_vec_init_v2di (signed long long, signed long long);
-    VEC_INIT_V2DI nothing {init}
-
-  const vsq __builtin_vsx_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
-    VEC_SET_V1TI nothing {set}
-
-  const vd __builtin_vsx_vec_set_v2df (vd, double, const int<1>);
-    VEC_SET_V2DF nothing {set}
-
-  const vsll __builtin_vsx_vec_set_v2di (vsll, signed long long, const int<1>);
-    VEC_SET_V2DI nothing {set}
-
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -2753,24 +2753,18 @@
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
-  fpmath _Float128 __builtin_vsx_addf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
     ADDF128_ODD addkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_divf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_divf128_round_to_odd (_Float128, _Float128);
     DIVF128_ODD divkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
+  fpmath _Float128 __builtin_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
     FMAF128_ODD fmakf4_odd {}
 
-  fpmath _Float128 __builtin_vsx_mulf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_mulf128_round_to_odd (_Float128, _Float128);
     MULF128_ODD mulkf3_odd {}
 
-  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
-
-  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
-    VSESQP xsxsigqp_kf {}
-
   const signed int __builtin_vsx_scalar_cmp_exp_qp_eq (_Float128, _Float128);
     VSCEQPEQ xscmpexpqp_eq_kf {}
 
@@ -2783,6 +2777,21 @@
   const signed int __builtin_vsx_scalar_cmp_exp_qp_unordered (_Float128, _Float128);
     VSCEQPUO xscmpexpqp_unordered_kf {}
 
+  fpmath _Float128 __builtin_sqrtf128_round_to_odd (_Float128);
+    SQRTF128_ODD sqrtkf2_odd {}
+
+  fpmath _Float128 __builtin_subf128_round_to_odd (_Float128, _Float128);
+    SUBF128_ODD subkf3_odd {}
+
+  fpmath double __builtin_truncf128_round_to_odd (_Float128);
+    TRUNCF128_ODD trunckfdf2_odd {}
+
+  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
+    VSEEQP xsxexpqp_kf {}
+
+  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
+    VSESQP xsxsigqp_kf {}
+
   const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, unsigned long long);
     VSIEQP xsiexpqp_kf {}
 
@@ -2795,15 +2804,6 @@
   const unsigned int __builtin_vsx_scalar_test_neg_qp (_Float128);
     VSTDCNQP xststdcnegqp_kf {}
 
-  fpmath _Float128 __builtin_vsx_sqrtf128_round_to_odd (_Float128);
-    SQRTF128_ODD sqrtkf2_odd {}
-
-  fpmath _Float128 __builtin_vsx_subf128_round_to_odd (_Float128, _Float128);
-    SUBF128_ODD subkf3_odd {}
-
-  fpmath double __builtin_vsx_truncf128_round_to_odd (_Float128);
-    TRUNCF128_ODD trunckfdf2_odd {}
-
 
 
 ; Decimal floating-point builtins.
@@ -3132,12 +3132,26 @@
     PEXTD pextd {}
 
 
+; TODO: Land-mine alert.
+;
+; The original built-in support has code that assumes the internal
+; copy of an MMA built-in function appears immediately after the
+; external copy in the built-in table.  This is fragile.  For the
+; new support, we should transition this to do a name lookup in
+; the built-in hash table, but to start with we will honor the
+; positioning of the built-ins in the table.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
-    ASSEMBLE_ACC mma_assemble_acc {mma}
+    ASSEMBLE_ACC nothing {mma}
+
+  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+    ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
-    ASSEMBLE_PAIR mma_assemble_pair {mma}
+    ASSEMBLE_PAIR nothing {mma}
+
+  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+    ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
     DISASSEMBLE_ACC nothing {mma,quad}
@@ -3146,7 +3160,10 @@
     DISASSEMBLE_PAIR nothing {mma,pair}
 
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2 mma_pmxvbf16ger2 {mma}
+    PMXVBF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-27 16:30 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-27 16:30 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4a4d4a7b2ad9b33b8980f5e36753953acf853276

commit 4a4d4a7b2ad9b33b8980f5e36753953acf853276
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Tue Sep 15 11:53:33 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-15  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-gen-builtins.c (type_map): Change
            "long_double" to "float128".

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 135 +++++++++++++++----------------
 gcc/config/rs6000/rs6000-gen-builtins.c  |   2 +-
 2 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 6b66908d7c7..5e41e9e3316 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -169,6 +169,7 @@
   void __builtin_cpu_init ();
     CPU_INIT nothing {cpu}
 
+  ; TODO: Following two should return bool?
   unsigned int __builtin_cpu_is (const char *);
     CPU_IS nothing {cpu}
 
@@ -234,7 +235,7 @@
 
 ; Power6 builtins.
 [power6]
-  const signed int __builtin_p6_cmpb (signed int, signed int);
+  const signed long long __builtin_p6_cmpb (signed long long, signed long long);
     CMPB cmpbdi3 {}
 
   const signed int __builtin_p6_cmpb_32 (signed int, signed int);
@@ -810,7 +811,7 @@
   const vuc __builtin_altivec_vpkshus (vss, vss);
     VPKSHUS altivec_vpkshus {}
 
-  const vsi __builtin_altivec_vpkswss (vsi, vsi);
+  const vss __builtin_altivec_vpkswss (vsi, vsi);
     VPKSWSS altivec_vpkswss {}
 
   const vus __builtin_altivec_vpkswus (vsi, vsi);
@@ -1539,10 +1540,10 @@
   const vsi __builtin_vsx_vsigned_v4sf (vf);
     VEC_VSIGNED_V4SF vsx_xvcvspsxws {}
 
-  const vsll __builtin_vsx_vsignede_v2df (vd);
+  const vsi __builtin_vsx_vsignede_v2df (vd);
     VEC_VSIGNEDE_V2DF vsignede_v2df {}
 
-  const vsll __builtin_vsx_vsignedo_v2df (vd);
+  const vsi __builtin_vsx_vsignedo_v2df (vd);
     VEC_VSIGNEDO_V2DF vsignedo_v2df {}
 
   const vull __builtin_vsx_vunsigned_v2df (vd);
@@ -1560,7 +1561,7 @@
   const vf __builtin_vsx_xscvdpsp (vd);
     XSCVDPSP vsx_xscvdpsp {}
 
-  const vd __builtin_vsx_xscvspdp (vf);
+  const double __builtin_vsx_xscvspdp (float);
     XSCVSPDP vsx_xscvspdp {}
 
   const double __builtin_vsx_xsmaxdp (double, double);
@@ -1569,19 +1570,19 @@
   const double __builtin_vsx_xsmindp (double, double);
     XSMINDP smindf3 {}
 
-  const vd __builtin_vsx_xsrdpi (vd);
+  const double __builtin_vsx_xsrdpi (double);
     XSRDPI vsx_xsrdpi {}
 
-  const vd __builtin_vsx_xsrdpic (vd);
+  const double __builtin_vsx_xsrdpic (double);
     XSRDPIC vsx_xsrdpic {}
 
-  const vd __builtin_vsx_xsrdpim (vd);
+  const double __builtin_vsx_xsrdpim (double);
     XSRDPIM floordf2 {}
 
-  const vd __builtin_vsx_xsrdpip (vd);
+  const double __builtin_vsx_xsrdpip (double);
     XSRDPIP ceildf2 {}
 
-  const vd __builtin_vsx_xsrdpiz (vd);
+  const double __builtin_vsx_xsrdpiz (double);
     XSRDPIZ btruncdf2 {}
 
   const unsigned int __builtin_vsx_xstdivdp_fe (vd, vd);
@@ -1611,49 +1612,37 @@
   const vbll __builtin_vsx_xvcmpeqdp (vd, vd);
     XVCMPEQDP vector_eqv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpeqdp_p (vd);
+  const signed int __builtin_vsx_xvcmpeqdp_p (vd);
     XVCMPEQDP_P vector_eq_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpeqsp (vf, vf);
     XVCMPEQSP vector_eqv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpeqsp_p (vf);
+  const signed int __builtin_vsx_xvcmpeqsp_p (vf);
     XVCMPEQSP_P vector_eq_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgedp (vd, vd);
     XVCMPGEDP vector_gev2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgedp_p (vd);
+  const signed int __builtin_vsx_xvcmpgedp_p (vd);
     XVCMPGEDP_P vector_ge_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgesp (vf, vf);
     XVCMPGESP vector_gev4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgesp_p (vf);
+  const signed int __builtin_vsx_xvcmpgesp_p (vf);
     XVCMPGESP_P vector_ge_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgtdp (vd, vd);
     XVCMPGTDP vector_gtv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgtdp_p (vd);
+  const signed int __builtin_vsx_xvcmpgtdp_p (vd);
     XVCMPGTDP_P vector_gt_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgtsp (vf, vf);
     XVCMPGTSP vector_gtv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
+  const signed int __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
   const vf __builtin_vsx_xvcvdpsp (vd);
@@ -1665,7 +1654,7 @@
   const vsll __builtin_vsx_xvcvdpsxds_scale (vd, const int);
     XVCVDPSXDS_SCALE vsx_xvcvdpsxds_scale {}
 
-  const vsll __builtin_vsx_xvcvdpsxws (vd);
+  const vsi __builtin_vsx_xvcvdpsxws (vd);
     XVCVDPSXWS vsx_xvcvdpsxws {}
 
   const vull __builtin_vsx_xvcvdpuxds (vd);
@@ -1678,7 +1667,7 @@
   const vull __builtin_vsx_xvcvdpuxds_uns (vd);
     XVCVDPUXDS_UNS vsx_fixuns_truncv2dfv2di2 {}
 
-  const vull __builtin_vsx_xvcvdpuxws (vd);
+  const vui __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
   const vd __builtin_vsx_xvcvspdp (vf);
@@ -1736,7 +1725,7 @@
   const vf __builtin_vsx_xvcvuxwsp (vui);
     XVCVUXWSP_V4SF vsx_xvcvuxwsp {}
 
-  fpmath vf __builtin_vsx_xvdivdp (vf, vf);
+  fpmath vd __builtin_vsx_xvdivdp (vd, vd);
     XVDIVDP divv2df3 {}
 
   fpmath vf __builtin_vsx_xvdivsp (vf, vf);
@@ -1895,7 +1884,7 @@
   const vf __builtin_vsx_xxmrglw (vf, vf);
     XXMRGLW_4SF vsx_xxmrglw_v4sf {}
 
-  const vss __builtin_vsx_xxmrglw_4si (vsi, vsi);
+  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
     XXMRGLW_4SI vsx_xxmrglw_v4si {}
 
   const vsc __builtin_vsx_xxpermdi_16qi (vsc, vsc, const int<1>);
@@ -2268,7 +2257,8 @@
     VPKUDUS altivec_vpkudus {}
 
 ; #### Following are duplicates of __builtin_crypto_vpmsum*.  This
-; can't have ever worked properly!
+; can't have ever worked properly!  However, these have the right
+; result types, and the others have wrong ones!!  Whatwhatwhat...
 ;
 ;  const vus __builtin_altivec_vpmsumb (vuc, vuc);
 ;    VPMSUMB crypto_vpmsumb {}
@@ -2282,13 +2272,13 @@
 ;  const vull __builtin_altivec_vpmsumw (vui, vui);
 ;    VPMSUMW crypto_vpmsumw {}
 
-  const vuc __builtin_altivec_vpopcntb (vsc);
+  const vsc __builtin_altivec_vpopcntb (vsc);
     VPOPCNTB popcountv16qi2 {}
 
-  const vull __builtin_altivec_vpopcntd (vsll);
+  const vsll __builtin_altivec_vpopcntd (vsll);
     VPOPCNTD popcountv2di2 {}
 
-  const vus __builtin_altivec_vpopcnth (vss);
+  const vss __builtin_altivec_vpopcnth (vss);
     VPOPCNTH popcountv8hi2 {}
 
   const vuc __builtin_altivec_vpopcntub (vuc);
@@ -2303,7 +2293,7 @@
   const vui __builtin_altivec_vpopcntuw (vui);
     VPOPCNTUW popcountv4si2 {}
 
-  const vui __builtin_altivec_vpopcntw (vsi);
+  const vsi __builtin_altivec_vpopcntw (vsi);
     VPOPCNTW popcountv4si2 {}
 
   const vsll __builtin_altivec_vrld (vsll, vull);
@@ -2342,31 +2332,31 @@
   const vsq __builtin_bcdadd (vsq, vsq, const int<1>);
     BCDADD bcdadd {}
 
-  const unsigned int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
     BCDADD_EQ bcdadd_eq {}
 
-  const unsigned int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
     BCDADD_GT bcdadd_gt {}
 
-  const unsigned int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
     BCDADD_LT bcdadd_lt {}
 
-  const unsigned int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
     BCDADD_OV bcdadd_unordered {}
 
   const vsq __builtin_bcdsub (vsq, vsq, const int<1>);
     BCDSUB bcdsub {}
 
-  const unsigned int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
     BCDSUB_EQ bcdsub_eq {}
 
-  const unsigned int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
     BCDSUB_GT bcdsub_gt {}
 
-  const unsigned int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
     BCDSUB_LT bcdsub_lt {}
 
-  const unsigned int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
     BCDSUB_OV bcdsub_unordered {}
 
   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
@@ -2381,16 +2371,19 @@
   const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
     VPERMXOR_V8HI crypto_vpermxor_v8hi {}
 
-  const vus __builtin_crypto_vpmsumb (vuc, vuc);
+; Note: these four have incorrect return types per the way the
+; instructions work, but this matches the old signatures that
+; have been around for too long.
+  const vuc __builtin_crypto_vpmsumb (vuc, vuc);
     VPMSUMB crypto_vpmsumb {}
 
-  const vuq __builtin_crypto_vpmsumd (vull, vull);
+  const vull __builtin_crypto_vpmsumd (vull, vull);
     VPMSUMD crypto_vpmsumd {}
 
-  const vui __builtin_crypto_vpmsumh (vus, vus);
+  const vus __builtin_crypto_vpmsumh (vus, vus);
     VPMSUMH crypto_vpmsumh {}
 
-  const vull __builtin_crypto_vpmsumw (vui, vui);
+  const vui __builtin_crypto_vpmsumw (vui, vui);
     VPMSUMW crypto_vpmsumw {}
 
   const vf __builtin_vsx_float2_v2df (vd, vd);
@@ -2615,16 +2608,16 @@
   const vsi __builtin_altivec_vprtybw (vsi);
     VPRTYBW parityv4si2 {}
 
-  const vull __builtin_altivec_vrldmi (vull, vull, vull);
+  const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
 
-  const vull __builtin_altivec_vrldnm (vull, vull);
+  const vsll __builtin_altivec_vrldnm (vsll, vsll);
     VRLDNM altivec_vrldnm {}
 
-  const vui __builtin_altivec_vrlwmi (vui, vui, vui);
+  const vsi __builtin_altivec_vrlwmi (vsi, vsi, vsi);
     VRLWMI altivec_vrlwmi {}
 
-  const vui __builtin_altivec_vrlwnm (vui, vui);
+  const vsi __builtin_altivec_vrlwnm (vsi, vsi);
     VRLWNM altivec_vrlwnm {}
 
   const vuc __builtin_altivec_vslv (vuc, vuc);
@@ -2687,10 +2680,10 @@
   const unsigned int __builtin_vsx_scalar_test_neg_sp (float);
     VSTDCNSP xststdcnegsp {}
 
-  const unsigned long long __builtin_vsx_test_data_class_dp (vd, signed int);
+  const vbll __builtin_vsx_test_data_class_dp (vd, signed int);
     VTDCDP xvtstdcdp {}
 
-  const unsigned int __builtin_vsx_test_data_class_sp (vf, signed int);
+  const vbi __builtin_vsx_test_data_class_sp (vf, signed int);
     VTDCSP xvtstdcsp {}
 
   const vf __builtin_vsx_vextract_fp_from_shorth (vus);
@@ -2735,28 +2728,28 @@
   double __builtin_mffsl ();
     MFFSL rs6000_mffsl {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -3219,25 +3212,25 @@
   const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
     VSRDB_V8HI vsrdb_v8hi {}
 
-  const vuc __builtin_altivec_vstribl (vuc);
+  const vsc __builtin_altivec_vstribl (vsc);
     VSTRIBL vstril_v16qi {}
 
   const signed int __builtin_altivec_vstribl_p (vuc);
     VSTRIBL_P vstril_p_v16qi {}
 
-  const vuc __builtin_altivec_vstribr (vuc);
+  const vsc __builtin_altivec_vstribr (vsc);
     VSTRIBR vstrir_v16qi {}
 
   const signed int __builtin_altivec_vstribr_p (vuc);
     VSTRIBR_P vstrir_p_v16qi {}
 
-  const vus __builtin_altivec_vstrihl (vus);
+  const vss __builtin_altivec_vstrihl (vss);
     VSTRIHL vstril_v8hi {}
 
   const signed int __builtin_altivec_vstrihl_p (vus);
     VSTRIHL_P vstril_p_v8hi {}
 
-  const vus __builtin_altivec_vstrihr (vus);
+  const vss __builtin_altivec_vstrihr (vss);
     VSTRIHR vstrir_v8hi {}
 
   const signed int __builtin_altivec_vstrihr_p (vus);
@@ -3270,22 +3263,22 @@
   const vuc __builtin_vsx_xvcvspbf16 (vuc);
     XVCVSPBF16 vsx_xvcvspbf16 {}
 
-  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+  const vuc __builtin_vsx_xxblend_v16qi (vuc, vuc, vuc);
     VXXBLEND_V16QI xxblend_v16qi {}
 
   const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
     VXXBLEND_V2DF xxblend_v2df {}
 
-  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+  const vull __builtin_vsx_xxblend_v2di (vull, vull, vull);
     VXXBLEND_V2DI xxblend_v2di {}
 
   const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
     VXXBLEND_V4SF xxblend_v4sf {}
 
-  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+  const vui __builtin_vsx_xxblend_v4si (vui, vui, vui);
     VXXBLEND_V4SI xxblend_v4si {}
 
-  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+  const vus __builtin_vsx_xxblend_v8hi (vus, vus, vus);
     VXXBLEND_V8HI xxblend_v8hi {}
 
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index ba64d90cda3..2f317d761d7 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -458,7 +458,7 @@ static typemap type_map[TYPE_MAP_SIZE] =
     { "sf",	"float" },
     { "si",	"intSI" },
     { "td",	"dfloat128" },
-    { "tf",	"long_double" },
+    { "tf",	"float128" },
     { "ti",	"intTI" },
     { "udi",	"unsigned_intDI" },
     { "uhi",	"unsigned_intHI" },


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-27 16:30 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-27 16:30 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:83e73b4e3d65a9ee6ea81db83da95aba2e117f98

commit 83e73b4e3d65a9ee6ea81db83da95aba2e117f98
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 18:21:20 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 195 ++++++++++++++++---------------
 1 file changed, 99 insertions(+), 96 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 755399b1843..6b66908d7c7 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -531,73 +531,73 @@
   const int __builtin_altivec_vcmpbfp_p (int, vf, vf);
     VCMPBFP_P altivec_vcmpbfp_p {pred}
 
-  const vbi __builtin_altivec_vcmpeqfp (vf, vf);
+  const vf __builtin_altivec_vcmpeqfp (vf, vf);
     VCMPEQFP vector_eqv4sf {}
 
   const int __builtin_altivec_vcmpeqfp_p (int, vf, vf);
     VCMPEQFP_P vector_eq_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpequb (vuc, vuc);
+  const vsc __builtin_altivec_vcmpequb (vuc, vuc);
     VCMPEQUB vector_eqv16qi {}
 
   const int __builtin_altivec_vcmpequb_p (int, vuc, vuc);
     VCMPEQUB_P vector_eq_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpequh (vus, vus);
+  const vss __builtin_altivec_vcmpequh (vus, vus);
     VCMPEQUH vector_eqv8hi {}
 
   const int __builtin_altivec_vcmpequh_p (int, vus, vus);
     VCMPEQUH_P vector_eq_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpequw (vui, vui);
+  const vsi __builtin_altivec_vcmpequw (vui, vui);
     VCMPEQUW vector_eqv4si {}
 
   const int __builtin_altivec_vcmpequw_p (int, vui, vui);
     VCMPEQUW_P vector_eq_v4si_p {pred}
 
-  const vbi __builtin_altivec_vcmpgefp (vf, vf);
+  const vf __builtin_altivec_vcmpgefp (vf, vf);
     VCMPGEFP vector_gev4sf {}
 
   const int __builtin_altivec_vcmpgefp_p (int, vf, vf);
     VCMPGEFP_P vector_ge_v4sf_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtfp (vf, vf);
+  const vf __builtin_altivec_vcmpgtfp (vf, vf);
     VCMPGTFP vector_gtv4sf {}
 
   const int __builtin_altivec_vcmpgtfp_p (int, vf, vf);
     VCMPGTFP_P vector_gt_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtsb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpgtsb (vsc, vsc);
     VCMPGTSB vector_gtv16qi {}
 
   const int __builtin_altivec_vcmpgtsb_p (int, vsc, vsc);
     VCMPGTSB_P vector_gt_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtsh (vss, vss);
+  const vss __builtin_altivec_vcmpgtsh (vss, vss);
     VCMPGTSH vector_gtv8hi {}
 
   const int __builtin_altivec_vcmpgtsh_p (int, vss, vss);
     VCMPGTSH_P vector_gt_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtsw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpgtsw (vsi, vsi);
     VCMPGTSW vector_gtv4si {}
 
   const int __builtin_altivec_vcmpgtsw_p (int, vsi, vsi);
     VCMPGTSW_P vector_gt_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtub (vuc, vuc);
+  const vsc __builtin_altivec_vcmpgtub (vuc, vuc);
     VCMPGTUB vector_gtuv16qi {}
 
   const int __builtin_altivec_vcmpgtub_p (int, vuc, vuc);
     VCMPGTUB_P vector_gtu_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtuh (vus, vus);
+  const vss __builtin_altivec_vcmpgtuh (vus, vus);
     VCMPGTUH vector_gtuv8hi {}
 
   const int __builtin_altivec_vcmpgtuh_p (int, vus, vus);
     VCMPGTUH_P vector_gtu_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtuw (vui, vui);
+  const vsi __builtin_altivec_vcmpgtuw (vui, vui);
     VCMPGTUW vector_gtuv4si {}
 
   const int __builtin_altivec_vcmpgtuw_p (int, vui, vui);
@@ -2162,7 +2162,7 @@
   const vsi __builtin_altivec_vclzw (vsi);
     VCLZW clzv4si2 {}
 
-  const vsc __builtin_altivec_vgbbd (vsc);
+  const vuc __builtin_altivec_vgbbd (vuc);
     VGBBD p8v_vgbbd {}
 
   const vsq __builtin_altivec_vaddcuq (vsq, vsq);
@@ -2186,19 +2186,19 @@
   const vuc __builtin_altivec_vbpermq2 (vuc, vuc);
     VBPERMQ2 altivec_vbpermq2 {}
 
-  const vbll __builtin_altivec_vcmpequd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpequd (vull, vull);
     VCMPEQUD vector_eqv2di {}
 
   const int __builtin_altivec_vcmpequd_p (int, vsll, vsll);
     VCMPEQUD_P vector_eq_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtsd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpgtsd (vsll, vsll);
     VCMPGTSD vector_gtv2di {}
 
   const int __builtin_altivec_vcmpgtsd_p (int, vsll, vsll);
     VCMPGTSD_P vector_gt_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtud (vull, vull);
+  const vsll __builtin_altivec_vcmpgtud (vull, vull);
     VCMPGTUD vector_gtuv2di {}
 
   const int __builtin_altivec_vcmpgtud_p (vull, vull);
@@ -2540,7 +2540,7 @@
   const signed int __builtin_altivec_vcmpaew_p (vsi, vsi);
     VCMPAEW_P vector_ae_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpneb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpneb (vsc, vsc);
     CMPNEB vcmpneb {}
 
   const signed int __builtin_altivec_vcmpneb_p (vsc, vsc);
@@ -2555,31 +2555,31 @@
   const signed int __builtin_altivec_vcmpnefp_p (vf, vf);
     VCMPNEFP_P vector_ne_v4sf_p {pred}
 
-  const vbs __builtin_altivec_vcmpneh (vss, vss);
+  const vss __builtin_altivec_vcmpneh (vss, vss);
     CMPNEH vcmpneh {}
 
   const signed int __builtin_altivec_vcmpneh_p (vss, vss);
     VCMPNEH_P vector_ne_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnew (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnew (vsi, vsi);
     CMPNEW vcmpnew {}
 
   const signed int __builtin_altivec_vcmpnew_p (vsi, vsi);
     VCMPNEW_P vector_ne_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpnezb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpnezb (vsc, vsc);
     CMPNEZB vcmpnezb {}
 
   const signed int __builtin_altivec_vcmpnezb_p (signed int, vsc, vsc);
     VCMPNEZB_P vector_nez_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpnezh (vss, vss);
+  const vss __builtin_altivec_vcmpnezh (vss, vss);
     CMPNEZH vcmpnezh {}
 
   const signed int __builtin_altivec_vcmpnezh_p (signed int, vss, vss);
     VCMPNEZH_P vector_nez_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnezw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnezw (vsi, vsi);
     CMPNEZW vcmpnezw {}
 
   const signed int __builtin_altivec_vcmpnezw_p (vsi, vsi);
@@ -3039,7 +3039,7 @@
   const vui __builtin_altivec_mtvsrwm (unsigned long long);
     MTVSRWM vec_mtvsr_v4si {}
 
-  const vuc __builtin_altivec_vcfuged (vuc, vuc);
+  const vull __builtin_altivec_vcfuged (vull, vull);
     VCFUGED vcfuged {}
 
   const vsc __builtin_altivec_vclrlb (vsc, unsigned int);
@@ -3048,10 +3048,10 @@
   const vsc __builtin_altivec_vclrrb (vsc, unsigned int);
     VCLRRB vclrrb {}
 
-  const vuc __builtin_altivec_vclzdm (vuc, vuc);
+  const vull __builtin_altivec_vclzdm (vull, vull);
     VCLZDM vclzdm {}
 
-  const vuc __builtin_altivec_vctzdm (vuc, vuc);
+  const vull __builtin_altivec_vctzdm (vull, vull);
     VCTZDM vctzdm {}
 
   const vuc __builtin_altivec_vexpandmb (vuc);
@@ -3093,19 +3093,19 @@
   const vull __builtin_altivec_vextduwvlx (vui, vui, unsigned char);
     VEXTRACTWL vextractlv4si {}
 
-  const unsigned int __builtin_altivec_vextractmb (vuc);
+  const signed int __builtin_altivec_vextractmb (vuc);
     VEXTRACTMB vec_extract_v16qi {}
 
-  const unsigned int __builtin_altivec_vextractmd (vull);
+  const signed int __builtin_altivec_vextractmd (vull);
     VEXTRACTMD vec_extract_v2di {}
 
-  const unsigned int __builtin_altivec_vextractmh (vus);
+  const signed int __builtin_altivec_vextractmh (vus);
     VEXTRACTMH vec_extract_v8hi {}
 
-  const unsigned int __builtin_altivec_vextractmq (vuq);
+  const signed int __builtin_altivec_vextractmq (vuq);
     VEXTRACTMQ vec_extract_v1ti {}
 
-  const unsigned int __builtin_altivec_vextractmw (vui);
+  const signed int __builtin_altivec_vextractmw (vui);
     VEXTRACTMW vec_extract_v4si {}
 
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
@@ -3153,10 +3153,10 @@
   const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
     VINSERTVPRWR vinsertvr_v4si {}
 
-  const vuc __builtin_altivec_vpdepd (vuc, vuc);
+  const vull __builtin_altivec_vpdepd (vull, vull);
     VPDEPD vpdepd {}
 
-  const vuc __builtin_altivec_vpextd (vuc, vuc);
+  const vull __builtin_altivec_vpextd (vull, vull);
     VPEXTD vpextd {}
 
   const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
@@ -3360,18 +3360,21 @@
 ; external copy in the built-in table.  This is fragile.  For the
 ; new support, we should transition this to do a name lookup in
 ; the built-in hash table, but to start with we will honor the
-; positioning of the built-ins in the table.
+; positioning of the built-ins in the table.  Note that right now
+; there is going to be breakage with __builtin_mma_disassemble_{acc,pair}
+; since they each require a blank builtin to follow them with icode
+; CODE_FOR_nothing.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC nothing {mma}
 
-  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+  v512 __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
     ASSEMBLE_PAIR nothing {mma}
 
-  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+  v256 __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
     ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
@@ -3383,367 +3386,367 @@
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER nothing {mma}
 
-  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER nothing {mma,pair}
 
-  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8 nothing {mma}
 
-  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+  v512 __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4 nothing {mma}
 
-  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
     XVBF16GER2 nothing {mma}
 
-  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
     XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
     XVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
     XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
     XVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
     XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
     XVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
     XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
     XVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
     XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
     XVF16GER2 nothing {mma}
 
-  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
     XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
     XVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
     XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
     XVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
     XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
     XVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
     XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
     XVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
     XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
     XVF32GER nothing {mma}
 
-  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
     XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
     XVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
     XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
     XVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
     XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
     XVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
     XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
     XVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
     XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
     XVF64GER nothing {mma,pair}
 
-  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
     XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
     XVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
     XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
     XVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
     XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
     XVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
     XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
     XVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
     XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
     XVI16GER2 nothing {mma}
 
-  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
     XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
     XVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
     XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
     XVI16GER2S nothing {mma}
 
-  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
     XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
     XVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
     XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
     XVI4GER8 nothing {mma}
 
-  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
     XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
     XVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
     XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
     XVI8GER4 nothing {mma}
 
-  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
     XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
     XVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
     XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
     XVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
     XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
     XXMFACC nothing {mma,quad}
 
-  void __builtin_mma_xxmfacc_internal (v512 *);
+  v512 __builtin_mma_xxmfacc_internal (v512 *);
     XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
     XXMTACC nothing {mma,quad}
 
-  void __builtin_mma_xxmtacc_internal (v512 *);
+  v512 __builtin_mma_xxmtacc_internal (v512 *);
     XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
     XXSETACCZ nothing {mma}
 
-  void __builtin_mma_xxsetaccz_internal (v512 *);
+  v512 __builtin_mma_xxsetaccz_internal (v512 *);
     XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-27 16:30 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-27 16:30 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:3be456fa82e73cf20fe8ab02760b0b2087c73944

commit 3be456fa82e73cf20fe8ab02760b0b2087c73944
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 11:06:36 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-call.c (altivec_init_builtins): Correct
            name of __builtin_altivec_xst_len_r.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 549 ++++++++++++++++++++++++++-----
 1 file changed, 475 insertions(+), 74 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 565c14f9f4c..755399b1843 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1494,6 +1494,45 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
+; It is unclear why these __builtin_vsx_* duplicates exist when the
+; __builtin_altivec_* counterparts are already present.  They are
+; kept for compatibility.
+  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
+    VPERM_16QI_X altivec_vperm_v16qi {}
+
+  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
+    VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
+
+  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vuc);
+    VPERM_1TI_X altivec_vperm_v1ti {}
+
+  const vuq __builtin_vsx_vperm_1ti_uns (vuq, vuq, vuc);
+    VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
+
+  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
+    VPERM_2DF_X altivec_vperm_v2df {}
+
+  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
+    VPERM_2DI_X altivec_vperm_v2di {}
+
+  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
+    VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
+
+  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
+    VPERM_4SF_X altivec_vperm_v4sf {}
+
+  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
+    VPERM_4SI_X altivec_vperm_v4si {}
+
+  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
+    VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
+
+  const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
+    VPERM_8HI_X altivec_vperm_v8hi {}
+
+  const vus __builtin_vsx_vperm_8hi_uns (vus, vus, vuc);
+    VPERM_8HI_UNS_X altivec_vperm_v8hi_uns {}
+
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -1617,6 +1656,9 @@
   const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
+  const vf __builtin_vsx_xvcvdpsp (vd);
+    XVCVDPSP vsx_xvcvdpsp {}
+
   const vsll __builtin_vsx_xvcvdpsxds (vd);
     XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1639,6 +1681,9 @@
   const vull __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
+  const vd __builtin_vsx_xvcvspdp (vf);
+    XVCVSPDP vsx_xvcvspdp {}
+
   const vsll __builtin_vsx_xvcvspsxds (vf);
     XVCVSPSXDS vsx_xvcvspsxds {}
 
@@ -2687,28 +2732,31 @@
   signed long long __builtin_darn_raw ();
     DARN_RAW darn_raw {}
 
-  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  double __builtin_mffsl ();
+    MFFSL rs6000_mffsl {}
+
+  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -2720,10 +2768,10 @@
 ; 32 bits, and the return value is DImode, so it seems that
 ; TARGET_64BIT (actually TARGET_POWERPC64) is justified.  TBD. ####
 [power9-64]
-; The following two are inexplicably named __builtin_{alti,}vec_* while
+; The following two are inexplicably named __builtin_altivec_* while
 ; their load counterparts are __builtin_vsx_*.  Need to deprecate
 ; these interfaces in favor of the other naming scheme (or vice versa).
-  void __builtin_vec_xst_len_r (vop, void *, unsigned long long);
+  void __builtin_altivec_xst_len_r (vop, void *, unsigned long long);
     XST_LEN_R xst_len_r {}
 
   void __builtin_altivec_stxvl (vop, void *, unsigned long long);
@@ -3063,12 +3111,114 @@
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
     VGNB vgnb {}
 
+  const vuc __builtin_altivec_vinsgubvlx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBL vinsertgl_v16qi {}
+
+  const vuc __builtin_altivec_vinsgubvrx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBR vinsertgr_v16qi {}
+
+  const vull __builtin_altivec_vinsgudvlx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDL vinsertgl_v2di {}
+
+  const vull __builtin_altivec_vinsgudvrx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDR vinsertgr_v2di {}
+
+  const vus __builtin_altivec_vinsguhvlx (unsigned short, vus, unsigned int);
+    VINSERTGPRHL vinsertgl_v8hi {}
+
+  const vus __builtin_altivec_vinsguhvrx (unsigned short, vus, unsigned int);
+    VINSERTGPRHR vinsertgr_v8hi {}
+
+  const vui __builtin_altivec_vinsguwvlx (unsigned int, vui, unsigned int);
+    VINSERTGPRWL vinsertgl_v4si {}
+
+  const vui __builtin_altivec_vinsguwvrx (unsigned int, vui, unsigned int);
+    VINSERTGPRWR vinsertgr_v4si {}
+
+  const vuc __builtin_altivec_vinsvubvlx (vuc, vuc, unsigned int);
+    VINSERTVPRBL vinsertvl_v16qi {}
+
+  const vuc __builtin_altivec_vinsvubvrx (vuc, vuc, unsigned int);
+    VINSERTVPRBR vinsertvr_v16qi {}
+
+  const vus __builtin_altivec_vinsvuhvlx (vus, vus, unsigned int);
+    VINSERTVPRHL vinsertvl_v8hi {}
+
+  const vus __builtin_altivec_vinsvuhvrx (vus, vus, unsigned int);
+    VINSERTVPRHR vinsertvr_v8hi {}
+
+  const vui __builtin_altivec_vinsvuwvlx (vui, vui, unsigned int);
+    VINSERTVPRWL vinsertvl_v4si {}
+
+  const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
+    VINSERTVPRWR vinsertvr_v4si {}
+
   const vuc __builtin_altivec_vpdepd (vuc, vuc);
     VPDEPD vpdepd {}
 
   const vuc __builtin_altivec_vpextd (vuc, vuc);
     VPEXTD vpextd {}
 
+  const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
+    VREPLACE_UN_UV2DI vreplace_un_v2di {}
+
+  const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, const int<4>);
+    VREPLACE_UN_UV4SI vreplace_un_v4si {}
+
+  const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>);
+    VREPLACE_UN_V2DF vreplace_un_v2df {}
+
+  const vsll __builtin_altivec_vreplace_un_v2di (vsll, unsigned long long, const int<4>);
+    VREPLACE_UN_V2DI vreplace_un_v2di {}
+
+  const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>);
+    VREPLACE_UN_V4SF vreplace_un_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>);
+    VREPLACE_UN_V4SI vreplace_un_v4si {}
+
+  const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, const int<1>);
+    VREPLACE_ELT_UV2DI vreplace_elt_v2di {}
+
+  const vui __builtin_altivec_vreplace_uv4si (vui, unsigned int, const int<2>);
+    VREPLACE_ELT_UV4SI vreplace_elt_v4si {}
+
+  const vd __builtin_altivec_vreplace_v2df (vd, double, const int<1>);
+    VREPLACE_ELT_V2DF vreplace_elt_v2df {}
+
+  const vsll __builtin_altivec_vreplace_v2di (vsll, unsigned long long, const int<1>);
+    VREPLACE_ELT_V2DI vreplace_elt_v2di {}
+
+  const vf __builtin_altivec_vreplace_v4sf (vf, float, const int<2>);
+    VREPLACE_ELT_V4SF vreplace_elt_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_v4si (vsi, signed int, const int<2>);
+    VREPLACE_ELT_V4SI vreplace_elt_v4si {}
+
+  const vsc __builtin_altivec_vsldb_v16qi (vsc, vsc, const int<3>);
+    VSLDB_V16QI vsldb_v16qi {}
+
+  const vsll __builtin_altivec_vsldb_v2di (vsll, vsll, const int<3>);
+    VSLDB_V2DI vsldb_v2di {}
+
+  const vsi __builtin_altivec_vsldb_v4si (vsi, vsi, const int<3>);
+    VSLDB_V4SI vsldb_v4si {}
+
+  const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
+    VSLDB_V8HI vsldb_v8hi {}
+
+  const vsc __builtin_altivec_vsrdb_v16qi (vsc, vsc, const int<3>);
+    VSRDB_V16QI vsrdb_v16qi {}
+
+  const vsll __builtin_altivec_vsrdb_v2di (vsll, vsll, const int<3>);
+    VSRDB_V2DI vsrdb_v2di {}
+
+  const vsi __builtin_altivec_vsrdb_v4si (vsi, vsi, const int<3>);
+    VSRDB_V4SI vsrdb_v4si {}
+
+  const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
+    VSRDB_V8HI vsrdb_v8hi {}
+
   const vuc __builtin_altivec_vstribl (vuc);
     VSTRIBL vstril_v16qi {}
 
@@ -3093,6 +3243,51 @@
   const signed int __builtin_altivec_vstrihr_p (vus);
     VSTRIHR_P vstrir_p_v8hi {}
 
+  const signed int __builtin_vsx_xvtlsbb_all_ones (vuc);
+    XVTLSBB_ONES xvtlsbbo {}
+
+  const signed int __builtin_vsx_xvtlsbb_all_zeros (vuc);
+    XVTLSBB_ZEROS xvtlsbbz {}
+
+  const vf __builtin_vsx_vxxsplti32dx_v4sf (vf, const int<1>, float);
+    VXXSPLTI32DX_V4SF xxsplti32dx_v4sf {}
+
+  const vsi __builtin_vsx_vxxsplti32dx_v4si (vsi, const int<1>, signed int);
+    VXXSPLTI32DX_V4SI xxsplti32dx_v4si {}
+
+  const vd __builtin_vsx_vxxspltidp (float);
+    VXXSPLTIDP xxspltidp_v2df {}
+
+  const vf __builtin_vsx_vxxspltiw_v4sf (float);
+    VXXSPLTIW_V4SF xxspltiw_v4sf {}
+
+  const vsi __builtin_vsx_vxxspltiw_v4si (signed int);
+    VXXSPLTIW_V4SI xxspltiw_v4si {}
+
+  const vuc __builtin_vsx_xvcvbf16spn (vuc);
+    XVCVBF16SPN vsx_xvcvbf16spn {}
+
+  const vuc __builtin_vsx_xvcvspbf16 (vuc);
+    XVCVSPBF16 vsx_xvcvspbf16 {}
+
+  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+    VXXBLEND_V16QI xxblend_v16qi {}
+
+  const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
+    VXXBLEND_V2DF xxblend_v2df {}
+
+  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+    VXXBLEND_V2DI xxblend_v2di {}
+
+  const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
+    VXXBLEND_V4SF xxblend_v4sf {}
+
+  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+    VXXBLEND_V4SI xxblend_v4si {}
+
+  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+    VXXBLEND_V8HI xxblend_v8hi {}
+
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
     XXEVAL xxeval {}
 
@@ -3108,11 +3303,37 @@
   const vus __builtin_vsx_xxgenpcvm_v8hi (vus, const int <2>);
     XXGENPCVM_V8HI xxgenpcvm_v8hi {}
 
-  const vuc __builtin_vsx_xvcvbf16spn (vuc);
-    XVCVBF16SPN vsx_xvcvbf16spn {}
+; TODO: This was quite hackish in the original code, and we may need to add
+; mode-specific expansions rather than using CODE_FOR_xxpermx throughout.
+  const vuc __builtin_vsx_xxpermx_uv16qi (vuc, vuc, vuc, const int<3>);
+    XXPERMX_UV16QI xxpermx {}
 
-  const vuc __builtin_vsx_xvcvspbf16 (vuc);
-    XVCVSPBF16 vsx_xvcvspbf16 {}
+  const vull __builtin_vsx_xxpermx_uv2di (vull, vull, vuc, const int<3>);
+    XXPERMX_UV2DI xxpermx {}
+
+  const vui __builtin_vsx_xxpermx_uv4si (vui, vui, vuc, const int<3>);
+    XXPERMX_UV4SI xxpermx {}
+
+  const vus __builtin_vsx_xxpermx_uv8hi (vus, vus, vuc, const int<3>);
+    XXPERMX_UV8HI xxpermx {}
+
+  const vsc __builtin_vsx_xxpermx_v16qi (vsc, vsc, vuc, const int<3>);
+    XXPERMX_V16QI xxpermx {}
+
+  const vd __builtin_vsx_xxpermx_v2df (vd, vd, vuc, const int<3>);
+    XXPERMX_V2DF xxpermx {}
+
+  const vsll __builtin_vsx_xxpermx_v2di (vsll, vsll, vuc, const int<3>);
+    XXPERMX_V2DI xxpermx {}
+
+  const vf __builtin_vsx_xxpermx_v4sf (vf, vf, vuc, const int<3>);
+    XXPERMX_V4SF xxpermx {}
+
+  const vsi __builtin_vsx_xxpermx_v4si (vsi, vsi, vuc, const int<3>);
+    XXPERMX_V4SI xxpermx {}
+
+  const vss __builtin_vsx_xxpermx_v8hi (vss, vss, vuc, const int<3>);
+    XXPERMX_V8HI xxpermx {}
 
 
 [power10-64]
@@ -3166,184 +3387,364 @@
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}
+    PMXVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NP mma_pmxvbf16ger2np {mma,quad}
+    PMXVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PN mma_pmxvbf16ger2pn {mma,quad}
+    PMXVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PP mma_pmxvbf16ger2pp {mma,quad}
+    PMXVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2 mma_pmxvf16ger2 {mma}
+    PMXVF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NN mma_pmxvf16ger2nn {mma,quad}
+    PMXVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NP mma_pmxvf16ger2np {mma,quad}
+    PMXVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PN mma_pmxvf16ger2pn {mma,quad}
+    PMXVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PP mma_pmxvf16ger2pp {mma,quad}
+    PMXVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GER mma_pmxvf32ger {mma}
+    PMXVF32GER nothing {mma}
+
+  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNN mma_pmxvf32gernn {mma,quad}
+    PMXVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNP mma_pmxvf32gernp {mma,quad}
+    PMXVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPN mma_pmxvf32gerpn {mma,quad}
+    PMXVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPP mma_pmxvf32gerpp {mma,quad}
+    PMXVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GER mma_pmxvf64ger {mma,pair}
+    PMXVF64GER nothing {mma,pair}
+
+  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNN mma_pmxvf64gernn {mma,pair,quad}
+    PMXVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNP mma_pmxvf64gernp {mma,pair,quad}
+    PMXVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPN mma_pmxvf64gerpn {mma,pair,quad}
+    PMXVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPP mma_pmxvf64gerpp {mma,pair,quad}
+    PMXVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2 mma_pmxvi16ger2 {mma}
+    PMXVI16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2PP mma_pmxvi16ger2pp {mma,quad}
+    PMXVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2S mma_pmxvi16ger2s {mma}
+    PMXVI16GER2S nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2SPP mma_pmxvi16ger2spp {mma,quad}
+    PMXVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
-    PMXVI4GER8 mma_pmxvi4ger8 {mma}
+    PMXVI4GER8 nothing {mma}
+
+  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+    PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI4GER8PP mma_pmxvi4ger8pp {mma,quad}
+    PMXVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4 mma_pmxvi8ger4 {mma}
+    PMXVI8GER4 nothing {mma}
+
+  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4PP mma_pmxvi8ger4pp {mma,quad}
+    PMXVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4SPP mma_pmxvi8ger4spp {mma,quad}
+    PMXVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
-    XVBF16GER2 mma_xvbf16ger2 {mma}
+    XVBF16GER2 nothing {mma}
+
+  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+    XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
-    XVBF16GER2NN mma_xvbf16ger2nn {mma,quad}
+    XVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+    XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
-    XVBF16GER2NP mma_xvbf16ger2np {mma,quad}
+    XVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+    XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
-    XVBF16GER2PN mma_xvbf16ger2pn {mma,quad}
+    XVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+    XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
-    XVBF16GER2PP mma_xvbf16ger2pp {mma,quad}
+    XVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+    XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
-    XVF16GER2 mma_xvf16ger2 {mma}
+    XVF16GER2 nothing {mma}
+
+  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+    XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
-    XVF16GER2NN mma_xvf16ger2nn {mma,quad}
+    XVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+    XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
-    XVF16GER2NP mma_xvf16ger2np {mma,quad}
+    XVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+    XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
-    XVF16GER2PN mma_xvf16ger2pn {mma,quad}
+    XVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+    XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
-    XVF16GER2PP mma_xvf16ger2pp {mma,quad}
+    XVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+    XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
-    XVF32GER mma_xvf32ger {mma}
+    XVF32GER nothing {mma}
+
+  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+    XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
-    XVF32GERNN mma_xvf32gernn {mma,quad}
+    XVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+    XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
-    XVF32GERNP mma_xvf32gernp {mma,quad}
+    XVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+    XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
-    XVF32GERPN mma_xvf32gerpn {mma,quad}
+    XVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+    XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
-    XVF32GERPP mma_xvf32gerpp {mma,quad}
+    XVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+    XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
-    XVF64GER mma_xvf64ger {mma,pair}
+    XVF64GER nothing {mma,pair}
+
+  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+    XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
-    XVF64GERNN mma_xvf64gernn {mma,pair,quad}
+    XVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+    XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
-    XVF64GERNP mma_xvf64gernp {mma,pair,quad}
+    XVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+    XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
-    XVF64GERPN mma_xvf64gerpn {mma,pair,quad}
+    XVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+    XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
-    XVF64GERPP mma_xvf64gerpp {mma,pair,quad}
+    XVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+    XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
-    XVI16GER2 mma_xvi16ger2 {mma}
+    XVI16GER2 nothing {mma}
+
+  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+    XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
-    XVI16GER2PP mma_xvi16ger2pp {mma,quad}
+    XVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+    XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
-    XVI16GER2S mma_xvi16ger2s {mma}
+    XVI16GER2S nothing {mma}
+
+  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+    XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
-    XVI16GER2SPP mma_xvi16ger2spp {mma,quad}
+    XVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+    XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
-    XVI4GER8 mma_xvi4ger8 {mma}
+    XVI4GER8 nothing {mma}
+
+  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+    XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
-    XVI4GER8PP mma_xvi4ger8pp {mma,quad}
+    XVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+    XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
-    XVI8GER4 mma_xvi8ger4 {mma}
+    XVI8GER4 nothing {mma}
+
+  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+    XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
-    XVI8GER4PP mma_xvi8ger4pp {mma,quad}
+    XVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+    XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
-    XVI8GER4SPP mma_xvi8ger4spp {mma,quad}
+    XVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+    XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
-    XXMFACC mma_xxmfacc {mma,quad}
+    XXMFACC nothing {mma,quad}
+
+  void __builtin_mma_xxmfacc_internal (v512 *);
+    XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
-    XXMTACC mma_xxmtacc {mma,quad}
+    XXMTACC nothing {mma,quad}
+
+  void __builtin_mma_xxmtacc_internal (v512 *);
+    XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
-    XXSETACCZ mma_xxsetaccz {mma}
+    XXSETACCZ nothing {mma}
+
+  void __builtin_mma_xxsetaccz_internal (v512 *);
+    XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-10-27 16:30 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-10-27 16:30 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4df134cdcf2c01b227f04bdc085f89e3c26ae42b

commit 4df134cdcf2c01b227f04bdc085f89e3c26ae42b
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Aug 30 10:28:28 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-08-30  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 127 ++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index c0f8821be64..565c14f9f4c 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -321,9 +321,7 @@
   pure vuc __builtin_altivec_lvsr (signed long long, void *);
     LVSR altivec_lvsr {ldvec}
 
-; Following LVX one is redundant, and I don't think we need to
-; keep it.  It only maps to LVX_V4SI.  Probably remove.
-  pure vop __builtin_altivec_lvx (signed long long, void *);
+  pure vsi __builtin_altivec_lvx (signed long long, void *);
     LVX altivec_lvx_v4si {ldvec}
 
   pure vsc __builtin_altivec_lvx_v16qi (signed long long, void *);
@@ -395,7 +393,8 @@
   void __builtin_altivec_stvrxl (vop, signed long long, void *);
     STVRXL altivec_stvrxl {stvec}
 
-; Skipping the STVX one that maps to STVX_V4SI (see above for LVX)
+  void __builtin_altivec_stvx (vsi, signed long long, void *);
+    STVX altivec_stvx_v4si {stvec}
 
   void __builtin_altivec_stvx_v16qi (vsc, signed long long, void *);
     STVX_V16QI altivec_stvx_v16qi {stvec}
@@ -409,7 +408,8 @@
   void __builtin_altivec_stvx_v8hi (vss, signed long long, void *);
     STVX_V8HI altivec_stvx_v8hi {stvec}
 
-; Skipping the STVXL one that maps to STVXL_V4SI (see above for LVX)
+  void __builtin_altivec_stvxl (vsi, signed long long, void *);
+    STVXL altivec_stvxl_v4si {stvec}
 
   void __builtin_altivec_stvxl_v16qi (vsc, signed long long, void *);
     STVXL_V16QI altivec_stvxl_v16qi {stvec}
@@ -1206,6 +1206,33 @@
   const vull __builtin_altivec_vxor_v2di_uns (vull, vull);
     VXOR_V2DI_UNS xorv2di3 {}
 
+  const signed __int128 __builtin_vec_ext_v1ti (vsq, signed int);
+    VEC_EXT_V1TI nothing {extract}
+
+  const double __builtin_vec_ext_v2df (vd, signed int);
+    VEC_EXT_V2DF nothing {extract}
+
+  const signed long long __builtin_vec_ext_v2di (vsll, signed int);
+    VEC_EXT_V2DI nothing {extract}
+
+  const vsq __builtin_vec_init_v1ti (signed __int128);
+    VEC_INIT_V1TI nothing {init}
+
+  const vd __builtin_vec_init_v2df (double, double);
+    VEC_INIT_V2DF nothing {init}
+
+  const vsll __builtin_vec_init_v2di (signed long long, signed long long);
+    VEC_INIT_V2DI nothing {init}
+
+  const vsq __builtin_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
+    VEC_SET_V1TI nothing {set}
+
+  const vd __builtin_vec_set_v2df (vd, double, const int<1>);
+    VEC_SET_V2DF nothing {set}
+
+  const vsll __builtin_vec_set_v2di (vsll, signed long long, const int<1>);
+    VEC_SET_V2DI nothing {set}
+
   const vbc __builtin_vsx_cmpge_16qi (vsc, vsc);
     CMPGE_16QI vector_nltv16qi {}
 
@@ -1348,7 +1375,7 @@
   pure vsll __builtin_vsx_lxvd2x_v2di (signed long long, void *);
     LXVD2X_V2DI vsx_load_v2di {ldvec}
 
-  pure vsc __builtin_vsx_lxvw4x_16qi (signed long long, void *);
+  pure vsc __builtin_vsx_lxvw4x_v16qi (signed long long, void *);
     LXVW4X_V16QI vsx_load_v16qi {ldvec}
 
   pure vf __builtin_vsx_lxvw4x_v4sf (signed long long, void *);
@@ -1467,33 +1494,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
-  const signed __int128 __builtin_vsx_vec_ext_v1ti (vsq, signed int);
-    VEC_EXT_V1TI nothing {extract}
-
-  const double __builtin_vsx_vec_ext_v2df (vd, signed int);
-    VEC_EXT_V2DF nothing {extract}
-
-  const signed long long __builtin_vsx_vec_ext_v2di (vsll, signed int);
-    VEC_EXT_V2DI nothing {extract}
-
-  const vsq __builtin_vsx_vec_init_v1ti (signed __int128);
-    VEC_INIT_V1TI nothing {init}
-
-  const vd __builtin_vsx_vec_init_v2df (double, double);
-    VEC_INIT_V2DF nothing {init}
-
-  const vsll __builtin_vsx_vec_init_v2di (signed long long, signed long long);
-    VEC_INIT_V2DI nothing {init}
-
-  const vsq __builtin_vsx_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
-    VEC_SET_V1TI nothing {set}
-
-  const vd __builtin_vsx_vec_set_v2df (vd, double, const int<1>);
-    VEC_SET_V2DF nothing {set}
-
-  const vsll __builtin_vsx_vec_set_v2di (vsll, signed long long, const int<1>);
-    VEC_SET_V2DI nothing {set}
-
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -2753,24 +2753,18 @@
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
-  fpmath _Float128 __builtin_vsx_addf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
     ADDF128_ODD addkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_divf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_divf128_round_to_odd (_Float128, _Float128);
     DIVF128_ODD divkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
+  fpmath _Float128 __builtin_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
     FMAF128_ODD fmakf4_odd {}
 
-  fpmath _Float128 __builtin_vsx_mulf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_mulf128_round_to_odd (_Float128, _Float128);
     MULF128_ODD mulkf3_odd {}
 
-  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
-
-  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
-    VSESQP xsxsigqp_kf {}
-
   const signed int __builtin_vsx_scalar_cmp_exp_qp_eq (_Float128, _Float128);
     VSCEQPEQ xscmpexpqp_eq_kf {}
 
@@ -2783,6 +2777,21 @@
   const signed int __builtin_vsx_scalar_cmp_exp_qp_unordered (_Float128, _Float128);
     VSCEQPUO xscmpexpqp_unordered_kf {}
 
+  fpmath _Float128 __builtin_sqrtf128_round_to_odd (_Float128);
+    SQRTF128_ODD sqrtkf2_odd {}
+
+  fpmath _Float128 __builtin_subf128_round_to_odd (_Float128, _Float128);
+    SUBF128_ODD subkf3_odd {}
+
+  fpmath double __builtin_truncf128_round_to_odd (_Float128);
+    TRUNCF128_ODD trunckfdf2_odd {}
+
+  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
+    VSEEQP xsxexpqp_kf {}
+
+  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
+    VSESQP xsxsigqp_kf {}
+
   const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, unsigned long long);
     VSIEQP xsiexpqp_kf {}
 
@@ -2795,15 +2804,6 @@
   const unsigned int __builtin_vsx_scalar_test_neg_qp (_Float128);
     VSTDCNQP xststdcnegqp_kf {}
 
-  fpmath _Float128 __builtin_vsx_sqrtf128_round_to_odd (_Float128);
-    SQRTF128_ODD sqrtkf2_odd {}
-
-  fpmath _Float128 __builtin_vsx_subf128_round_to_odd (_Float128, _Float128);
-    SUBF128_ODD subkf3_odd {}
-
-  fpmath double __builtin_vsx_truncf128_round_to_odd (_Float128);
-    TRUNCF128_ODD trunckfdf2_odd {}
-
 
 
 ; Decimal floating-point builtins.
@@ -3132,12 +3132,26 @@
     PEXTD pextd {}
 
 
+; TODO: Land-mine alert.
+;
+; The original built-in support has code that assumes the internal
+; copy of an MMA built-in function appears immediately after the
+; external copy in the built-in table.  This is fragile.  For the
+; new support, we should transition this to do a name lookup in
+; the built-in hash table, but to start with we will honor the
+; positioning of the built-ins in the table.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
-    ASSEMBLE_ACC mma_assemble_acc {mma}
+    ASSEMBLE_ACC nothing {mma}
+
+  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+    ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
-    ASSEMBLE_PAIR mma_assemble_pair {mma}
+    ASSEMBLE_PAIR nothing {mma}
+
+  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+    ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
     DISASSEMBLE_ACC nothing {mma,quad}
@@ -3146,7 +3160,10 @@
     DISASSEMBLE_PAIR nothing {mma,pair}
 
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2 mma_pmxvbf16ger2 {mma}
+    PMXVBF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-16 21:32 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-16 21:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0b9771748ddffedab4b1b0a6a7bfad94551f835c

commit 0b9771748ddffedab4b1b0a6a7bfad94551f835c
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 18:21:20 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 195 ++++++++++++++++---------------
 1 file changed, 99 insertions(+), 96 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 755399b1843..6b66908d7c7 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -531,73 +531,73 @@
   const int __builtin_altivec_vcmpbfp_p (int, vf, vf);
     VCMPBFP_P altivec_vcmpbfp_p {pred}
 
-  const vbi __builtin_altivec_vcmpeqfp (vf, vf);
+  const vf __builtin_altivec_vcmpeqfp (vf, vf);
     VCMPEQFP vector_eqv4sf {}
 
   const int __builtin_altivec_vcmpeqfp_p (int, vf, vf);
     VCMPEQFP_P vector_eq_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpequb (vuc, vuc);
+  const vsc __builtin_altivec_vcmpequb (vuc, vuc);
     VCMPEQUB vector_eqv16qi {}
 
   const int __builtin_altivec_vcmpequb_p (int, vuc, vuc);
     VCMPEQUB_P vector_eq_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpequh (vus, vus);
+  const vss __builtin_altivec_vcmpequh (vus, vus);
     VCMPEQUH vector_eqv8hi {}
 
   const int __builtin_altivec_vcmpequh_p (int, vus, vus);
     VCMPEQUH_P vector_eq_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpequw (vui, vui);
+  const vsi __builtin_altivec_vcmpequw (vui, vui);
     VCMPEQUW vector_eqv4si {}
 
   const int __builtin_altivec_vcmpequw_p (int, vui, vui);
     VCMPEQUW_P vector_eq_v4si_p {pred}
 
-  const vbi __builtin_altivec_vcmpgefp (vf, vf);
+  const vf __builtin_altivec_vcmpgefp (vf, vf);
     VCMPGEFP vector_gev4sf {}
 
   const int __builtin_altivec_vcmpgefp_p (int, vf, vf);
     VCMPGEFP_P vector_ge_v4sf_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtfp (vf, vf);
+  const vf __builtin_altivec_vcmpgtfp (vf, vf);
     VCMPGTFP vector_gtv4sf {}
 
   const int __builtin_altivec_vcmpgtfp_p (int, vf, vf);
     VCMPGTFP_P vector_gt_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtsb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpgtsb (vsc, vsc);
     VCMPGTSB vector_gtv16qi {}
 
   const int __builtin_altivec_vcmpgtsb_p (int, vsc, vsc);
     VCMPGTSB_P vector_gt_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtsh (vss, vss);
+  const vss __builtin_altivec_vcmpgtsh (vss, vss);
     VCMPGTSH vector_gtv8hi {}
 
   const int __builtin_altivec_vcmpgtsh_p (int, vss, vss);
     VCMPGTSH_P vector_gt_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtsw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpgtsw (vsi, vsi);
     VCMPGTSW vector_gtv4si {}
 
   const int __builtin_altivec_vcmpgtsw_p (int, vsi, vsi);
     VCMPGTSW_P vector_gt_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtub (vuc, vuc);
+  const vsc __builtin_altivec_vcmpgtub (vuc, vuc);
     VCMPGTUB vector_gtuv16qi {}
 
   const int __builtin_altivec_vcmpgtub_p (int, vuc, vuc);
     VCMPGTUB_P vector_gtu_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtuh (vus, vus);
+  const vss __builtin_altivec_vcmpgtuh (vus, vus);
     VCMPGTUH vector_gtuv8hi {}
 
   const int __builtin_altivec_vcmpgtuh_p (int, vus, vus);
     VCMPGTUH_P vector_gtu_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtuw (vui, vui);
+  const vsi __builtin_altivec_vcmpgtuw (vui, vui);
     VCMPGTUW vector_gtuv4si {}
 
   const int __builtin_altivec_vcmpgtuw_p (int, vui, vui);
@@ -2162,7 +2162,7 @@
   const vsi __builtin_altivec_vclzw (vsi);
     VCLZW clzv4si2 {}
 
-  const vsc __builtin_altivec_vgbbd (vsc);
+  const vuc __builtin_altivec_vgbbd (vuc);
     VGBBD p8v_vgbbd {}
 
   const vsq __builtin_altivec_vaddcuq (vsq, vsq);
@@ -2186,19 +2186,19 @@
   const vuc __builtin_altivec_vbpermq2 (vuc, vuc);
     VBPERMQ2 altivec_vbpermq2 {}
 
-  const vbll __builtin_altivec_vcmpequd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpequd (vull, vull);
     VCMPEQUD vector_eqv2di {}
 
   const int __builtin_altivec_vcmpequd_p (int, vsll, vsll);
     VCMPEQUD_P vector_eq_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtsd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpgtsd (vsll, vsll);
     VCMPGTSD vector_gtv2di {}
 
   const int __builtin_altivec_vcmpgtsd_p (int, vsll, vsll);
     VCMPGTSD_P vector_gt_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtud (vull, vull);
+  const vsll __builtin_altivec_vcmpgtud (vull, vull);
     VCMPGTUD vector_gtuv2di {}
 
   const int __builtin_altivec_vcmpgtud_p (vull, vull);
@@ -2540,7 +2540,7 @@
   const signed int __builtin_altivec_vcmpaew_p (vsi, vsi);
     VCMPAEW_P vector_ae_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpneb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpneb (vsc, vsc);
     CMPNEB vcmpneb {}
 
   const signed int __builtin_altivec_vcmpneb_p (vsc, vsc);
@@ -2555,31 +2555,31 @@
   const signed int __builtin_altivec_vcmpnefp_p (vf, vf);
     VCMPNEFP_P vector_ne_v4sf_p {pred}
 
-  const vbs __builtin_altivec_vcmpneh (vss, vss);
+  const vss __builtin_altivec_vcmpneh (vss, vss);
     CMPNEH vcmpneh {}
 
   const signed int __builtin_altivec_vcmpneh_p (vss, vss);
     VCMPNEH_P vector_ne_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnew (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnew (vsi, vsi);
     CMPNEW vcmpnew {}
 
   const signed int __builtin_altivec_vcmpnew_p (vsi, vsi);
     VCMPNEW_P vector_ne_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpnezb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpnezb (vsc, vsc);
     CMPNEZB vcmpnezb {}
 
   const signed int __builtin_altivec_vcmpnezb_p (signed int, vsc, vsc);
     VCMPNEZB_P vector_nez_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpnezh (vss, vss);
+  const vss __builtin_altivec_vcmpnezh (vss, vss);
     CMPNEZH vcmpnezh {}
 
   const signed int __builtin_altivec_vcmpnezh_p (signed int, vss, vss);
     VCMPNEZH_P vector_nez_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnezw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnezw (vsi, vsi);
     CMPNEZW vcmpnezw {}
 
   const signed int __builtin_altivec_vcmpnezw_p (vsi, vsi);
@@ -3039,7 +3039,7 @@
   const vui __builtin_altivec_mtvsrwm (unsigned long long);
     MTVSRWM vec_mtvsr_v4si {}
 
-  const vuc __builtin_altivec_vcfuged (vuc, vuc);
+  const vull __builtin_altivec_vcfuged (vull, vull);
     VCFUGED vcfuged {}
 
   const vsc __builtin_altivec_vclrlb (vsc, unsigned int);
@@ -3048,10 +3048,10 @@
   const vsc __builtin_altivec_vclrrb (vsc, unsigned int);
     VCLRRB vclrrb {}
 
-  const vuc __builtin_altivec_vclzdm (vuc, vuc);
+  const vull __builtin_altivec_vclzdm (vull, vull);
     VCLZDM vclzdm {}
 
-  const vuc __builtin_altivec_vctzdm (vuc, vuc);
+  const vull __builtin_altivec_vctzdm (vull, vull);
     VCTZDM vctzdm {}
 
   const vuc __builtin_altivec_vexpandmb (vuc);
@@ -3093,19 +3093,19 @@
   const vull __builtin_altivec_vextduwvlx (vui, vui, unsigned char);
     VEXTRACTWL vextractlv4si {}
 
-  const unsigned int __builtin_altivec_vextractmb (vuc);
+  const signed int __builtin_altivec_vextractmb (vuc);
     VEXTRACTMB vec_extract_v16qi {}
 
-  const unsigned int __builtin_altivec_vextractmd (vull);
+  const signed int __builtin_altivec_vextractmd (vull);
     VEXTRACTMD vec_extract_v2di {}
 
-  const unsigned int __builtin_altivec_vextractmh (vus);
+  const signed int __builtin_altivec_vextractmh (vus);
     VEXTRACTMH vec_extract_v8hi {}
 
-  const unsigned int __builtin_altivec_vextractmq (vuq);
+  const signed int __builtin_altivec_vextractmq (vuq);
     VEXTRACTMQ vec_extract_v1ti {}
 
-  const unsigned int __builtin_altivec_vextractmw (vui);
+  const signed int __builtin_altivec_vextractmw (vui);
     VEXTRACTMW vec_extract_v4si {}
 
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
@@ -3153,10 +3153,10 @@
   const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
     VINSERTVPRWR vinsertvr_v4si {}
 
-  const vuc __builtin_altivec_vpdepd (vuc, vuc);
+  const vull __builtin_altivec_vpdepd (vull, vull);
     VPDEPD vpdepd {}
 
-  const vuc __builtin_altivec_vpextd (vuc, vuc);
+  const vull __builtin_altivec_vpextd (vull, vull);
     VPEXTD vpextd {}
 
   const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
@@ -3360,18 +3360,21 @@
 ; external copy in the built-in table.  This is fragile.  For the
 ; new support, we should transition this to do a name lookup in
 ; the built-in hash table, but to start with we will honor the
-; positioning of the built-ins in the table.
+; positioning of the built-ins in the table.  Note that for now
+; __builtin_mma_disassemble_{acc,pair} are expected to break, since
+; each of them must be followed by a blank builtin whose icode is
+; CODE_FOR_nothing.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC nothing {mma}
 
-  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+  v512 __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
     ASSEMBLE_PAIR nothing {mma}
 
-  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+  v256 __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
     ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
@@ -3383,367 +3386,367 @@
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER nothing {mma}
 
-  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER nothing {mma,pair}
 
-  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8 nothing {mma}
 
-  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+  v512 __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4 nothing {mma}
 
-  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
     XVBF16GER2 nothing {mma}
 
-  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
     XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
     XVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
     XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
     XVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
     XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
     XVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
     XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
     XVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
     XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
     XVF16GER2 nothing {mma}
 
-  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
     XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
     XVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
     XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
     XVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
     XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
     XVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
     XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
     XVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
     XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
     XVF32GER nothing {mma}
 
-  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
     XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
     XVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
     XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
     XVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
     XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
     XVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
     XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
     XVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
     XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
     XVF64GER nothing {mma,pair}
 
-  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
     XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
     XVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
     XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
     XVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
     XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
     XVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
     XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
     XVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
     XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
     XVI16GER2 nothing {mma}
 
-  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
     XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
     XVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
     XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
     XVI16GER2S nothing {mma}
 
-  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
     XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
     XVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
     XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
     XVI4GER8 nothing {mma}
 
-  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
     XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
     XVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
     XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
     XVI8GER4 nothing {mma}
 
-  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
     XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
     XVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
     XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
     XVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
     XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
     XXMFACC nothing {mma,quad}
 
-  void __builtin_mma_xxmfacc_internal (v512 *);
+  v512 __builtin_mma_xxmfacc_internal (v512 *);
     XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
     XXMTACC nothing {mma,quad}
 
-  void __builtin_mma_xxmtacc_internal (v512 *);
+  v512 __builtin_mma_xxmtacc_internal (v512 *);
     XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
     XXSETACCZ nothing {mma}
 
-  void __builtin_mma_xxsetaccz_internal (v512 *);
+  v512 __builtin_mma_xxsetaccz_internal (v512 *);
     XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-16 21:32 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-16 21:32 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:4fb5c7046792f3b97a1fd153bb8cb7db72b54456

commit 4fb5c7046792f3b97a1fd153bb8cb7db72b54456
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 11:06:36 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-call.c (altivec_init_builtins): Correct
            name of __builtin_altivec_xst_len_r.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 549 ++++++++++++++++++++++++++-----
 1 file changed, 475 insertions(+), 74 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 565c14f9f4c..755399b1843 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1494,6 +1494,45 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
+; It is unclear why these __builtin_vsx_* duplicates exist when the
+; __builtin_altivec_* counterparts are already present.  They are
+; kept only for compatibility.
+  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
+    VPERM_16QI_X altivec_vperm_v16qi {}
+
+  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
+    VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
+
+  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vuc);
+    VPERM_1TI_X altivec_vperm_v1ti {}
+
+  const vuq __builtin_vsx_vperm_1ti_uns (vuq, vuq, vuc);
+    VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
+
+  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
+    VPERM_2DF_X altivec_vperm_v2df {}
+
+  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
+    VPERM_2DI_X altivec_vperm_v2di {}
+
+  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
+    VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
+
+  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
+    VPERM_4SF_X altivec_vperm_v4sf {}
+
+  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
+    VPERM_4SI_X altivec_vperm_v4si {}
+
+  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
+    VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
+
+  const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
+    VPERM_8HI_X altivec_vperm_v8hi {}
+
+  const vus __builtin_vsx_vperm_8hi_uns (vus, vus, vuc);
+    VPERM_8HI_UNS_X altivec_vperm_v8hi_uns {}
+
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -1617,6 +1656,9 @@
   const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
+  const vf __builtin_vsx_xvcvdpsp (vd);
+    XVCVDPSP vsx_xvcvdpsp {}
+
   const vsll __builtin_vsx_xvcvdpsxds (vd);
     XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1639,6 +1681,9 @@
   const vull __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
+  const vd __builtin_vsx_xvcvspdp (vf);
+    XVCVSPDP vsx_xvcvspdp {}
+
   const vsll __builtin_vsx_xvcvspsxds (vf);
     XVCVSPSXDS vsx_xvcvspsxds {}
 
@@ -2687,28 +2732,31 @@
   signed long long __builtin_darn_raw ();
     DARN_RAW darn_raw {}
 
-  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  double __builtin_mffsl ();
+    MFFSL rs6000_mffsl {}
+
+  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -2720,10 +2768,10 @@
 ; 32 bits, and the return value is DImode, so it seems that
 ; TARGET_64BIT (actually TARGET_POWERPC64) is justified.  TBD. ####
 [power9-64]
-; The following two are inexplicably named __builtin_{alti,}vec_* while
+; The following two are inexplicably named __builtin_altivec_* while
 ; their load counterparts are __builtin_vsx_*.  Need to deprecate
 ; these interfaces in favor of the other naming scheme (or vice versa).
-  void __builtin_vec_xst_len_r (vop, void *, unsigned long long);
+  void __builtin_altivec_xst_len_r (vop, void *, unsigned long long);
     XST_LEN_R xst_len_r {}
 
   void __builtin_altivec_stxvl (vop, void *, unsigned long long);
@@ -3063,12 +3111,114 @@
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
     VGNB vgnb {}
 
+  const vuc __builtin_altivec_vinsgubvlx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBL vinsertgl_v16qi {}
+
+  const vuc __builtin_altivec_vinsgubvrx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBR vinsertgr_v16qi {}
+
+  const vull __builtin_altivec_vinsgudvlx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDL vinsertgl_v2di {}
+
+  const vull __builtin_altivec_vinsgudvrx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDR vinsertgr_v2di {}
+
+  const vus __builtin_altivec_vinsguhvlx (unsigned short, vus, unsigned int);
+    VINSERTGPRHL vinsertgl_v8hi {}
+
+  const vus __builtin_altivec_vinsguhvrx (unsigned short, vus, unsigned int);
+    VINSERTGPRHR vinsertgr_v8hi {}
+
+  const vui __builtin_altivec_vinsguwvlx (unsigned int, vui, unsigned int);
+    VINSERTGPRWL vinsertgl_v4si {}
+
+  const vui __builtin_altivec_vinsguwvrx (unsigned int, vui, unsigned int);
+    VINSERTGPRWR vinsertgr_v4si {}
+
+  const vuc __builtin_altivec_vinsvubvlx (vuc, vuc, unsigned int);
+    VINSERTVPRBL vinsertvl_v16qi {}
+
+  const vuc __builtin_altivec_vinsvubvrx (vuc, vuc, unsigned int);
+    VINSERTVPRBR vinsertvr_v16qi {}
+
+  const vus __builtin_altivec_vinsvuhvlx (vus, vus, unsigned int);
+    VINSERTVPRHL vinsertvl_v8hi {}
+
+  const vus __builtin_altivec_vinsvuhvrx (vus, vus, unsigned int);
+    VINSERTVPRHR vinsertvr_v8hi {}
+
+  const vui __builtin_altivec_vinsvuwvlx (vui, vui, unsigned int);
+    VINSERTVPRWL vinsertvl_v4si {}
+
+  const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
+    VINSERTVPRWR vinsertvr_v4si {}
+
   const vuc __builtin_altivec_vpdepd (vuc, vuc);
     VPDEPD vpdepd {}
 
   const vuc __builtin_altivec_vpextd (vuc, vuc);
     VPEXTD vpextd {}
 
+  const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
+    VREPLACE_UN_UV2DI vreplace_un_v2di {}
+
+  const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, const int<4>);
+    VREPLACE_UN_UV4SI vreplace_un_v4si {}
+
+  const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>);
+    VREPLACE_UN_V2DF vreplace_un_v2df {}
+
+  const vsll __builtin_altivec_vreplace_un_v2di (vsll, unsigned long long, const int<4>);
+    VREPLACE_UN_V2DI vreplace_un_v2di {}
+
+  const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>);
+    VREPLACE_UN_V4SF vreplace_un_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>);
+    VREPLACE_UN_V4SI vreplace_un_v4si {}
+
+  const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, const int<1>);
+    VREPLACE_ELT_UV2DI vreplace_elt_v2di {}
+
+  const vui __builtin_altivec_vreplace_uv4si (vui, unsigned int, const int<2>);
+    VREPLACE_ELT_UV4SI vreplace_elt_v4si {}
+
+  const vd __builtin_altivec_vreplace_v2df (vd, double, const int<1>);
+    VREPLACE_ELT_V2DF vreplace_elt_v2df {}
+
+  const vsll __builtin_altivec_vreplace_v2di (vsll, unsigned long long, const int<1>);
+    VREPLACE_ELT_V2DI vreplace_elt_v2di {}
+
+  const vf __builtin_altivec_vreplace_v4sf (vf, float, const int<2>);
+    VREPLACE_ELT_V4SF vreplace_elt_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_v4si (vsi, signed int, const int<2>);
+    VREPLACE_ELT_V4SI vreplace_elt_v4si {}
+
+  const vsc __builtin_altivec_vsldb_v16qi (vsc, vsc, const int<3>);
+    VSLDB_V16QI vsldb_v16qi {}
+
+  const vsll __builtin_altivec_vsldb_v2di (vsll, vsll, const int<3>);
+    VSLDB_V2DI vsldb_v2di {}
+
+  const vsi __builtin_altivec_vsldb_v4si (vsi, vsi, const int<3>);
+    VSLDB_V4SI vsldb_v4si {}
+
+  const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
+    VSLDB_V8HI vsldb_v8hi {}
+
+  const vsc __builtin_altivec_vsrdb_v16qi (vsc, vsc, const int<3>);
+    VSRDB_V16QI vsrdb_v16qi {}
+
+  const vsll __builtin_altivec_vsrdb_v2di (vsll, vsll, const int<3>);
+    VSRDB_V2DI vsrdb_v2di {}
+
+  const vsi __builtin_altivec_vsrdb_v4si (vsi, vsi, const int<3>);
+    VSRDB_V4SI vsrdb_v4si {}
+
+  const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
+    VSRDB_V8HI vsrdb_v8hi {}
+
   const vuc __builtin_altivec_vstribl (vuc);
     VSTRIBL vstril_v16qi {}
 
@@ -3093,6 +3243,51 @@
   const signed int __builtin_altivec_vstrihr_p (vus);
     VSTRIHR_P vstrir_p_v8hi {}
 
+  const signed int __builtin_vsx_xvtlsbb_all_ones (vuc);
+    XVTLSBB_ONES xvtlsbbo {}
+
+  const signed int __builtin_vsx_xvtlsbb_all_zeros (vuc);
+    XVTLSBB_ZEROS xvtlsbbz {}
+
+  const vf __builtin_vsx_vxxsplti32dx_v4sf (vf, const int<1>, float);
+    VXXSPLTI32DX_V4SF xxsplti32dx_v4sf {}
+
+  const vsi __builtin_vsx_vxxsplti32dx_v4si (vsi, const int<1>, signed int);
+    VXXSPLTI32DX_V4SI xxsplti32dx_v4si {}
+
+  const vd __builtin_vsx_vxxspltidp (float);
+    VXXSPLTIDP xxspltidp_v2df {}
+
+  const vf __builtin_vsx_vxxspltiw_v4sf (float);
+    VXXSPLTIW_V4SF xxspltiw_v4sf {}
+
+  const vsi __builtin_vsx_vxxspltiw_v4si (signed int);
+    VXXSPLTIW_V4SI xxspltiw_v4si {}
+
+  const vuc __builtin_vsx_xvcvbf16spn (vuc);
+    XVCVBF16SPN vsx_xvcvbf16spn {}
+
+  const vuc __builtin_vsx_xvcvspbf16 (vuc);
+    XVCVSPBF16 vsx_xvcvspbf16 {}
+
+  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+    VXXBLEND_V16QI xxblend_v16qi {}
+
+  const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
+    VXXBLEND_V2DF xxblend_v2df {}
+
+  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+    VXXBLEND_V2DI xxblend_v2di {}
+
+  const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
+    VXXBLEND_V4SF xxblend_v4sf {}
+
+  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+    VXXBLEND_V4SI xxblend_v4si {}
+
+  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+    VXXBLEND_V8HI xxblend_v8hi {}
+
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
     XXEVAL xxeval {}
 
@@ -3108,11 +3303,37 @@
   const vus __builtin_vsx_xxgenpcvm_v8hi (vus, const int <2>);
     XXGENPCVM_V8HI xxgenpcvm_v8hi {}
 
-  const vuc __builtin_vsx_xvcvbf16spn (vuc);
-    XVCVBF16SPN vsx_xvcvbf16spn {}
+; TODO: This was quite hackish in the original code, and we may need to add
+; mode-specific expansions rather than using CODE_FOR_xxpermx throughout.
+  const vuc __builtin_vsx_xxpermx_uv16qi (vuc, vuc, vuc, const int<3>);
+    XXPERMX_UV16QI xxpermx {}
 
-  const vuc __builtin_vsx_xvcvspbf16 (vuc);
-    XVCVSPBF16 vsx_xvcvspbf16 {}
+  const vull __builtin_vsx_xxpermx_uv2di (vull, vull, vuc, const int<3>);
+    XXPERMX_UV2DI xxpermx {}
+
+  const vui __builtin_vsx_xxpermx_uv4si (vui, vui, vuc, const int<3>);
+    XXPERMX_UV4SI xxpermx {}
+
+  const vus __builtin_vsx_xxpermx_uv8hi (vus, vus, vuc, const int<3>);
+    XXPERMX_UV8HI xxpermx {}
+
+  const vsc __builtin_vsx_xxpermx_v16qi (vsc, vsc, vuc, const int<3>);
+    XXPERMX_V16QI xxpermx {}
+
+  const vd __builtin_vsx_xxpermx_v2df (vd, vd, vuc, const int<3>);
+    XXPERMX_V2DF xxpermx {}
+
+  const vsll __builtin_vsx_xxpermx_v2di (vsll, vsll, vuc, const int<3>);
+    XXPERMX_V2DI xxpermx {}
+
+  const vf __builtin_vsx_xxpermx_v4sf (vf, vf, vuc, const int<3>);
+    XXPERMX_V4SF xxpermx {}
+
+  const vsi __builtin_vsx_xxpermx_v4si (vsi, vsi, vuc, const int<3>);
+    XXPERMX_V4SI xxpermx {}
+
+  const vss __builtin_vsx_xxpermx_v8hi (vss, vss, vuc, const int<3>);
+    XXPERMX_V8HI xxpermx {}
 
 
 [power10-64]
@@ -3166,184 +3387,364 @@
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}
+    PMXVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NP mma_pmxvbf16ger2np {mma,quad}
+    PMXVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PN mma_pmxvbf16ger2pn {mma,quad}
+    PMXVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PP mma_pmxvbf16ger2pp {mma,quad}
+    PMXVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2 mma_pmxvf16ger2 {mma}
+    PMXVF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NN mma_pmxvf16ger2nn {mma,quad}
+    PMXVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NP mma_pmxvf16ger2np {mma,quad}
+    PMXVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PN mma_pmxvf16ger2pn {mma,quad}
+    PMXVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PP mma_pmxvf16ger2pp {mma,quad}
+    PMXVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GER mma_pmxvf32ger {mma}
+    PMXVF32GER nothing {mma}
+
+  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNN mma_pmxvf32gernn {mma,quad}
+    PMXVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNP mma_pmxvf32gernp {mma,quad}
+    PMXVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPN mma_pmxvf32gerpn {mma,quad}
+    PMXVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPP mma_pmxvf32gerpp {mma,quad}
+    PMXVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GER mma_pmxvf64ger {mma,pair}
+    PMXVF64GER nothing {mma,pair}
+
+  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNN mma_pmxvf64gernn {mma,pair,quad}
+    PMXVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNP mma_pmxvf64gernp {mma,pair,quad}
+    PMXVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPN mma_pmxvf64gerpn {mma,pair,quad}
+    PMXVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPP mma_pmxvf64gerpp {mma,pair,quad}
+    PMXVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2 mma_pmxvi16ger2 {mma}
+    PMXVI16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2PP mma_pmxvi16ger2pp {mma,quad}
+    PMXVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2S mma_pmxvi16ger2s {mma}
+    PMXVI16GER2S nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2SPP mma_pmxvi16ger2spp {mma,quad}
+    PMXVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
-    PMXVI4GER8 mma_pmxvi4ger8 {mma}
+    PMXVI4GER8 nothing {mma}
+
+  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+    PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI4GER8PP mma_pmxvi4ger8pp {mma,quad}
+    PMXVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4 mma_pmxvi8ger4 {mma}
+    PMXVI8GER4 nothing {mma}
+
+  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4PP mma_pmxvi8ger4pp {mma,quad}
+    PMXVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4SPP mma_pmxvi8ger4spp {mma,quad}
+    PMXVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
-    XVBF16GER2 mma_xvbf16ger2 {mma}
+    XVBF16GER2 nothing {mma}
+
+  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+    XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
-    XVBF16GER2NN mma_xvbf16ger2nn {mma,quad}
+    XVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+    XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
-    XVBF16GER2NP mma_xvbf16ger2np {mma,quad}
+    XVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+    XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
-    XVBF16GER2PN mma_xvbf16ger2pn {mma,quad}
+    XVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+    XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
-    XVBF16GER2PP mma_xvbf16ger2pp {mma,quad}
+    XVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+    XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
-    XVF16GER2 mma_xvf16ger2 {mma}
+    XVF16GER2 nothing {mma}
+
+  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+    XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
-    XVF16GER2NN mma_xvf16ger2nn {mma,quad}
+    XVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+    XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
-    XVF16GER2NP mma_xvf16ger2np {mma,quad}
+    XVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+    XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
-    XVF16GER2PN mma_xvf16ger2pn {mma,quad}
+    XVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+    XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
-    XVF16GER2PP mma_xvf16ger2pp {mma,quad}
+    XVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+    XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
-    XVF32GER mma_xvf32ger {mma}
+    XVF32GER nothing {mma}
+
+  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+    XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
-    XVF32GERNN mma_xvf32gernn {mma,quad}
+    XVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+    XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
-    XVF32GERNP mma_xvf32gernp {mma,quad}
+    XVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+    XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
-    XVF32GERPN mma_xvf32gerpn {mma,quad}
+    XVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+    XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
-    XVF32GERPP mma_xvf32gerpp {mma,quad}
+    XVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+    XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
-    XVF64GER mma_xvf64ger {mma,pair}
+    XVF64GER nothing {mma,pair}
+
+  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+    XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
-    XVF64GERNN mma_xvf64gernn {mma,pair,quad}
+    XVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+    XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
-    XVF64GERNP mma_xvf64gernp {mma,pair,quad}
+    XVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+    XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
-    XVF64GERPN mma_xvf64gerpn {mma,pair,quad}
+    XVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+    XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
-    XVF64GERPP mma_xvf64gerpp {mma,pair,quad}
+    XVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+    XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
-    XVI16GER2 mma_xvi16ger2 {mma}
+    XVI16GER2 nothing {mma}
+
+  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+    XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
-    XVI16GER2PP mma_xvi16ger2pp {mma,quad}
+    XVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+    XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
-    XVI16GER2S mma_xvi16ger2s {mma}
+    XVI16GER2S nothing {mma}
+
+  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+    XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
-    XVI16GER2SPP mma_xvi16ger2spp {mma,quad}
+    XVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+    XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
-    XVI4GER8 mma_xvi4ger8 {mma}
+    XVI4GER8 nothing {mma}
+
+  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+    XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
-    XVI4GER8PP mma_xvi4ger8pp {mma,quad}
+    XVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+    XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
-    XVI8GER4 mma_xvi8ger4 {mma}
+    XVI8GER4 nothing {mma}
+
+  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+    XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
-    XVI8GER4PP mma_xvi8ger4pp {mma,quad}
+    XVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+    XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
-    XVI8GER4SPP mma_xvi8ger4spp {mma,quad}
+    XVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+    XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
-    XXMFACC mma_xxmfacc {mma,quad}
+    XXMFACC nothing {mma,quad}
+
+  void __builtin_mma_xxmfacc_internal (v512 *);
+    XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
-    XXMTACC mma_xxmtacc {mma,quad}
+    XXMTACC nothing {mma,quad}
+
+  void __builtin_mma_xxmtacc_internal (v512 *);
+    XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
-    XXSETACCZ mma_xxsetaccz {mma}
+    XXSETACCZ nothing {mma}
+
+  void __builtin_mma_xxsetaccz_internal (v512 *);
+    XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-16 21:31 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-16 21:31 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8b1c0816bf9eb968b1a37a7889518f497e2bbbb8

commit 8b1c0816bf9eb968b1a37a7889518f497e2bbbb8
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Aug 30 10:28:28 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-08-30  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 127 ++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index c0f8821be64..565c14f9f4c 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -321,9 +321,7 @@
   pure vuc __builtin_altivec_lvsr (signed long long, void *);
     LVSR altivec_lvsr {ldvec}
 
-; Following LVX one is redundant, and I don't think we need to
-; keep it.  It only maps to LVX_V4SI.  Probably remove.
-  pure vop __builtin_altivec_lvx (signed long long, void *);
+  pure vsi __builtin_altivec_lvx (signed long long, void *);
     LVX altivec_lvx_v4si {ldvec}
 
   pure vsc __builtin_altivec_lvx_v16qi (signed long long, void *);
@@ -395,7 +393,8 @@
   void __builtin_altivec_stvrxl (vop, signed long long, void *);
     STVRXL altivec_stvrxl {stvec}
 
-; Skipping the STVX one that maps to STVX_V4SI (see above for LVX)
+  void __builtin_altivec_stvx (vsi, signed long long, void *);
+    STVX altivec_stvx_v4si {stvec}
 
   void __builtin_altivec_stvx_v16qi (vsc, signed long long, void *);
     STVX_V16QI altivec_stvx_v16qi {stvec}
@@ -409,7 +408,8 @@
   void __builtin_altivec_stvx_v8hi (vss, signed long long, void *);
     STVX_V8HI altivec_stvx_v8hi {stvec}
 
-; Skipping the STVXL one that maps to STVXL_V4SI (see above for LVX)
+  void __builtin_altivec_stvxl (vsi, signed long long, void *);
+    STVXL altivec_stvxl_v4si {stvec}
 
   void __builtin_altivec_stvxl_v16qi (vsc, signed long long, void *);
     STVXL_V16QI altivec_stvxl_v16qi {stvec}
@@ -1206,6 +1206,33 @@
   const vull __builtin_altivec_vxor_v2di_uns (vull, vull);
     VXOR_V2DI_UNS xorv2di3 {}
 
+  const signed __int128 __builtin_vec_ext_v1ti (vsq, signed int);
+    VEC_EXT_V1TI nothing {extract}
+
+  const double __builtin_vec_ext_v2df (vd, signed int);
+    VEC_EXT_V2DF nothing {extract}
+
+  const signed long long __builtin_vec_ext_v2di (vsll, signed int);
+    VEC_EXT_V2DI nothing {extract}
+
+  const vsq __builtin_vec_init_v1ti (signed __int128);
+    VEC_INIT_V1TI nothing {init}
+
+  const vd __builtin_vec_init_v2df (double, double);
+    VEC_INIT_V2DF nothing {init}
+
+  const vsll __builtin_vec_init_v2di (signed long long, signed long long);
+    VEC_INIT_V2DI nothing {init}
+
+  const vsq __builtin_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
+    VEC_SET_V1TI nothing {set}
+
+  const vd __builtin_vec_set_v2df (vd, double, const int<1>);
+    VEC_SET_V2DF nothing {set}
+
+  const vsll __builtin_vec_set_v2di (vsll, signed long long, const int<1>);
+    VEC_SET_V2DI nothing {set}
+
   const vbc __builtin_vsx_cmpge_16qi (vsc, vsc);
     CMPGE_16QI vector_nltv16qi {}
 
@@ -1348,7 +1375,7 @@
   pure vsll __builtin_vsx_lxvd2x_v2di (signed long long, void *);
     LXVD2X_V2DI vsx_load_v2di {ldvec}
 
-  pure vsc __builtin_vsx_lxvw4x_16qi (signed long long, void *);
+  pure vsc __builtin_vsx_lxvw4x_v16qi (signed long long, void *);
     LXVW4X_V16QI vsx_load_v16qi {ldvec}
 
   pure vf __builtin_vsx_lxvw4x_v4sf (signed long long, void *);
@@ -1467,33 +1494,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
-  const signed __int128 __builtin_vsx_vec_ext_v1ti (vsq, signed int);
-    VEC_EXT_V1TI nothing {extract}
-
-  const double __builtin_vsx_vec_ext_v2df (vd, signed int);
-    VEC_EXT_V2DF nothing {extract}
-
-  const signed long long __builtin_vsx_vec_ext_v2di (vsll, signed int);
-    VEC_EXT_V2DI nothing {extract}
-
-  const vsq __builtin_vsx_vec_init_v1ti (signed __int128);
-    VEC_INIT_V1TI nothing {init}
-
-  const vd __builtin_vsx_vec_init_v2df (double, double);
-    VEC_INIT_V2DF nothing {init}
-
-  const vsll __builtin_vsx_vec_init_v2di (signed long long, signed long long);
-    VEC_INIT_V2DI nothing {init}
-
-  const vsq __builtin_vsx_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
-    VEC_SET_V1TI nothing {set}
-
-  const vd __builtin_vsx_vec_set_v2df (vd, double, const int<1>);
-    VEC_SET_V2DF nothing {set}
-
-  const vsll __builtin_vsx_vec_set_v2di (vsll, signed long long, const int<1>);
-    VEC_SET_V2DI nothing {set}
-
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -2753,24 +2753,18 @@
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
-  fpmath _Float128 __builtin_vsx_addf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
     ADDF128_ODD addkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_divf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_divf128_round_to_odd (_Float128, _Float128);
     DIVF128_ODD divkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
+  fpmath _Float128 __builtin_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
     FMAF128_ODD fmakf4_odd {}
 
-  fpmath _Float128 __builtin_vsx_mulf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_mulf128_round_to_odd (_Float128, _Float128);
     MULF128_ODD mulkf3_odd {}
 
-  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
-
-  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
-    VSESQP xsxsigqp_kf {}
-
   const signed int __builtin_vsx_scalar_cmp_exp_qp_eq (_Float128, _Float128);
     VSCEQPEQ xscmpexpqp_eq_kf {}
 
@@ -2783,6 +2777,21 @@
   const signed int __builtin_vsx_scalar_cmp_exp_qp_unordered (_Float128, _Float128);
     VSCEQPUO xscmpexpqp_unordered_kf {}
 
+  fpmath _Float128 __builtin_sqrtf128_round_to_odd (_Float128);
+    SQRTF128_ODD sqrtkf2_odd {}
+
+  fpmath _Float128 __builtin_subf128_round_to_odd (_Float128, _Float128);
+    SUBF128_ODD subkf3_odd {}
+
+  fpmath double __builtin_truncf128_round_to_odd (_Float128);
+    TRUNCF128_ODD trunckfdf2_odd {}
+
+  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
+    VSEEQP xsxexpqp_kf {}
+
+  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
+    VSESQP xsxsigqp_kf {}
+
   const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, unsigned long long);
     VSIEQP xsiexpqp_kf {}
 
@@ -2795,15 +2804,6 @@
   const unsigned int __builtin_vsx_scalar_test_neg_qp (_Float128);
     VSTDCNQP xststdcnegqp_kf {}
 
-  fpmath _Float128 __builtin_vsx_sqrtf128_round_to_odd (_Float128);
-    SQRTF128_ODD sqrtkf2_odd {}
-
-  fpmath _Float128 __builtin_vsx_subf128_round_to_odd (_Float128, _Float128);
-    SUBF128_ODD subkf3_odd {}
-
-  fpmath double __builtin_vsx_truncf128_round_to_odd (_Float128);
-    TRUNCF128_ODD trunckfdf2_odd {}
-
 
 
 ; Decimal floating-point builtins.
@@ -3132,12 +3132,26 @@
     PEXTD pextd {}
 
 
+; TODO: Land-mine alert.
+;
+; The original built-in support has code that assumes the internal
+; copy of an MMA built-in function appears immediately after the
+; external copy in the built-in table.  This is fragile.  For the
+; new support, we should transition this to do a name lookup in
+; the built-in hash table, but to start with we will honor the
+; positioning of the built-ins in the table.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
-    ASSEMBLE_ACC mma_assemble_acc {mma}
+    ASSEMBLE_ACC nothing {mma}
+
+  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+    ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
-    ASSEMBLE_PAIR mma_assemble_pair {mma}
+    ASSEMBLE_PAIR nothing {mma}
+
+  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+    ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
     DISASSEMBLE_ACC nothing {mma,quad}
@@ -3146,7 +3160,10 @@
     DISASSEMBLE_PAIR nothing {mma,pair}
 
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2 mma_pmxvbf16ger2 {mma}
+    PMXVBF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-15 16:54 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-15 16:54 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:5e04ba35bd56319e887f8c142abe5aaaaad2ba82

commit 5e04ba35bd56319e887f8c142abe5aaaaad2ba82
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Tue Sep 15 11:53:33 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-15  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-gen-builtins.c (type_map): Change
            "long_double" to "float128".

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 135 +++++++++++++++----------------
 gcc/config/rs6000/rs6000-gen-builtins.c  |   2 +-
 2 files changed, 65 insertions(+), 72 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 6b66908d7c7..5e41e9e3316 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -169,6 +169,7 @@
   void __builtin_cpu_init ();
     CPU_INIT nothing {cpu}
 
+  ; TODO: Following two should return bool?
   unsigned int __builtin_cpu_is (const char *);
     CPU_IS nothing {cpu}
 
@@ -234,7 +235,7 @@
 
 ; Power6 builtins.
 [power6]
-  const signed int __builtin_p6_cmpb (signed int, signed int);
+  const signed long long __builtin_p6_cmpb (signed long long, signed long long);
     CMPB cmpbdi3 {}
 
   const signed int __builtin_p6_cmpb_32 (signed int, signed int);
@@ -810,7 +811,7 @@
   const vuc __builtin_altivec_vpkshus (vss, vss);
     VPKSHUS altivec_vpkshus {}
 
-  const vsi __builtin_altivec_vpkswss (vsi, vsi);
+  const vss __builtin_altivec_vpkswss (vsi, vsi);
     VPKSWSS altivec_vpkswss {}
 
   const vus __builtin_altivec_vpkswus (vsi, vsi);
@@ -1539,10 +1540,10 @@
   const vsi __builtin_vsx_vsigned_v4sf (vf);
     VEC_VSIGNED_V4SF vsx_xvcvspsxws {}
 
-  const vsll __builtin_vsx_vsignede_v2df (vd);
+  const vsi __builtin_vsx_vsignede_v2df (vd);
     VEC_VSIGNEDE_V2DF vsignede_v2df {}
 
-  const vsll __builtin_vsx_vsignedo_v2df (vd);
+  const vsi __builtin_vsx_vsignedo_v2df (vd);
     VEC_VSIGNEDO_V2DF vsignedo_v2df {}
 
   const vull __builtin_vsx_vunsigned_v2df (vd);
@@ -1560,7 +1561,7 @@
   const vf __builtin_vsx_xscvdpsp (vd);
     XSCVDPSP vsx_xscvdpsp {}
 
-  const vd __builtin_vsx_xscvspdp (vf);
+  const double __builtin_vsx_xscvspdp (float);
     XSCVSPDP vsx_xscvspdp {}
 
   const double __builtin_vsx_xsmaxdp (double, double);
@@ -1569,19 +1570,19 @@
   const double __builtin_vsx_xsmindp (double, double);
     XSMINDP smindf3 {}
 
-  const vd __builtin_vsx_xsrdpi (vd);
+  const double __builtin_vsx_xsrdpi (double);
     XSRDPI vsx_xsrdpi {}
 
-  const vd __builtin_vsx_xsrdpic (vd);
+  const double __builtin_vsx_xsrdpic (double);
     XSRDPIC vsx_xsrdpic {}
 
-  const vd __builtin_vsx_xsrdpim (vd);
+  const double __builtin_vsx_xsrdpim (double);
     XSRDPIM floordf2 {}
 
-  const vd __builtin_vsx_xsrdpip (vd);
+  const double __builtin_vsx_xsrdpip (double);
     XSRDPIP ceildf2 {}
 
-  const vd __builtin_vsx_xsrdpiz (vd);
+  const double __builtin_vsx_xsrdpiz (double);
     XSRDPIZ btruncdf2 {}
 
   const unsigned int __builtin_vsx_xstdivdp_fe (vd, vd);
@@ -1611,49 +1612,37 @@
   const vbll __builtin_vsx_xvcmpeqdp (vd, vd);
     XVCMPEQDP vector_eqv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpeqdp_p (vd);
+  const signed int __builtin_vsx_xvcmpeqdp_p (vd);
     XVCMPEQDP_P vector_eq_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpeqsp (vf, vf);
     XVCMPEQSP vector_eqv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpeqsp_p (vf);
+  const signed int __builtin_vsx_xvcmpeqsp_p (vf);
     XVCMPEQSP_P vector_eq_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgedp (vd, vd);
     XVCMPGEDP vector_gev2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgedp_p (vd);
+  const signed int __builtin_vsx_xvcmpgedp_p (vd);
     XVCMPGEDP_P vector_ge_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgesp (vf, vf);
     XVCMPGESP vector_gev4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgesp_p (vf);
+  const signed int __builtin_vsx_xvcmpgesp_p (vf);
     XVCMPGESP_P vector_ge_v4sf_p {pred}
 
   const vbll __builtin_vsx_xvcmpgtdp (vd, vd);
     XVCMPGTDP vector_gtv2df {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vull __builtin_vsx_xvcmpgtdp_p (vd);
+  const signed int __builtin_vsx_xvcmpgtdp_p (vd);
     XVCMPGTDP_P vector_gt_v2df_p {pred}
 
   const vbi __builtin_vsx_xvcmpgtsp (vf, vf);
     XVCMPGTSP vector_gtv4sf {}
 
-; This predicate isn't used in the ALL or ANY interfaces; it appears
-; to return a vector rather than an integer as other predicates do.
-  const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
+  const signed int __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
   const vf __builtin_vsx_xvcvdpsp (vd);
@@ -1665,7 +1654,7 @@
   const vsll __builtin_vsx_xvcvdpsxds_scale (vd, const int);
     XVCVDPSXDS_SCALE vsx_xvcvdpsxds_scale {}
 
-  const vsll __builtin_vsx_xvcvdpsxws (vd);
+  const vsi __builtin_vsx_xvcvdpsxws (vd);
     XVCVDPSXWS vsx_xvcvdpsxws {}
 
   const vull __builtin_vsx_xvcvdpuxds (vd);
@@ -1678,7 +1667,7 @@
   const vull __builtin_vsx_xvcvdpuxds_uns (vd);
     XVCVDPUXDS_UNS vsx_fixuns_truncv2dfv2di2 {}
 
-  const vull __builtin_vsx_xvcvdpuxws (vd);
+  const vui __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
   const vd __builtin_vsx_xvcvspdp (vf);
@@ -1736,7 +1725,7 @@
   const vf __builtin_vsx_xvcvuxwsp (vui);
     XVCVUXWSP_V4SF vsx_xvcvuxwsp {}
 
-  fpmath vf __builtin_vsx_xvdivdp (vf, vf);
+  fpmath vd __builtin_vsx_xvdivdp (vd, vd);
     XVDIVDP divv2df3 {}
 
   fpmath vf __builtin_vsx_xvdivsp (vf, vf);
@@ -1895,7 +1884,7 @@
   const vf __builtin_vsx_xxmrglw (vf, vf);
     XXMRGLW_4SF vsx_xxmrglw_v4sf {}
 
-  const vss __builtin_vsx_xxmrglw_4si (vsi, vsi);
+  const vsi __builtin_vsx_xxmrglw_4si (vsi, vsi);
     XXMRGLW_4SI vsx_xxmrglw_v4si {}
 
   const vsc __builtin_vsx_xxpermdi_16qi (vsc, vsc, const int<1>);
@@ -2268,7 +2257,8 @@
     VPKUDUS altivec_vpkudus {}
 
 ; #### Following are duplicates of __builtin_crypto_vpmsum*.  This
-; can't have ever worked properly!
+; can't have ever worked properly!  However, these have the right
+; result types, and the others have wrong ones!!  Whatwhatwhat...
 ;
 ;  const vus __builtin_altivec_vpmsumb (vuc, vuc);
 ;    VPMSUMB crypto_vpmsumb {}
@@ -2282,13 +2272,13 @@
 ;  const vull __builtin_altivec_vpmsumw (vui, vui);
 ;    VPMSUMW crypto_vpmsumw {}
 
-  const vuc __builtin_altivec_vpopcntb (vsc);
+  const vsc __builtin_altivec_vpopcntb (vsc);
     VPOPCNTB popcountv16qi2 {}
 
-  const vull __builtin_altivec_vpopcntd (vsll);
+  const vsll __builtin_altivec_vpopcntd (vsll);
     VPOPCNTD popcountv2di2 {}
 
-  const vus __builtin_altivec_vpopcnth (vss);
+  const vss __builtin_altivec_vpopcnth (vss);
     VPOPCNTH popcountv8hi2 {}
 
   const vuc __builtin_altivec_vpopcntub (vuc);
@@ -2303,7 +2293,7 @@
   const vui __builtin_altivec_vpopcntuw (vui);
     VPOPCNTUW popcountv4si2 {}
 
-  const vui __builtin_altivec_vpopcntw (vsi);
+  const vsi __builtin_altivec_vpopcntw (vsi);
     VPOPCNTW popcountv4si2 {}
 
   const vsll __builtin_altivec_vrld (vsll, vull);
@@ -2342,31 +2332,31 @@
   const vsq __builtin_bcdadd (vsq, vsq, const int<1>);
     BCDADD bcdadd {}
 
-  const unsigned int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_eq (vsq, vsq, const int<1>);
     BCDADD_EQ bcdadd_eq {}
 
-  const unsigned int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_gt (vsq, vsq, const int<1>);
     BCDADD_GT bcdadd_gt {}
 
-  const unsigned int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_lt (vsq, vsq, const int<1>);
     BCDADD_LT bcdadd_lt {}
 
-  const unsigned int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdadd_ov (vsq, vsq, const int<1>);
     BCDADD_OV bcdadd_unordered {}
 
   const vsq __builtin_bcdsub (vsq, vsq, const int<1>);
     BCDSUB bcdsub {}
 
-  const unsigned int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_eq (vsq, vsq, const int<1>);
     BCDSUB_EQ bcdsub_eq {}
 
-  const unsigned int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_gt (vsq, vsq, const int<1>);
     BCDSUB_GT bcdsub_gt {}
 
-  const unsigned int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_lt (vsq, vsq, const int<1>);
     BCDSUB_LT bcdsub_lt {}
 
-  const unsigned int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
+  const signed int __builtin_bcdsub_ov (vsq, vsq, const int<1>);
     BCDSUB_OV bcdsub_unordered {}
 
   const vuc __builtin_crypto_vpermxor_v16qi (vuc, vuc, vuc);
@@ -2381,16 +2371,19 @@
   const vus __builtin_crypto_vpermxor_v8hi (vus, vus, vus);
     VPERMXOR_V8HI crypto_vpermxor_v8hi {}
 
-  const vus __builtin_crypto_vpmsumb (vuc, vuc);
+; Note: these four have incorrect return types per the way the
+; instructions work, but this matches the old signatures that
+; have been around for too long.
+  const vuc __builtin_crypto_vpmsumb (vuc, vuc);
     VPMSUMB crypto_vpmsumb {}
 
-  const vuq __builtin_crypto_vpmsumd (vull, vull);
+  const vull __builtin_crypto_vpmsumd (vull, vull);
     VPMSUMD crypto_vpmsumd {}
 
-  const vui __builtin_crypto_vpmsumh (vus, vus);
+  const vus __builtin_crypto_vpmsumh (vus, vus);
     VPMSUMH crypto_vpmsumh {}
 
-  const vull __builtin_crypto_vpmsumw (vui, vui);
+  const vui __builtin_crypto_vpmsumw (vui, vui);
     VPMSUMW crypto_vpmsumw {}
 
   const vf __builtin_vsx_float2_v2df (vd, vd);
@@ -2615,16 +2608,16 @@
   const vsi __builtin_altivec_vprtybw (vsi);
     VPRTYBW parityv4si2 {}
 
-  const vull __builtin_altivec_vrldmi (vull, vull, vull);
+  const vsll __builtin_altivec_vrldmi (vsll, vsll, vsll);
     VRLDMI altivec_vrldmi {}
 
-  const vull __builtin_altivec_vrldnm (vull, vull);
+  const vsll __builtin_altivec_vrldnm (vsll, vsll);
     VRLDNM altivec_vrldnm {}
 
-  const vui __builtin_altivec_vrlwmi (vui, vui, vui);
+  const vsi __builtin_altivec_vrlwmi (vsi, vsi, vsi);
     VRLWMI altivec_vrlwmi {}
 
-  const vui __builtin_altivec_vrlwnm (vui, vui);
+  const vsi __builtin_altivec_vrlwnm (vsi, vsi);
     VRLWNM altivec_vrlwnm {}
 
   const vuc __builtin_altivec_vslv (vuc, vuc);
@@ -2687,10 +2680,10 @@
   const unsigned int __builtin_vsx_scalar_test_neg_sp (float);
     VSTDCNSP xststdcnegsp {}
 
-  const unsigned long long __builtin_vsx_test_data_class_dp (vd, signed int);
+  const vbll __builtin_vsx_test_data_class_dp (vd, signed int);
     VTDCDP xvtstdcdp {}
 
-  const unsigned int __builtin_vsx_test_data_class_sp (vf, signed int);
+  const vbi __builtin_vsx_test_data_class_sp (vf, signed int);
     VTDCSP xvtstdcsp {}
 
   const vf __builtin_vsx_vextract_fp_from_shorth (vus);
@@ -2735,28 +2728,28 @@
   double __builtin_mffsl ();
     MFFSL rs6000_mffsl {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -3219,25 +3212,25 @@
   const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
     VSRDB_V8HI vsrdb_v8hi {}
 
-  const vuc __builtin_altivec_vstribl (vuc);
+  const vsc __builtin_altivec_vstribl (vsc);
     VSTRIBL vstril_v16qi {}
 
   const signed int __builtin_altivec_vstribl_p (vuc);
     VSTRIBL_P vstril_p_v16qi {}
 
-  const vuc __builtin_altivec_vstribr (vuc);
+  const vsc __builtin_altivec_vstribr (vsc);
     VSTRIBR vstrir_v16qi {}
 
   const signed int __builtin_altivec_vstribr_p (vuc);
     VSTRIBR_P vstrir_p_v16qi {}
 
-  const vus __builtin_altivec_vstrihl (vus);
+  const vss __builtin_altivec_vstrihl (vss);
     VSTRIHL vstril_v8hi {}
 
   const signed int __builtin_altivec_vstrihl_p (vus);
     VSTRIHL_P vstril_p_v8hi {}
 
-  const vus __builtin_altivec_vstrihr (vus);
+  const vss __builtin_altivec_vstrihr (vss);
     VSTRIHR vstrir_v8hi {}
 
   const signed int __builtin_altivec_vstrihr_p (vus);
@@ -3270,22 +3263,22 @@
   const vuc __builtin_vsx_xvcvspbf16 (vuc);
     XVCVSPBF16 vsx_xvcvspbf16 {}
 
-  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+  const vuc __builtin_vsx_xxblend_v16qi (vuc, vuc, vuc);
     VXXBLEND_V16QI xxblend_v16qi {}
 
   const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
     VXXBLEND_V2DF xxblend_v2df {}
 
-  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+  const vull __builtin_vsx_xxblend_v2di (vull, vull, vull);
     VXXBLEND_V2DI xxblend_v2di {}
 
   const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
     VXXBLEND_V4SF xxblend_v4sf {}
 
-  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+  const vui __builtin_vsx_xxblend_v4si (vui, vui, vui);
     VXXBLEND_V4SI xxblend_v4si {}
 
-  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+  const vus __builtin_vsx_xxblend_v8hi (vus, vus, vus);
     VXXBLEND_V8HI xxblend_v8hi {}
 
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
diff --git a/gcc/config/rs6000/rs6000-gen-builtins.c b/gcc/config/rs6000/rs6000-gen-builtins.c
index ba64d90cda3..2f317d761d7 100644
--- a/gcc/config/rs6000/rs6000-gen-builtins.c
+++ b/gcc/config/rs6000/rs6000-gen-builtins.c
@@ -458,7 +458,7 @@ static typemap type_map[TYPE_MAP_SIZE] =
     { "sf",	"float" },
     { "si",	"intSI" },
     { "td",	"dfloat128" },
-    { "tf",	"long_double" },
+    { "tf",	"float128" },
     { "ti",	"intTI" },
     { "udi",	"unsigned_intDI" },
     { "uhi",	"unsigned_intHI" },


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-14 14:00 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-14 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:7f6b6c13666810447431303f04111c83f7296330

commit 7f6b6c13666810447431303f04111c83f7296330
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 18:21:20 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 195 ++++++++++++++++---------------
 1 file changed, 99 insertions(+), 96 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 755399b1843..6b66908d7c7 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -531,73 +531,73 @@
   const int __builtin_altivec_vcmpbfp_p (int, vf, vf);
     VCMPBFP_P altivec_vcmpbfp_p {pred}
 
-  const vbi __builtin_altivec_vcmpeqfp (vf, vf);
+  const vf __builtin_altivec_vcmpeqfp (vf, vf);
     VCMPEQFP vector_eqv4sf {}
 
   const int __builtin_altivec_vcmpeqfp_p (int, vf, vf);
     VCMPEQFP_P vector_eq_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpequb (vuc, vuc);
+  const vsc __builtin_altivec_vcmpequb (vuc, vuc);
     VCMPEQUB vector_eqv16qi {}
 
   const int __builtin_altivec_vcmpequb_p (int, vuc, vuc);
     VCMPEQUB_P vector_eq_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpequh (vus, vus);
+  const vss __builtin_altivec_vcmpequh (vus, vus);
     VCMPEQUH vector_eqv8hi {}
 
   const int __builtin_altivec_vcmpequh_p (int, vus, vus);
     VCMPEQUH_P vector_eq_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpequw (vui, vui);
+  const vsi __builtin_altivec_vcmpequw (vui, vui);
     VCMPEQUW vector_eqv4si {}
 
   const int __builtin_altivec_vcmpequw_p (int, vui, vui);
     VCMPEQUW_P vector_eq_v4si_p {pred}
 
-  const vbi __builtin_altivec_vcmpgefp (vf, vf);
+  const vf __builtin_altivec_vcmpgefp (vf, vf);
     VCMPGEFP vector_gev4sf {}
 
   const int __builtin_altivec_vcmpgefp_p (int, vf, vf);
     VCMPGEFP_P vector_ge_v4sf_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtfp (vf, vf);
+  const vf __builtin_altivec_vcmpgtfp (vf, vf);
     VCMPGTFP vector_gtv4sf {}
 
   const int __builtin_altivec_vcmpgtfp_p (int, vf, vf);
     VCMPGTFP_P vector_gt_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtsb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpgtsb (vsc, vsc);
     VCMPGTSB vector_gtv16qi {}
 
   const int __builtin_altivec_vcmpgtsb_p (int, vsc, vsc);
     VCMPGTSB_P vector_gt_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtsh (vss, vss);
+  const vss __builtin_altivec_vcmpgtsh (vss, vss);
     VCMPGTSH vector_gtv8hi {}
 
   const int __builtin_altivec_vcmpgtsh_p (int, vss, vss);
     VCMPGTSH_P vector_gt_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtsw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpgtsw (vsi, vsi);
     VCMPGTSW vector_gtv4si {}
 
   const int __builtin_altivec_vcmpgtsw_p (int, vsi, vsi);
     VCMPGTSW_P vector_gt_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtub (vuc, vuc);
+  const vsc __builtin_altivec_vcmpgtub (vuc, vuc);
     VCMPGTUB vector_gtuv16qi {}
 
   const int __builtin_altivec_vcmpgtub_p (int, vuc, vuc);
     VCMPGTUB_P vector_gtu_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtuh (vus, vus);
+  const vss __builtin_altivec_vcmpgtuh (vus, vus);
     VCMPGTUH vector_gtuv8hi {}
 
   const int __builtin_altivec_vcmpgtuh_p (int, vus, vus);
     VCMPGTUH_P vector_gtu_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtuw (vui, vui);
+  const vsi __builtin_altivec_vcmpgtuw (vui, vui);
     VCMPGTUW vector_gtuv4si {}
 
   const int __builtin_altivec_vcmpgtuw_p (int, vui, vui);
@@ -2162,7 +2162,7 @@
   const vsi __builtin_altivec_vclzw (vsi);
     VCLZW clzv4si2 {}
 
-  const vsc __builtin_altivec_vgbbd (vsc);
+  const vuc __builtin_altivec_vgbbd (vuc);
     VGBBD p8v_vgbbd {}
 
   const vsq __builtin_altivec_vaddcuq (vsq, vsq);
@@ -2186,19 +2186,19 @@
   const vuc __builtin_altivec_vbpermq2 (vuc, vuc);
     VBPERMQ2 altivec_vbpermq2 {}
 
-  const vbll __builtin_altivec_vcmpequd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpequd (vull, vull);
     VCMPEQUD vector_eqv2di {}
 
   const int __builtin_altivec_vcmpequd_p (int, vsll, vsll);
     VCMPEQUD_P vector_eq_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtsd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpgtsd (vsll, vsll);
     VCMPGTSD vector_gtv2di {}
 
   const int __builtin_altivec_vcmpgtsd_p (int, vsll, vsll);
     VCMPGTSD_P vector_gt_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtud (vull, vull);
+  const vsll __builtin_altivec_vcmpgtud (vull, vull);
     VCMPGTUD vector_gtuv2di {}
 
   const int __builtin_altivec_vcmpgtud_p (vull, vull);
@@ -2540,7 +2540,7 @@
   const signed int __builtin_altivec_vcmpaew_p (vsi, vsi);
     VCMPAEW_P vector_ae_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpneb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpneb (vsc, vsc);
     CMPNEB vcmpneb {}
 
   const signed int __builtin_altivec_vcmpneb_p (vsc, vsc);
@@ -2555,31 +2555,31 @@
   const signed int __builtin_altivec_vcmpnefp_p (vf, vf);
     VCMPNEFP_P vector_ne_v4sf_p {pred}
 
-  const vbs __builtin_altivec_vcmpneh (vss, vss);
+  const vss __builtin_altivec_vcmpneh (vss, vss);
     CMPNEH vcmpneh {}
 
   const signed int __builtin_altivec_vcmpneh_p (vss, vss);
     VCMPNEH_P vector_ne_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnew (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnew (vsi, vsi);
     CMPNEW vcmpnew {}
 
   const signed int __builtin_altivec_vcmpnew_p (vsi, vsi);
     VCMPNEW_P vector_ne_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpnezb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpnezb (vsc, vsc);
     CMPNEZB vcmpnezb {}
 
   const signed int __builtin_altivec_vcmpnezb_p (signed int, vsc, vsc);
     VCMPNEZB_P vector_nez_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpnezh (vss, vss);
+  const vss __builtin_altivec_vcmpnezh (vss, vss);
     CMPNEZH vcmpnezh {}
 
   const signed int __builtin_altivec_vcmpnezh_p (signed int, vss, vss);
     VCMPNEZH_P vector_nez_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnezw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnezw (vsi, vsi);
     CMPNEZW vcmpnezw {}
 
   const signed int __builtin_altivec_vcmpnezw_p (vsi, vsi);
@@ -3039,7 +3039,7 @@
   const vui __builtin_altivec_mtvsrwm (unsigned long long);
     MTVSRWM vec_mtvsr_v4si {}
 
-  const vuc __builtin_altivec_vcfuged (vuc, vuc);
+  const vull __builtin_altivec_vcfuged (vull, vull);
     VCFUGED vcfuged {}
 
   const vsc __builtin_altivec_vclrlb (vsc, unsigned int);
@@ -3048,10 +3048,10 @@
   const vsc __builtin_altivec_vclrrb (vsc, unsigned int);
     VCLRRB vclrrb {}
 
-  const vuc __builtin_altivec_vclzdm (vuc, vuc);
+  const vull __builtin_altivec_vclzdm (vull, vull);
     VCLZDM vclzdm {}
 
-  const vuc __builtin_altivec_vctzdm (vuc, vuc);
+  const vull __builtin_altivec_vctzdm (vull, vull);
     VCTZDM vctzdm {}
 
   const vuc __builtin_altivec_vexpandmb (vuc);
@@ -3093,19 +3093,19 @@
   const vull __builtin_altivec_vextduwvlx (vui, vui, unsigned char);
     VEXTRACTWL vextractlv4si {}
 
-  const unsigned int __builtin_altivec_vextractmb (vuc);
+  const signed int __builtin_altivec_vextractmb (vuc);
     VEXTRACTMB vec_extract_v16qi {}
 
-  const unsigned int __builtin_altivec_vextractmd (vull);
+  const signed int __builtin_altivec_vextractmd (vull);
     VEXTRACTMD vec_extract_v2di {}
 
-  const unsigned int __builtin_altivec_vextractmh (vus);
+  const signed int __builtin_altivec_vextractmh (vus);
     VEXTRACTMH vec_extract_v8hi {}
 
-  const unsigned int __builtin_altivec_vextractmq (vuq);
+  const signed int __builtin_altivec_vextractmq (vuq);
     VEXTRACTMQ vec_extract_v1ti {}
 
-  const unsigned int __builtin_altivec_vextractmw (vui);
+  const signed int __builtin_altivec_vextractmw (vui);
     VEXTRACTMW vec_extract_v4si {}
 
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
@@ -3153,10 +3153,10 @@
   const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
     VINSERTVPRWR vinsertvr_v4si {}
 
-  const vuc __builtin_altivec_vpdepd (vuc, vuc);
+  const vull __builtin_altivec_vpdepd (vull, vull);
     VPDEPD vpdepd {}
 
-  const vuc __builtin_altivec_vpextd (vuc, vuc);
+  const vull __builtin_altivec_vpextd (vull, vull);
     VPEXTD vpextd {}
 
   const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
@@ -3360,18 +3360,21 @@
 ; external copy in the built-in table.  This is fragile.  For the
 ; new support, we should transition this to do a name lookup in
 ; the built-in hash table, but to start with we will honor the
-; positioning of the built-ins in the table.
+; positioning of the built-ins in the table.  Note that right now
+; there is going to be breakage with __builtin_mma_disassemble_{acc,pair}
+; since they each require a blank builtin to follow them with icode
+; CODE_FOR_nothing.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC nothing {mma}
 
-  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+  v512 __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
     ASSEMBLE_PAIR nothing {mma}
 
-  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+  v256 __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
     ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
@@ -3383,367 +3386,367 @@
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER nothing {mma}
 
-  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER nothing {mma,pair}
 
-  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8 nothing {mma}
 
-  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+  v512 __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4 nothing {mma}
 
-  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
     XVBF16GER2 nothing {mma}
 
-  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
     XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
     XVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
     XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
     XVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
     XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
     XVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
     XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
     XVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
     XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
     XVF16GER2 nothing {mma}
 
-  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
     XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
     XVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
     XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
     XVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
     XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
     XVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
     XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
     XVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
     XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
     XVF32GER nothing {mma}
 
-  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
     XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
     XVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
     XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
     XVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
     XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
     XVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
     XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
     XVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
     XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
     XVF64GER nothing {mma,pair}
 
-  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
     XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
     XVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
     XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
     XVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
     XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
     XVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
     XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
     XVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
     XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
     XVI16GER2 nothing {mma}
 
-  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
     XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
     XVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
     XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
     XVI16GER2S nothing {mma}
 
-  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
     XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
     XVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
     XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
     XVI4GER8 nothing {mma}
 
-  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
     XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
     XVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
     XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
     XVI8GER4 nothing {mma}
 
-  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
     XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
     XVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
     XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
     XVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
     XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
     XXMFACC nothing {mma,quad}
 
-  void __builtin_mma_xxmfacc_internal (v512 *);
+  v512 __builtin_mma_xxmfacc_internal (v512 *);
     XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
     XXMTACC nothing {mma,quad}
 
-  void __builtin_mma_xxmtacc_internal (v512 *);
+  v512 __builtin_mma_xxmtacc_internal (v512 *);
     XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
     XXSETACCZ nothing {mma}
 
-  void __builtin_mma_xxsetaccz_internal (v512 *);
+  v512 __builtin_mma_xxsetaccz_internal (v512 *);
     XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-14 14:00 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-14 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:86fe7b679e624c465a4e744be98ab54b73c3c9cf

commit 86fe7b679e624c465a4e744be98ab54b73c3c9cf
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 11:06:36 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-call.c (altivec_init_builtins): Correct
            name of __builtin_altivec_xst_len_r.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 549 ++++++++++++++++++++++++++-----
 gcc/config/rs6000/rs6000-call.c          |   2 +-
 2 files changed, 476 insertions(+), 75 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 565c14f9f4c..755399b1843 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1494,6 +1494,45 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
+; It is unclear why these __builtin_vsx_* duplicates exist when the
+; __builtin_altivec_* counterparts are already present.  They are
+; kept for compatibility.
+  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
+    VPERM_16QI_X altivec_vperm_v16qi {}
+
+  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
+    VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
+
+  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vuc);
+    VPERM_1TI_X altivec_vperm_v1ti {}
+
+  const vuq __builtin_vsx_vperm_1ti_uns (vuq, vuq, vuc);
+    VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
+
+  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
+    VPERM_2DF_X altivec_vperm_v2df {}
+
+  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
+    VPERM_2DI_X altivec_vperm_v2di {}
+
+  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
+    VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
+
+  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
+    VPERM_4SF_X altivec_vperm_v4sf {}
+
+  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
+    VPERM_4SI_X altivec_vperm_v4si {}
+
+  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
+    VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
+
+  const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
+    VPERM_8HI_X altivec_vperm_v8hi {}
+
+  const vus __builtin_vsx_vperm_8hi_uns (vus, vus, vuc);
+    VPERM_8HI_UNS_X altivec_vperm_v8hi_uns {}
+
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -1617,6 +1656,9 @@
   const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
+  const vf __builtin_vsx_xvcvdpsp (vd);
+    XVCVDPSP vsx_xvcvdpsp {}
+
   const vsll __builtin_vsx_xvcvdpsxds (vd);
     XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1639,6 +1681,9 @@
   const vull __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
+  const vd __builtin_vsx_xvcvspdp (vf);
+    XVCVSPDP vsx_xvcvspdp {}
+
   const vsll __builtin_vsx_xvcvspsxds (vf);
     XVCVSPSXDS vsx_xvcvspsxds {}
 
@@ -2687,28 +2732,31 @@
   signed long long __builtin_darn_raw ();
     DARN_RAW darn_raw {}
 
-  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  double __builtin_mffsl ();
+    MFFSL rs6000_mffsl {}
+
+  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -2720,10 +2768,10 @@
 ; 32 bits, and the return value is DImode, so it seems that
 ; TARGET_64BIT (actually TARGET_POWERPC64) is justified.  TBD. ####
 [power9-64]
-; The following two are inexplicably named __builtin_{alti,}vec_* while
+; The following two are inexplicably named __builtin_altivec_* while
 ; their load counterparts are __builtin_vsx_*.  Need to deprecate
 ; these interfaces in favor of the other naming scheme (or vice versa).
-  void __builtin_vec_xst_len_r (vop, void *, unsigned long long);
+  void __builtin_altivec_xst_len_r (vop, void *, unsigned long long);
     XST_LEN_R xst_len_r {}
 
   void __builtin_altivec_stxvl (vop, void *, unsigned long long);
@@ -3063,12 +3111,114 @@
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
     VGNB vgnb {}
 
+  const vuc __builtin_altivec_vinsgubvlx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBL vinsertgl_v16qi {}
+
+  const vuc __builtin_altivec_vinsgubvrx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBR vinsertgr_v16qi {}
+
+  const vull __builtin_altivec_vinsgudvlx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDL vinsertgl_v2di {}
+
+  const vull __builtin_altivec_vinsgudvrx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDR vinsertgr_v2di {}
+
+  const vus __builtin_altivec_vinsguhvlx (unsigned short, vus, unsigned int);
+    VINSERTGPRHL vinsertgl_v8hi {}
+
+  const vus __builtin_altivec_vinsguhvrx (unsigned short, vus, unsigned int);
+    VINSERTGPRHR vinsertgr_v8hi {}
+
+  const vui __builtin_altivec_vinsguwvlx (unsigned int, vui, unsigned int);
+    VINSERTGPRWL vinsertgl_v4si {}
+
+  const vui __builtin_altivec_vinsguwvrx (unsigned int, vui, unsigned int);
+    VINSERTGPRWR vinsertgr_v4si {}
+
+  const vuc __builtin_altivec_vinsvubvlx (vuc, vuc, unsigned int);
+    VINSERTVPRBL vinsertvl_v16qi {}
+
+  const vuc __builtin_altivec_vinsvubvrx (vuc, vuc, unsigned int);
+    VINSERTVPRBR vinsertvr_v16qi {}
+
+  const vus __builtin_altivec_vinsvuhvlx (vus, vus, unsigned int);
+    VINSERTVPRHL vinsertvl_v8hi {}
+
+  const vus __builtin_altivec_vinsvuhvrx (vus, vus, unsigned int);
+    VINSERTVPRHR vinsertvr_v8hi {}
+
+  const vui __builtin_altivec_vinsvuwvlx (vui, vui, unsigned int);
+    VINSERTVPRWL vinsertvl_v4si {}
+
+  const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
+    VINSERTVPRWR vinsertvr_v4si {}
+
   const vuc __builtin_altivec_vpdepd (vuc, vuc);
     VPDEPD vpdepd {}
 
   const vuc __builtin_altivec_vpextd (vuc, vuc);
     VPEXTD vpextd {}
 
+  const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
+    VREPLACE_UN_UV2DI vreplace_un_v2di {}
+
+  const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, const int<4>);
+    VREPLACE_UN_UV4SI vreplace_un_v4si {}
+
+  const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>);
+    VREPLACE_UN_V2DF vreplace_un_v2df {}
+
+  const vsll __builtin_altivec_vreplace_un_v2di (vsll, unsigned long long, const int<4>);
+    VREPLACE_UN_V2DI vreplace_un_v2di {}
+
+  const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>);
+    VREPLACE_UN_V4SF vreplace_un_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>);
+    VREPLACE_UN_V4SI vreplace_un_v4si {}
+
+  const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, const int<1>);
+    VREPLACE_ELT_UV2DI vreplace_elt_v2di {}
+
+  const vui __builtin_altivec_vreplace_uv4si (vui, unsigned int, const int<2>);
+    VREPLACE_ELT_UV4SI vreplace_elt_v4si {}
+
+  const vd __builtin_altivec_vreplace_v2df (vd, double, const int<1>);
+    VREPLACE_ELT_V2DF vreplace_elt_v2df {}
+
+  const vsll __builtin_altivec_vreplace_v2di (vsll, unsigned long long, const int<1>);
+    VREPLACE_ELT_V2DI vreplace_elt_v2di {}
+
+  const vf __builtin_altivec_vreplace_v4sf (vf, float, const int<2>);
+    VREPLACE_ELT_V4SF vreplace_elt_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_v4si (vsi, signed int, const int<2>);
+    VREPLACE_ELT_V4SI vreplace_elt_v4si {}
+
+  const vsc __builtin_altivec_vsldb_v16qi (vsc, vsc, const int<3>);
+    VSLDB_V16QI vsldb_v16qi {}
+
+  const vsll __builtin_altivec_vsldb_v2di (vsll, vsll, const int<3>);
+    VSLDB_V2DI vsldb_v2di {}
+
+  const vsi __builtin_altivec_vsldb_v4si (vsi, vsi, const int<3>);
+    VSLDB_V4SI vsldb_v4si {}
+
+  const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
+    VSLDB_V8HI vsldb_v8hi {}
+
+  const vsc __builtin_altivec_vsrdb_v16qi (vsc, vsc, const int<3>);
+    VSRDB_V16QI vsrdb_v16qi {}
+
+  const vsll __builtin_altivec_vsrdb_v2di (vsll, vsll, const int<3>);
+    VSRDB_V2DI vsrdb_v2di {}
+
+  const vsi __builtin_altivec_vsrdb_v4si (vsi, vsi, const int<3>);
+    VSRDB_V4SI vsrdb_v4si {}
+
+  const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
+    VSRDB_V8HI vsrdb_v8hi {}
+
   const vuc __builtin_altivec_vstribl (vuc);
     VSTRIBL vstril_v16qi {}
 
@@ -3093,6 +3243,51 @@
   const signed int __builtin_altivec_vstrihr_p (vus);
     VSTRIHR_P vstrir_p_v8hi {}
 
+  const signed int __builtin_vsx_xvtlsbb_all_ones (vuc);
+    XVTLSBB_ONES xvtlsbbo {}
+
+  const signed int __builtin_vsx_xvtlsbb_all_zeros (vuc);
+    XVTLSBB_ZEROS xvtlsbbz {}
+
+  const vf __builtin_vsx_vxxsplti32dx_v4sf (vf, const int<1>, float);
+    VXXSPLTI32DX_V4SF xxsplti32dx_v4sf {}
+
+  const vsi __builtin_vsx_vxxsplti32dx_v4si (vsi, const int<1>, signed int);
+    VXXSPLTI32DX_V4SI xxsplti32dx_v4si {}
+
+  const vd __builtin_vsx_vxxspltidp (float);
+    VXXSPLTIDP xxspltidp_v2df {}
+
+  const vf __builtin_vsx_vxxspltiw_v4sf (float);
+    VXXSPLTIW_V4SF xxspltiw_v4sf {}
+
+  const vsi __builtin_vsx_vxxspltiw_v4si (signed int);
+    VXXSPLTIW_V4SI xxspltiw_v4si {}
+
+  const vuc __builtin_vsx_xvcvbf16spn (vuc);
+    XVCVBF16SPN vsx_xvcvbf16spn {}
+
+  const vuc __builtin_vsx_xvcvspbf16 (vuc);
+    XVCVSPBF16 vsx_xvcvspbf16 {}
+
+  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+    VXXBLEND_V16QI xxblend_v16qi {}
+
+  const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
+    VXXBLEND_V2DF xxblend_v2df {}
+
+  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+    VXXBLEND_V2DI xxblend_v2di {}
+
+  const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
+    VXXBLEND_V4SF xxblend_v4sf {}
+
+  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+    VXXBLEND_V4SI xxblend_v4si {}
+
+  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+    VXXBLEND_V8HI xxblend_v8hi {}
+
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
     XXEVAL xxeval {}
 
@@ -3108,11 +3303,37 @@
   const vus __builtin_vsx_xxgenpcvm_v8hi (vus, const int <2>);
     XXGENPCVM_V8HI xxgenpcvm_v8hi {}
 
-  const vuc __builtin_vsx_xvcvbf16spn (vuc);
-    XVCVBF16SPN vsx_xvcvbf16spn {}
+; TODO: This was quite hackish in the original code, and we may need to add
+; mode-specific expansions rather than using CODE_FOR_xxpermx throughout.
+  const vuc __builtin_vsx_xxpermx_uv16qi (vuc, vuc, vuc, const int<3>);
+    XXPERMX_UV16QI xxpermx {}
 
-  const vuc __builtin_vsx_xvcvspbf16 (vuc);
-    XVCVSPBF16 vsx_xvcvspbf16 {}
+  const vull __builtin_vsx_xxpermx_uv2di (vull, vull, vuc, const int<3>);
+    XXPERMX_UV2DI xxpermx {}
+
+  const vui __builtin_vsx_xxpermx_uv4si (vui, vui, vuc, const int<3>);
+    XXPERMX_UV4SI xxpermx {}
+
+  const vus __builtin_vsx_xxpermx_uv8hi (vus, vus, vuc, const int<3>);
+    XXPERMX_UV8HI xxpermx {}
+
+  const vsc __builtin_vsx_xxpermx_v16qi (vsc, vsc, vuc, const int<3>);
+    XXPERMX_V16QI xxpermx {}
+
+  const vd __builtin_vsx_xxpermx_v2df (vd, vd, vuc, const int<3>);
+    XXPERMX_V2DF xxpermx {}
+
+  const vsll __builtin_vsx_xxpermx_v2di (vsll, vsll, vuc, const int<3>);
+    XXPERMX_V2DI xxpermx {}
+
+  const vf __builtin_vsx_xxpermx_v4sf (vf, vf, vuc, const int<3>);
+    XXPERMX_V4SF xxpermx {}
+
+  const vsi __builtin_vsx_xxpermx_v4si (vsi, vsi, vuc, const int<3>);
+    XXPERMX_V4SI xxpermx {}
+
+  const vss __builtin_vsx_xxpermx_v8hi (vss, vss, vuc, const int<3>);
+    XXPERMX_V8HI xxpermx {}
 
 
 [power10-64]
@@ -3166,184 +3387,364 @@
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}
+    PMXVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NP mma_pmxvbf16ger2np {mma,quad}
+    PMXVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PN mma_pmxvbf16ger2pn {mma,quad}
+    PMXVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PP mma_pmxvbf16ger2pp {mma,quad}
+    PMXVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2 mma_pmxvf16ger2 {mma}
+    PMXVF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NN mma_pmxvf16ger2nn {mma,quad}
+    PMXVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NP mma_pmxvf16ger2np {mma,quad}
+    PMXVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PN mma_pmxvf16ger2pn {mma,quad}
+    PMXVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PP mma_pmxvf16ger2pp {mma,quad}
+    PMXVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GER mma_pmxvf32ger {mma}
+    PMXVF32GER nothing {mma}
+
+  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNN mma_pmxvf32gernn {mma,quad}
+    PMXVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNP mma_pmxvf32gernp {mma,quad}
+    PMXVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPN mma_pmxvf32gerpn {mma,quad}
+    PMXVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPP mma_pmxvf32gerpp {mma,quad}
+    PMXVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GER mma_pmxvf64ger {mma,pair}
+    PMXVF64GER nothing {mma,pair}
+
+  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNN mma_pmxvf64gernn {mma,pair,quad}
+    PMXVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNP mma_pmxvf64gernp {mma,pair,quad}
+    PMXVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPN mma_pmxvf64gerpn {mma,pair,quad}
+    PMXVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPP mma_pmxvf64gerpp {mma,pair,quad}
+    PMXVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2 mma_pmxvi16ger2 {mma}
+    PMXVI16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2PP mma_pmxvi16ger2pp {mma,quad}
+    PMXVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2S mma_pmxvi16ger2s {mma}
+    PMXVI16GER2S nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2SPP mma_pmxvi16ger2spp {mma,quad}
+    PMXVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
-    PMXVI4GER8 mma_pmxvi4ger8 {mma}
+    PMXVI4GER8 nothing {mma}
+
+  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+    PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI4GER8PP mma_pmxvi4ger8pp {mma,quad}
+    PMXVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4 mma_pmxvi8ger4 {mma}
+    PMXVI8GER4 nothing {mma}
+
+  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4PP mma_pmxvi8ger4pp {mma,quad}
+    PMXVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4SPP mma_pmxvi8ger4spp {mma,quad}
+    PMXVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
-    XVBF16GER2 mma_xvbf16ger2 {mma}
+    XVBF16GER2 nothing {mma}
+
+  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+    XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
-    XVBF16GER2NN mma_xvbf16ger2nn {mma,quad}
+    XVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+    XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
-    XVBF16GER2NP mma_xvbf16ger2np {mma,quad}
+    XVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+    XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
-    XVBF16GER2PN mma_xvbf16ger2pn {mma,quad}
+    XVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+    XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
-    XVBF16GER2PP mma_xvbf16ger2pp {mma,quad}
+    XVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+    XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
-    XVF16GER2 mma_xvf16ger2 {mma}
+    XVF16GER2 nothing {mma}
+
+  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+    XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
-    XVF16GER2NN mma_xvf16ger2nn {mma,quad}
+    XVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+    XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
-    XVF16GER2NP mma_xvf16ger2np {mma,quad}
+    XVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+    XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
-    XVF16GER2PN mma_xvf16ger2pn {mma,quad}
+    XVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+    XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
-    XVF16GER2PP mma_xvf16ger2pp {mma,quad}
+    XVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+    XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
-    XVF32GER mma_xvf32ger {mma}
+    XVF32GER nothing {mma}
+
+  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+    XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
-    XVF32GERNN mma_xvf32gernn {mma,quad}
+    XVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+    XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
-    XVF32GERNP mma_xvf32gernp {mma,quad}
+    XVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+    XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
-    XVF32GERPN mma_xvf32gerpn {mma,quad}
+    XVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+    XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
-    XVF32GERPP mma_xvf32gerpp {mma,quad}
+    XVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+    XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
-    XVF64GER mma_xvf64ger {mma,pair}
+    XVF64GER nothing {mma,pair}
+
+  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+    XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
-    XVF64GERNN mma_xvf64gernn {mma,pair,quad}
+    XVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+    XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
-    XVF64GERNP mma_xvf64gernp {mma,pair,quad}
+    XVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+    XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
-    XVF64GERPN mma_xvf64gerpn {mma,pair,quad}
+    XVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+    XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
-    XVF64GERPP mma_xvf64gerpp {mma,pair,quad}
+    XVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+    XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
-    XVI16GER2 mma_xvi16ger2 {mma}
+    XVI16GER2 nothing {mma}
+
+  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+    XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
-    XVI16GER2PP mma_xvi16ger2pp {mma,quad}
+    XVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+    XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
-    XVI16GER2S mma_xvi16ger2s {mma}
+    XVI16GER2S nothing {mma}
+
+  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+    XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
-    XVI16GER2SPP mma_xvi16ger2spp {mma,quad}
+    XVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+    XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
-    XVI4GER8 mma_xvi4ger8 {mma}
+    XVI4GER8 nothing {mma}
+
+  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+    XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
-    XVI4GER8PP mma_xvi4ger8pp {mma,quad}
+    XVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+    XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
-    XVI8GER4 mma_xvi8ger4 {mma}
+    XVI8GER4 nothing {mma}
+
+  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+    XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
-    XVI8GER4PP mma_xvi8ger4pp {mma,quad}
+    XVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+    XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
-    XVI8GER4SPP mma_xvi8ger4spp {mma,quad}
+    XVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+    XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
-    XXMFACC mma_xxmfacc {mma,quad}
+    XXMFACC nothing {mma,quad}
+
+  void __builtin_mma_xxmfacc_internal (v512 *);
+    XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
-    XXMTACC mma_xxmtacc {mma,quad}
+    XXMTACC nothing {mma,quad}
+
+  void __builtin_mma_xxmtacc_internal (v512 *);
+    XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
-    XXSETACCZ mma_xxsetaccz {mma}
+    XXSETACCZ nothing {mma}
+
+  void __builtin_mma_xxsetaccz_internal (v512 *);
+    XXSETACCZ_INTERNAL mma_xxsetaccz {mma}
 
 
 
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index c7057005a18..9600801b838 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14598,7 +14598,7 @@ altivec_init_builtins (void)
     {
       def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
 		   P9V_BUILTIN_STXVL);
-      def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
+      def_builtin ("__builtin_altivec_xst_len_r", void_ftype_v16qi_pvoid_long,
 		   P9V_BUILTIN_XST_LEN_R);
     }


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-14 14:00 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-14 14:00 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:0254a26b151450e2ba93debef2589dd8a3263f2b

commit 0254a26b151450e2ba93debef2589dd8a3263f2b
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Aug 30 10:28:28 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-08-30  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 127 ++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index c0f8821be64..565c14f9f4c 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -321,9 +321,7 @@
   pure vuc __builtin_altivec_lvsr (signed long long, void *);
     LVSR altivec_lvsr {ldvec}
 
-; Following LVX one is redundant, and I don't think we need to
-; keep it.  It only maps to LVX_V4SI.  Probably remove.
-  pure vop __builtin_altivec_lvx (signed long long, void *);
+  pure vsi __builtin_altivec_lvx (signed long long, void *);
     LVX altivec_lvx_v4si {ldvec}
 
   pure vsc __builtin_altivec_lvx_v16qi (signed long long, void *);
@@ -395,7 +393,8 @@
   void __builtin_altivec_stvrxl (vop, signed long long, void *);
     STVRXL altivec_stvrxl {stvec}
 
-; Skipping the STVX one that maps to STVX_V4SI (see above for LVX)
+  void __builtin_altivec_stvx (vsi, signed long long, void *);
+    STVX altivec_stvx_v4si {stvec}
 
   void __builtin_altivec_stvx_v16qi (vsc, signed long long, void *);
     STVX_V16QI altivec_stvx_v16qi {stvec}
@@ -409,7 +408,8 @@
   void __builtin_altivec_stvx_v8hi (vss, signed long long, void *);
     STVX_V8HI altivec_stvx_v8hi {stvec}
 
-; Skipping the STVXL one that maps to STVXL_V4SI (see above for LVX)
+  void __builtin_altivec_stvxl (vsi, signed long long, void *);
+    STVXL altivec_stvxl_v4si {stvec}
 
   void __builtin_altivec_stvxl_v16qi (vsc, signed long long, void *);
     STVXL_V16QI altivec_stvxl_v16qi {stvec}
@@ -1206,6 +1206,33 @@
   const vull __builtin_altivec_vxor_v2di_uns (vull, vull);
     VXOR_V2DI_UNS xorv2di3 {}
 
+  const signed __int128 __builtin_vec_ext_v1ti (vsq, signed int);
+    VEC_EXT_V1TI nothing {extract}
+
+  const double __builtin_vec_ext_v2df (vd, signed int);
+    VEC_EXT_V2DF nothing {extract}
+
+  const signed long long __builtin_vec_ext_v2di (vsll, signed int);
+    VEC_EXT_V2DI nothing {extract}
+
+  const vsq __builtin_vec_init_v1ti (signed __int128);
+    VEC_INIT_V1TI nothing {init}
+
+  const vd __builtin_vec_init_v2df (double, double);
+    VEC_INIT_V2DF nothing {init}
+
+  const vsll __builtin_vec_init_v2di (signed long long, signed long long);
+    VEC_INIT_V2DI nothing {init}
+
+  const vsq __builtin_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
+    VEC_SET_V1TI nothing {set}
+
+  const vd __builtin_vec_set_v2df (vd, double, const int<1>);
+    VEC_SET_V2DF nothing {set}
+
+  const vsll __builtin_vec_set_v2di (vsll, signed long long, const int<1>);
+    VEC_SET_V2DI nothing {set}
+
   const vbc __builtin_vsx_cmpge_16qi (vsc, vsc);
     CMPGE_16QI vector_nltv16qi {}
 
@@ -1348,7 +1375,7 @@
   pure vsll __builtin_vsx_lxvd2x_v2di (signed long long, void *);
     LXVD2X_V2DI vsx_load_v2di {ldvec}
 
-  pure vsc __builtin_vsx_lxvw4x_16qi (signed long long, void *);
+  pure vsc __builtin_vsx_lxvw4x_v16qi (signed long long, void *);
     LXVW4X_V16QI vsx_load_v16qi {ldvec}
 
   pure vf __builtin_vsx_lxvw4x_v4sf (signed long long, void *);
@@ -1467,33 +1494,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
-  const signed __int128 __builtin_vsx_vec_ext_v1ti (vsq, signed int);
-    VEC_EXT_V1TI nothing {extract}
-
-  const double __builtin_vsx_vec_ext_v2df (vd, signed int);
-    VEC_EXT_V2DF nothing {extract}
-
-  const signed long long __builtin_vsx_vec_ext_v2di (vsll, signed int);
-    VEC_EXT_V2DI nothing {extract}
-
-  const vsq __builtin_vsx_vec_init_v1ti (signed __int128);
-    VEC_INIT_V1TI nothing {init}
-
-  const vd __builtin_vsx_vec_init_v2df (double, double);
-    VEC_INIT_V2DF nothing {init}
-
-  const vsll __builtin_vsx_vec_init_v2di (signed long long, signed long long);
-    VEC_INIT_V2DI nothing {init}
-
-  const vsq __builtin_vsx_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
-    VEC_SET_V1TI nothing {set}
-
-  const vd __builtin_vsx_vec_set_v2df (vd, double, const int<1>);
-    VEC_SET_V2DF nothing {set}
-
-  const vsll __builtin_vsx_vec_set_v2di (vsll, signed long long, const int<1>);
-    VEC_SET_V2DI nothing {set}
-
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -2753,24 +2753,18 @@
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
-  fpmath _Float128 __builtin_vsx_addf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
     ADDF128_ODD addkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_divf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_divf128_round_to_odd (_Float128, _Float128);
     DIVF128_ODD divkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
+  fpmath _Float128 __builtin_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
     FMAF128_ODD fmakf4_odd {}
 
-  fpmath _Float128 __builtin_vsx_mulf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_mulf128_round_to_odd (_Float128, _Float128);
     MULF128_ODD mulkf3_odd {}
 
-  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
-
-  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
-    VSESQP xsxsigqp_kf {}
-
   const signed int __builtin_vsx_scalar_cmp_exp_qp_eq (_Float128, _Float128);
     VSCEQPEQ xscmpexpqp_eq_kf {}
 
@@ -2783,6 +2777,21 @@
   const signed int __builtin_vsx_scalar_cmp_exp_qp_unordered (_Float128, _Float128);
     VSCEQPUO xscmpexpqp_unordered_kf {}
 
+  fpmath _Float128 __builtin_sqrtf128_round_to_odd (_Float128);
+    SQRTF128_ODD sqrtkf2_odd {}
+
+  fpmath _Float128 __builtin_subf128_round_to_odd (_Float128, _Float128);
+    SUBF128_ODD subkf3_odd {}
+
+  fpmath double __builtin_truncf128_round_to_odd (_Float128);
+    TRUNCF128_ODD trunckfdf2_odd {}
+
+  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
+    VSEEQP xsxexpqp_kf {}
+
+  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
+    VSESQP xsxsigqp_kf {}
+
   const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, unsigned long long);
     VSIEQP xsiexpqp_kf {}
 
@@ -2795,15 +2804,6 @@
   const unsigned int __builtin_vsx_scalar_test_neg_qp (_Float128);
     VSTDCNQP xststdcnegqp_kf {}
 
-  fpmath _Float128 __builtin_vsx_sqrtf128_round_to_odd (_Float128);
-    SQRTF128_ODD sqrtkf2_odd {}
-
-  fpmath _Float128 __builtin_vsx_subf128_round_to_odd (_Float128, _Float128);
-    SUBF128_ODD subkf3_odd {}
-
-  fpmath double __builtin_vsx_truncf128_round_to_odd (_Float128);
-    TRUNCF128_ODD trunckfdf2_odd {}
-
 
 
 ; Decimal floating-point builtins.
@@ -3132,12 +3132,26 @@
     PEXTD pextd {}
 
 
+; TODO: Land-mine alert.
+;
+; The original built-in support has code that assumes the internal
+; copy of an MMA built-in function appears immediately after the
+; external copy in the built-in table.  This is fragile.  For the
+; new support, we should transition this to do a name lookup in
+; the built-in hash table, but to start with we will honor the
+; positioning of the built-ins in the table.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
-    ASSEMBLE_ACC mma_assemble_acc {mma}
+    ASSEMBLE_ACC nothing {mma}
+
+  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+    ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
-    ASSEMBLE_PAIR mma_assemble_pair {mma}
+    ASSEMBLE_PAIR nothing {mma}
+
+  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+    ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
     DISASSEMBLE_ACC nothing {mma,quad}
@@ -3146,7 +3160,10 @@
     DISASSEMBLE_PAIR nothing {mma,pair}
 
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2 mma_pmxvbf16ger2 {mma}
+    PMXVBF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-13 23:21 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-13 23:21 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:f062fa9c958bab8f72582abd82e564b25aededd2

commit f062fa9c958bab8f72582abd82e564b25aededd2
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 18:21:20 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 195 ++++++++++++++++---------------
 1 file changed, 99 insertions(+), 96 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 755399b1843..6b66908d7c7 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -531,73 +531,73 @@
   const int __builtin_altivec_vcmpbfp_p (int, vf, vf);
     VCMPBFP_P altivec_vcmpbfp_p {pred}
 
-  const vbi __builtin_altivec_vcmpeqfp (vf, vf);
+  const vf __builtin_altivec_vcmpeqfp (vf, vf);
     VCMPEQFP vector_eqv4sf {}
 
   const int __builtin_altivec_vcmpeqfp_p (int, vf, vf);
     VCMPEQFP_P vector_eq_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpequb (vuc, vuc);
+  const vsc __builtin_altivec_vcmpequb (vuc, vuc);
     VCMPEQUB vector_eqv16qi {}
 
   const int __builtin_altivec_vcmpequb_p (int, vuc, vuc);
     VCMPEQUB_P vector_eq_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpequh (vus, vus);
+  const vss __builtin_altivec_vcmpequh (vus, vus);
     VCMPEQUH vector_eqv8hi {}
 
   const int __builtin_altivec_vcmpequh_p (int, vus, vus);
     VCMPEQUH_P vector_eq_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpequw (vui, vui);
+  const vsi __builtin_altivec_vcmpequw (vui, vui);
     VCMPEQUW vector_eqv4si {}
 
   const int __builtin_altivec_vcmpequw_p (int, vui, vui);
     VCMPEQUW_P vector_eq_v4si_p {pred}
 
-  const vbi __builtin_altivec_vcmpgefp (vf, vf);
+  const vf __builtin_altivec_vcmpgefp (vf, vf);
     VCMPGEFP vector_gev4sf {}
 
   const int __builtin_altivec_vcmpgefp_p (int, vf, vf);
     VCMPGEFP_P vector_ge_v4sf_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtfp (vf, vf);
+  const vf __builtin_altivec_vcmpgtfp (vf, vf);
     VCMPGTFP vector_gtv4sf {}
 
   const int __builtin_altivec_vcmpgtfp_p (int, vf, vf);
     VCMPGTFP_P vector_gt_v4sf_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtsb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpgtsb (vsc, vsc);
     VCMPGTSB vector_gtv16qi {}
 
   const int __builtin_altivec_vcmpgtsb_p (int, vsc, vsc);
     VCMPGTSB_P vector_gt_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtsh (vss, vss);
+  const vss __builtin_altivec_vcmpgtsh (vss, vss);
     VCMPGTSH vector_gtv8hi {}
 
   const int __builtin_altivec_vcmpgtsh_p (int, vss, vss);
     VCMPGTSH_P vector_gt_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtsw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpgtsw (vsi, vsi);
     VCMPGTSW vector_gtv4si {}
 
   const int __builtin_altivec_vcmpgtsw_p (int, vsi, vsi);
     VCMPGTSW_P vector_gt_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpgtub (vuc, vuc);
+  const vsc __builtin_altivec_vcmpgtub (vuc, vuc);
     VCMPGTUB vector_gtuv16qi {}
 
   const int __builtin_altivec_vcmpgtub_p (int, vuc, vuc);
     VCMPGTUB_P vector_gtu_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpgtuh (vus, vus);
+  const vss __builtin_altivec_vcmpgtuh (vus, vus);
     VCMPGTUH vector_gtuv8hi {}
 
   const int __builtin_altivec_vcmpgtuh_p (int, vus, vus);
     VCMPGTUH_P vector_gtu_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpgtuw (vui, vui);
+  const vsi __builtin_altivec_vcmpgtuw (vui, vui);
     VCMPGTUW vector_gtuv4si {}
 
   const int __builtin_altivec_vcmpgtuw_p (int, vui, vui);
@@ -2162,7 +2162,7 @@
   const vsi __builtin_altivec_vclzw (vsi);
     VCLZW clzv4si2 {}
 
-  const vsc __builtin_altivec_vgbbd (vsc);
+  const vuc __builtin_altivec_vgbbd (vuc);
     VGBBD p8v_vgbbd {}
 
   const vsq __builtin_altivec_vaddcuq (vsq, vsq);
@@ -2186,19 +2186,19 @@
   const vuc __builtin_altivec_vbpermq2 (vuc, vuc);
     VBPERMQ2 altivec_vbpermq2 {}
 
-  const vbll __builtin_altivec_vcmpequd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpequd (vull, vull);
     VCMPEQUD vector_eqv2di {}
 
   const int __builtin_altivec_vcmpequd_p (int, vsll, vsll);
     VCMPEQUD_P vector_eq_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtsd (vsll, vsll);
+  const vsll __builtin_altivec_vcmpgtsd (vsll, vsll);
     VCMPGTSD vector_gtv2di {}
 
   const int __builtin_altivec_vcmpgtsd_p (int, vsll, vsll);
     VCMPGTSD_P vector_gt_v2di_p {pred}
 
-  const vbll __builtin_altivec_vcmpgtud (vull, vull);
+  const vsll __builtin_altivec_vcmpgtud (vull, vull);
     VCMPGTUD vector_gtuv2di {}
 
   const int __builtin_altivec_vcmpgtud_p (vull, vull);
@@ -2540,7 +2540,7 @@
   const signed int __builtin_altivec_vcmpaew_p (vsi, vsi);
     VCMPAEW_P vector_ae_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpneb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpneb (vsc, vsc);
     CMPNEB vcmpneb {}
 
   const signed int __builtin_altivec_vcmpneb_p (vsc, vsc);
@@ -2555,31 +2555,31 @@
   const signed int __builtin_altivec_vcmpnefp_p (vf, vf);
     VCMPNEFP_P vector_ne_v4sf_p {pred}
 
-  const vbs __builtin_altivec_vcmpneh (vss, vss);
+  const vss __builtin_altivec_vcmpneh (vss, vss);
     CMPNEH vcmpneh {}
 
   const signed int __builtin_altivec_vcmpneh_p (vss, vss);
     VCMPNEH_P vector_ne_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnew (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnew (vsi, vsi);
     CMPNEW vcmpnew {}
 
   const signed int __builtin_altivec_vcmpnew_p (vsi, vsi);
     VCMPNEW_P vector_ne_v4si_p {pred}
 
-  const vbc __builtin_altivec_vcmpnezb (vsc, vsc);
+  const vsc __builtin_altivec_vcmpnezb (vsc, vsc);
     CMPNEZB vcmpnezb {}
 
   const signed int __builtin_altivec_vcmpnezb_p (signed int, vsc, vsc);
     VCMPNEZB_P vector_nez_v16qi_p {pred}
 
-  const vbs __builtin_altivec_vcmpnezh (vss, vss);
+  const vss __builtin_altivec_vcmpnezh (vss, vss);
     CMPNEZH vcmpnezh {}
 
   const signed int __builtin_altivec_vcmpnezh_p (signed int, vss, vss);
     VCMPNEZH_P vector_nez_v8hi_p {pred}
 
-  const vbi __builtin_altivec_vcmpnezw (vsi, vsi);
+  const vsi __builtin_altivec_vcmpnezw (vsi, vsi);
     CMPNEZW vcmpnezw {}
 
   const signed int __builtin_altivec_vcmpnezw_p (vsi, vsi);
@@ -3039,7 +3039,7 @@
   const vui __builtin_altivec_mtvsrwm (unsigned long long);
     MTVSRWM vec_mtvsr_v4si {}
 
-  const vuc __builtin_altivec_vcfuged (vuc, vuc);
+  const vull __builtin_altivec_vcfuged (vull, vull);
     VCFUGED vcfuged {}
 
   const vsc __builtin_altivec_vclrlb (vsc, unsigned int);
@@ -3048,10 +3048,10 @@
   const vsc __builtin_altivec_vclrrb (vsc, unsigned int);
     VCLRRB vclrrb {}
 
-  const vuc __builtin_altivec_vclzdm (vuc, vuc);
+  const vull __builtin_altivec_vclzdm (vull, vull);
     VCLZDM vclzdm {}
 
-  const vuc __builtin_altivec_vctzdm (vuc, vuc);
+  const vull __builtin_altivec_vctzdm (vull, vull);
     VCTZDM vctzdm {}
 
   const vuc __builtin_altivec_vexpandmb (vuc);
@@ -3093,19 +3093,19 @@
   const vull __builtin_altivec_vextduwvlx (vui, vui, unsigned char);
     VEXTRACTWL vextractlv4si {}
 
-  const unsigned int __builtin_altivec_vextractmb (vuc);
+  const signed int __builtin_altivec_vextractmb (vuc);
     VEXTRACTMB vec_extract_v16qi {}
 
-  const unsigned int __builtin_altivec_vextractmd (vull);
+  const signed int __builtin_altivec_vextractmd (vull);
     VEXTRACTMD vec_extract_v2di {}
 
-  const unsigned int __builtin_altivec_vextractmh (vus);
+  const signed int __builtin_altivec_vextractmh (vus);
     VEXTRACTMH vec_extract_v8hi {}
 
-  const unsigned int __builtin_altivec_vextractmq (vuq);
+  const signed int __builtin_altivec_vextractmq (vuq);
     VEXTRACTMQ vec_extract_v1ti {}
 
-  const unsigned int __builtin_altivec_vextractmw (vui);
+  const signed int __builtin_altivec_vextractmw (vui);
     VEXTRACTMW vec_extract_v4si {}
 
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
@@ -3153,10 +3153,10 @@
   const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
     VINSERTVPRWR vinsertvr_v4si {}
 
-  const vuc __builtin_altivec_vpdepd (vuc, vuc);
+  const vull __builtin_altivec_vpdepd (vull, vull);
     VPDEPD vpdepd {}
 
-  const vuc __builtin_altivec_vpextd (vuc, vuc);
+  const vull __builtin_altivec_vpextd (vull, vull);
     VPEXTD vpextd {}
 
   const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
@@ -3360,18 +3360,21 @@
 ; external copy in the built-in table.  This is fragile.  For the
 ; new support, we should transition this to do a name lookup in
 ; the built-in hash table, but to start with we will honor the
-; positioning of the built-ins in the table.
+; positioning of the built-ins in the table.  Note that right now
+; there is going to be breakage with __builtin_mma_disassemble_{acc,pair}
+; since they each require a blank builtin to follow them with icode
+; CODE_FOR_nothing.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC nothing {mma}
 
-  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+  v512 __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
     ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
     ASSEMBLE_PAIR nothing {mma}
 
-  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+  v256 __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
     ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
@@ -3383,367 +3386,367 @@
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER nothing {mma}
 
-  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
     PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER nothing {mma,pair}
 
-  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
     PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2 nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S nothing {mma}
 
-  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+  v512 __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8 nothing {mma}
 
-  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+  v512 __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
     PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4 nothing {mma}
 
-  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+  v512 __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
     PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
     XVBF16GER2 nothing {mma}
 
-  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
     XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
     XVBF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
     XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
     XVBF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
     XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
     XVBF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
     XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
     XVBF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
     XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
     XVF16GER2 nothing {mma}
 
-  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
     XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
     XVF16GER2NN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
     XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
     XVF16GER2NP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
     XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
     XVF16GER2PN nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
     XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
     XVF16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
     XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
     XVF32GER nothing {mma}
 
-  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
     XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
     XVF32GERNN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
     XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
     XVF32GERNP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
     XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
     XVF32GERPN nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
     XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
     XVF32GERPP nothing {mma,quad}
 
-  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
     XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
     XVF64GER nothing {mma,pair}
 
-  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
     XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
     XVF64GERNN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
     XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
     XVF64GERNP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
     XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
     XVF64GERPN nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
     XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
     XVF64GERPP nothing {mma,pair,quad}
 
-  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+  v512 __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
     XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
     XVI16GER2 nothing {mma}
 
-  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
     XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
     XVI16GER2PP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
     XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
     XVI16GER2S nothing {mma}
 
-  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
     XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
     XVI16GER2SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
     XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
     XVI4GER8 nothing {mma}
 
-  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
     XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
     XVI4GER8PP nothing {mma,quad}
 
-  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
     XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
     XVI8GER4 nothing {mma}
 
-  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
     XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
     XVI8GER4PP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
     XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
     XVI8GER4SPP nothing {mma,quad}
 
-  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+  v512 __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
     XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
     XXMFACC nothing {mma,quad}
 
-  void __builtin_mma_xxmfacc_internal (v512 *);
+  v512 __builtin_mma_xxmfacc_internal (v512 *);
     XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
     XXMTACC nothing {mma,quad}
 
-  void __builtin_mma_xxmtacc_internal (v512 *);
+  v512 __builtin_mma_xxmtacc_internal (v512 *);
     XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
     XXSETACCZ nothing {mma}
 
-  void __builtin_mma_xxsetaccz_internal (v512 *);
+  v512 __builtin_mma_xxsetaccz_internal (v512 *);
     XXSETACCZ_INTERNAL mma_xxsetaccz {mma}


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-09-13 16:07 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-09-13 16:07 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:6e9cdd9c6c74bea4b80cf663ec1799cbb2cecd41

commit 6e9cdd9c6c74bea4b80cf663ec1799cbb2cecd41
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Sep 13 11:06:36 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-09-13  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.
            * config/rs6000/rs6000-call.c (altivec_init_builtins): Correct
            name of __builtin_altivec_xst_len_r.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 549 ++++++++++++++++++++++++++-----
 gcc/config/rs6000/rs6000-call.c          |   2 +-
 2 files changed, 476 insertions(+), 75 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index 565c14f9f4c..755399b1843 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -1494,6 +1494,45 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
+; I have no idea why we have __builtin_vsx_* duplicates of these when
+; the __builtin_altivec_* counterparts are already present.  Keeping
+; them for compatibility, but...oy.
+  const vsc __builtin_vsx_vperm_16qi (vsc, vsc, vuc);
+    VPERM_16QI_X altivec_vperm_v16qi {}
+
+  const vuc __builtin_vsx_vperm_16qi_uns (vuc, vuc, vuc);
+    VPERM_16QI_UNS_X altivec_vperm_v16qi_uns {}
+
+  const vsq __builtin_vsx_vperm_1ti (vsq, vsq, vuc);
+    VPERM_1TI_X altivec_vperm_v1ti {}
+
+  const vuq __builtin_vsx_vperm_1ti_uns (vuq, vuq, vuc);
+    VPERM_1TI_UNS_X altivec_vperm_v1ti_uns {}
+
+  const vd __builtin_vsx_vperm_2df (vd, vd, vuc);
+    VPERM_2DF_X altivec_vperm_v2df {}
+
+  const vsll __builtin_vsx_vperm_2di (vsll, vsll, vuc);
+    VPERM_2DI_X altivec_vperm_v2di {}
+
+  const vull __builtin_vsx_vperm_2di_uns (vull, vull, vuc);
+    VPERM_2DI_UNS_X altivec_vperm_v2di_uns {}
+
+  const vf __builtin_vsx_vperm_4sf (vf, vf, vuc);
+    VPERM_4SF_X altivec_vperm_v4sf {}
+
+  const vsi __builtin_vsx_vperm_4si (vsi, vsi, vuc);
+    VPERM_4SI_X altivec_vperm_v4si {}
+
+  const vui __builtin_vsx_vperm_4si_uns (vui, vui, vuc);
+    VPERM_4SI_UNS_X altivec_vperm_v4si_uns {}
+
+  const vss __builtin_vsx_vperm_8hi (vss, vss, vuc);
+    VPERM_8HI_X altivec_vperm_v8hi {}
+
+  const vus __builtin_vsx_vperm_8hi_uns (vus, vus, vuc);
+    VPERM_8HI_UNS_X altivec_vperm_v8hi_uns {}
+
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -1617,6 +1656,9 @@
   const vui __builtin_vsx_xvcmpgtsp_p (vf, vf);
     XVCMPGTSP_P vector_gt_v4sf_p {pred}
 
+  const vf __builtin_vsx_xvcvdpsp (vd);
+    XVCVDPSP vsx_xvcvdpsp {}
+
   const vsll __builtin_vsx_xvcvdpsxds (vd);
     XVCVDPSXDS vsx_fix_truncv2dfv2di2 {}
 
@@ -1639,6 +1681,9 @@
   const vull __builtin_vsx_xvcvdpuxws (vd);
     XVCVDPUXWS vsx_xvcvdpuxws {}
 
+  const vd __builtin_vsx_xvcvspdp (vf);
+    XVCVSPDP vsx_xvcvspdp {}
+
   const vsll __builtin_vsx_xvcvspsxds (vf);
     XVCVSPSXDS vsx_xvcvspsxds {}
 
@@ -2687,28 +2732,31 @@
   signed long long __builtin_darn_raw ();
     DARN_RAW darn_raw {}
 
-  const signed int __builtin_dtstsfi_eq_dd (unsigned int, _Decimal64);
+  double __builtin_mffsl ();
+    MFFSL rs6000_mffsl {}
+
+  const signed int __builtin_dfp_dtstsfi_eq_dd (unsigned int, _Decimal64);
     TSTSFI_EQ_DD dfptstsfi_eq_dd {}
 
-  const signed int __builtin_dtstsfi_eq_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_eq_td (unsigned int, _Decimal128);
     TSTSFI_EQ_TD dfptstsfi_eq_td {}
 
-  const signed int __builtin_dtstsfi_gt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_gt_dd (unsigned int, _Decimal64);
     TSTSFI_GT_DD dfptstsfi_gt_dd {}
 
-  const signed int __builtin_dtstsfi_gt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_gt_td (unsigned int, _Decimal128);
     TSTSFI_GT_TD dfptstsfi_gt_td {}
 
-  const signed int __builtin_dtstsfi_lt_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_lt_dd (unsigned int, _Decimal64);
     TSTSFI_LT_DD dfptstsfi_lt_dd {}
 
-  const signed int __builtin_dtstsfi_lt_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_lt_td (unsigned int, _Decimal128);
     TSTSFI_LT_TD dfptstsfi_lt_td {}
 
-  const signed int __builtin_dtstsfi_ov_dd (unsigned int, _Decimal64);
+  const signed int __builtin_dfp_dtstsfi_ov_dd (unsigned int, _Decimal64);
     TSTSFI_OV_DD dfptstsfi_unordered_dd {}
 
-  const signed int __builtin_dtstsfi_ov_td (unsigned int, _Decimal128);
+  const signed int __builtin_dfp_dtstsfi_ov_td (unsigned int, _Decimal128);
     TSTSFI_OV_TD dfptstsfi_unordered_td {}
 
 
@@ -2720,10 +2768,10 @@
 ; 32 bits, and the return value is DImode, so it seems that
 ; TARGET_64BIT (actually TARGET_POWERPC64) is justified.  TBD. ####
 [power9-64]
-; The following two are inexplicably named __builtin_{alti,}vec_* while
+; The following two are inexplicably named __builtin_altivec_* while
 ; their load counterparts are __builtin_vsx_*.  Need to deprecate
 ; these interfaces in favor of the other naming scheme (or vice versa).
-  void __builtin_vec_xst_len_r (vop, void *, unsigned long long);
+  void __builtin_altivec_xst_len_r (vop, void *, unsigned long long);
     XST_LEN_R xst_len_r {}
 
   void __builtin_altivec_stxvl (vop, void *, unsigned long long);
@@ -3063,12 +3111,114 @@
   const unsigned long long __builtin_altivec_vgnb (vuq, const int <2,7>);
     VGNB vgnb {}
 
+  const vuc __builtin_altivec_vinsgubvlx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBL vinsertgl_v16qi {}
+
+  const vuc __builtin_altivec_vinsgubvrx (unsigned char, vuc, unsigned int);
+    VINSERTGPRBR vinsertgr_v16qi {}
+
+  const vull __builtin_altivec_vinsgudvlx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDL vinsertgl_v2di {}
+
+  const vull __builtin_altivec_vinsgudvrx (unsigned long long, vull, unsigned int);
+    VINSERTGPRDR vinsertgr_v2di {}
+
+  const vus __builtin_altivec_vinsguhvlx (unsigned short, vus, unsigned int);
+    VINSERTGPRHL vinsertgl_v8hi {}
+
+  const vus __builtin_altivec_vinsguhvrx (unsigned short, vus, unsigned int);
+    VINSERTGPRHR vinsertgr_v8hi {}
+
+  const vui __builtin_altivec_vinsguwvlx (unsigned int, vui, unsigned int);
+    VINSERTGPRWL vinsertgl_v4si {}
+
+  const vui __builtin_altivec_vinsguwvrx (unsigned int, vui, unsigned int);
+    VINSERTGPRWR vinsertgr_v4si {}
+
+  const vuc __builtin_altivec_vinsvubvlx (vuc, vuc, unsigned int);
+    VINSERTVPRBL vinsertvl_v16qi {}
+
+  const vuc __builtin_altivec_vinsvubvrx (vuc, vuc, unsigned int);
+    VINSERTVPRBR vinsertvr_v16qi {}
+
+  const vus __builtin_altivec_vinsvuhvlx (vus, vus, unsigned int);
+    VINSERTVPRHL vinsertvl_v8hi {}
+
+  const vus __builtin_altivec_vinsvuhvrx (vus, vus, unsigned int);
+    VINSERTVPRHR vinsertvr_v8hi {}
+
+  const vui __builtin_altivec_vinsvuwvlx (vui, vui, unsigned int);
+    VINSERTVPRWL vinsertvl_v4si {}
+
+  const vui __builtin_altivec_vinsvuwvrx (vui, vui, unsigned int);
+    VINSERTVPRWR vinsertvr_v4si {}
+
   const vuc __builtin_altivec_vpdepd (vuc, vuc);
     VPDEPD vpdepd {}
 
   const vuc __builtin_altivec_vpextd (vuc, vuc);
     VPEXTD vpextd {}
 
+  const vull __builtin_altivec_vreplace_un_uv2di (vull, unsigned long long, const int<4>);
+    VREPLACE_UN_UV2DI vreplace_un_v2di {}
+
+  const vui __builtin_altivec_vreplace_un_uv4si (vui, unsigned int, const int<4>);
+    VREPLACE_UN_UV4SI vreplace_un_v4si {}
+
+  const vd __builtin_altivec_vreplace_un_v2df (vd, double, const int<4>);
+    VREPLACE_UN_V2DF vreplace_un_v2df {}
+
+  const vsll __builtin_altivec_vreplace_un_v2di (vsll, signed long long, const int<4>);
+    VREPLACE_UN_V2DI vreplace_un_v2di {}
+
+  const vf __builtin_altivec_vreplace_un_v4sf (vf, float, const int<4>);
+    VREPLACE_UN_V4SF vreplace_un_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_un_v4si (vsi, signed int, const int<4>);
+    VREPLACE_UN_V4SI vreplace_un_v4si {}
+
+  const vull __builtin_altivec_vreplace_uv2di (vull, unsigned long long, const int<1>);
+    VREPLACE_ELT_UV2DI vreplace_elt_v2di {}
+
+  const vui __builtin_altivec_vreplace_uv4si (vui, unsigned int, const int<2>);
+    VREPLACE_ELT_UV4SI vreplace_elt_v4si {}
+
+  const vd __builtin_altivec_vreplace_v2df (vd, double, const int<1>);
+    VREPLACE_ELT_V2DF vreplace_elt_v2df {}
+
+  const vsll __builtin_altivec_vreplace_v2di (vsll, signed long long, const int<1>);
+    VREPLACE_ELT_V2DI vreplace_elt_v2di {}
+
+  const vf __builtin_altivec_vreplace_v4sf (vf, float, const int<2>);
+    VREPLACE_ELT_V4SF vreplace_elt_v4sf {}
+
+  const vsi __builtin_altivec_vreplace_v4si (vsi, signed int, const int<2>);
+    VREPLACE_ELT_V4SI vreplace_elt_v4si {}
+
+  const vsc __builtin_altivec_vsldb_v16qi (vsc, vsc, const int<3>);
+    VSLDB_V16QI vsldb_v16qi {}
+
+  const vsll __builtin_altivec_vsldb_v2di (vsll, vsll, const int<3>);
+    VSLDB_V2DI vsldb_v2di {}
+
+  const vsi __builtin_altivec_vsldb_v4si (vsi, vsi, const int<3>);
+    VSLDB_V4SI vsldb_v4si {}
+
+  const vss __builtin_altivec_vsldb_v8hi (vss, vss, const int<3>);
+    VSLDB_V8HI vsldb_v8hi {}
+
+  const vsc __builtin_altivec_vsrdb_v16qi (vsc, vsc, const int<3>);
+    VSRDB_V16QI vsrdb_v16qi {}
+
+  const vsll __builtin_altivec_vsrdb_v2di (vsll, vsll, const int<3>);
+    VSRDB_V2DI vsrdb_v2di {}
+
+  const vsi __builtin_altivec_vsrdb_v4si (vsi, vsi, const int<3>);
+    VSRDB_V4SI vsrdb_v4si {}
+
+  const vss __builtin_altivec_vsrdb_v8hi (vss, vss, const int<3>);
+    VSRDB_V8HI vsrdb_v8hi {}
+
   const vuc __builtin_altivec_vstribl (vuc);
     VSTRIBL vstril_v16qi {}
 
@@ -3093,6 +3243,51 @@
   const signed int __builtin_altivec_vstrihr_p (vus);
     VSTRIHR_P vstrir_p_v8hi {}
 
+  const signed int __builtin_vsx_xvtlsbb_all_ones (vuc);
+    XVTLSBB_ONES xvtlsbbo {}
+
+  const signed int __builtin_vsx_xvtlsbb_all_zeros (vuc);
+    XVTLSBB_ZEROS xvtlsbbz {}
+
+  const vf __builtin_vsx_vxxsplti32dx_v4sf (vf, const int<1>, float);
+    VXXSPLTI32DX_V4SF xxsplti32dx_v4sf {}
+
+  const vsi __builtin_vsx_vxxsplti32dx_v4si (vsi, const int<1>, signed int);
+    VXXSPLTI32DX_V4SI xxsplti32dx_v4si {}
+
+  const vd __builtin_vsx_vxxspltidp (float);
+    VXXSPLTIDP xxspltidp_v2df {}
+
+  const vf __builtin_vsx_vxxspltiw_v4sf (float);
+    VXXSPLTIW_V4SF xxspltiw_v4sf {}
+
+  const vsi __builtin_vsx_vxxspltiw_v4si (signed int);
+    VXXSPLTIW_V4SI xxspltiw_v4si {}
+
+  const vuc __builtin_vsx_xvcvbf16spn (vuc);
+    XVCVBF16SPN vsx_xvcvbf16spn {}
+
+  const vuc __builtin_vsx_xvcvspbf16 (vuc);
+    XVCVSPBF16 vsx_xvcvspbf16 {}
+
+  const vsc __builtin_vsx_xxblend_v16qi (vsc, vsc, vuc);
+    VXXBLEND_V16QI xxblend_v16qi {}
+
+  const vd __builtin_vsx_xxblend_v2df (vd, vd, vull);
+    VXXBLEND_V2DF xxblend_v2df {}
+
+  const vsll __builtin_vsx_xxblend_v2di (vsll, vsll, vull);
+    VXXBLEND_V2DI xxblend_v2di {}
+
+  const vf __builtin_vsx_xxblend_v4sf (vf, vf, vui);
+    VXXBLEND_V4SF xxblend_v4sf {}
+
+  const vsi __builtin_vsx_xxblend_v4si (vsi, vsi, vui);
+    VXXBLEND_V4SI xxblend_v4si {}
+
+  const vss __builtin_vsx_xxblend_v8hi (vss, vss, vus);
+    VXXBLEND_V8HI xxblend_v8hi {}
+
   const vop __builtin_vsx_xxeval (vop, vop, vop, const int <8>);
     XXEVAL xxeval {}
 
@@ -3108,11 +3303,37 @@
   const vus __builtin_vsx_xxgenpcvm_v8hi (vus, const int <2>);
     XXGENPCVM_V8HI xxgenpcvm_v8hi {}
 
-  const vuc __builtin_vsx_xvcvbf16spn (vuc);
-    XVCVBF16SPN vsx_xvcvbf16spn {}
+; TODO: This was quite hackish in the original code, and we may need to add
+; mode-specific expansions rather than using CODE_FOR_xxpermx throughout.
+  const vuc __builtin_vsx_xxpermx_uv16qi (vuc, vuc, vuc, const int<3>);
+    XXPERMX_UV16QI xxpermx {}
 
-  const vuc __builtin_vsx_xvcvspbf16 (vuc);
-    XVCVSPBF16 vsx_xvcvspbf16 {}
+  const vull __builtin_vsx_xxpermx_uv2di (vull, vull, vuc, const int<3>);
+    XXPERMX_UV2DI xxpermx {}
+
+  const vui __builtin_vsx_xxpermx_uv4si (vui, vui, vuc, const int<3>);
+    XXPERMX_UV4SI xxpermx {}
+
+  const vus __builtin_vsx_xxpermx_uv8hi (vus, vus, vuc, const int<3>);
+    XXPERMX_UV8HI xxpermx {}
+
+  const vsc __builtin_vsx_xxpermx_v16qi (vsc, vsc, vuc, const int<3>);
+    XXPERMX_V16QI xxpermx {}
+
+  const vd __builtin_vsx_xxpermx_v2df (vd, vd, vuc, const int<3>);
+    XXPERMX_V2DF xxpermx {}
+
+  const vsll __builtin_vsx_xxpermx_v2di (vsll, vsll, vuc, const int<3>);
+    XXPERMX_V2DI xxpermx {}
+
+  const vf __builtin_vsx_xxpermx_v4sf (vf, vf, vuc, const int<3>);
+    XXPERMX_V4SF xxpermx {}
+
+  const vsi __builtin_vsx_xxpermx_v4si (vsi, vsi, vuc, const int<3>);
+    XXPERMX_V4SI xxpermx {}
+
+  const vss __builtin_vsx_xxpermx_v8hi (vss, vss, vuc, const int<3>);
+    XXPERMX_V8HI xxpermx {}
 
 
 [power10-64]
@@ -3166,184 +3387,364 @@
     PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}
+    PMXVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NN_INTERNAL mma_pmxvbf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2NP mma_pmxvbf16ger2np {mma,quad}
+    PMXVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2NP_INTERNAL mma_pmxvbf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PN mma_pmxvbf16ger2pn {mma,quad}
+    PMXVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PN_INTERNAL mma_pmxvbf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvbf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2PP mma_pmxvbf16ger2pp {mma,quad}
+    PMXVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvbf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2PP_INTERNAL mma_pmxvbf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2 mma_pmxvf16ger2 {mma}
+    PMXVF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2_INTERNAL mma_pmxvf16ger2 {mma}
 
   void __builtin_mma_pmxvf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NN mma_pmxvf16ger2nn {mma,quad}
+    PMXVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2nn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NN_INTERNAL mma_pmxvf16ger2nn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2np (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2NP mma_pmxvf16ger2np {mma,quad}
+    PMXVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2np_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2NP_INTERNAL mma_pmxvf16ger2np {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PN mma_pmxvf16ger2pn {mma,quad}
+    PMXVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pn_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PN_INTERNAL mma_pmxvf16ger2pn {mma,quad}
 
   void __builtin_mma_pmxvf16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVF16GER2PP mma_pmxvf16ger2pp {mma,quad}
+    PMXVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVF16GER2PP_INTERNAL mma_pmxvf16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvf32ger (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GER mma_pmxvf32ger {mma}
+    PMXVF32GER nothing {mma}
+
+  void __builtin_mma_pmxvf32ger_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GER_INTERNAL mma_pmxvf32ger {mma}
 
   void __builtin_mma_pmxvf32gernn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNN mma_pmxvf32gernn {mma,quad}
+    PMXVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNN_INTERNAL mma_pmxvf32gernn {mma,quad}
 
   void __builtin_mma_pmxvf32gernp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERNP mma_pmxvf32gernp {mma,quad}
+    PMXVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gernp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERNP_INTERNAL mma_pmxvf32gernp {mma,quad}
 
   void __builtin_mma_pmxvf32gerpn (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPN mma_pmxvf32gerpn {mma,quad}
+    PMXVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpn_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPN_INTERNAL mma_pmxvf32gerpn {mma,quad}
 
   void __builtin_mma_pmxvf32gerpp (v512 *, vuc, vuc, const int<4>, const int<4>);
-    PMXVF32GERPP mma_pmxvf32gerpp {mma,quad}
+    PMXVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvf32gerpp_internal (v512 *, vuc, vuc, const int<4>, const int<4>);
+    PMXVF32GERPP_INTERNAL mma_pmxvf32gerpp {mma,quad}
 
   void __builtin_mma_pmxvf64ger (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GER mma_pmxvf64ger {mma,pair}
+    PMXVF64GER nothing {mma,pair}
+
+  void __builtin_mma_pmxvf64ger_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GER_INTERNAL mma_pmxvf64ger {mma,pair}
 
   void __builtin_mma_pmxvf64gernn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNN mma_pmxvf64gernn {mma,pair,quad}
+    PMXVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNN_INTERNAL mma_pmxvf64gernn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gernp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERNP mma_pmxvf64gernp {mma,pair,quad}
+    PMXVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gernp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERNP_INTERNAL mma_pmxvf64gernp {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpn (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPN mma_pmxvf64gerpn {mma,pair,quad}
+    PMXVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpn_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPN_INTERNAL mma_pmxvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_pmxvf64gerpp (v512 *, v256, vuc, const int<4>, const int<2>);
-    PMXVF64GERPP mma_pmxvf64gerpp {mma,pair,quad}
+    PMXVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_pmxvf64gerpp_internal (v512 *, v256, vuc, const int<4>, const int<2>);
+    PMXVF64GERPP_INTERNAL mma_pmxvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_pmxvi16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2 mma_pmxvi16ger2 {mma}
+    PMXVI16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2_INTERNAL mma_pmxvi16ger2 {mma}
 
   void __builtin_mma_pmxvi16ger2pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2PP mma_pmxvi16ger2pp {mma,quad}
+    PMXVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2PP_INTERNAL mma_pmxvi16ger2pp {mma,quad}
 
   void __builtin_mma_pmxvi16ger2s (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2S mma_pmxvi16ger2s {mma}
+    PMXVI16GER2S nothing {mma}
+
+  void __builtin_mma_pmxvi16ger2s_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2S_INTERNAL mma_pmxvi16ger2s {mma}
 
   void __builtin_mma_pmxvi16ger2spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVI16GER2SPP mma_pmxvi16ger2spp {mma,quad}
+    PMXVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi16ger2spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVI16GER2SPP_INTERNAL mma_pmxvi16ger2spp {mma,quad}
 
   void __builtin_mma_pmxvi4ger8 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
-    PMXVI4GER8 mma_pmxvi4ger8 {mma}
+    PMXVI4GER8 nothing {mma}
+
+  void __builtin_mma_pmxvi4ger8_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<8>);
+    PMXVI4GER8_INTERNAL mma_pmxvi4ger8 {mma}
 
   void __builtin_mma_pmxvi4ger8pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI4GER8PP mma_pmxvi4ger8pp {mma,quad}
+    PMXVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi4ger8pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI4GER8PP_INTERNAL mma_pmxvi4ger8pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4 mma_pmxvi8ger4 {mma}
+    PMXVI8GER4 nothing {mma}
+
+  void __builtin_mma_pmxvi8ger4_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4_INTERNAL mma_pmxvi8ger4 {mma}
 
   void __builtin_mma_pmxvi8ger4pp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4PP mma_pmxvi8ger4pp {mma,quad}
+    PMXVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4pp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4PP_INTERNAL mma_pmxvi8ger4pp {mma,quad}
 
   void __builtin_mma_pmxvi8ger4spp (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
-    PMXVI8GER4SPP mma_pmxvi8ger4spp {mma,quad}
+    PMXVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_pmxvi8ger4spp_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<4>);
+    PMXVI8GER4SPP_INTERNAL mma_pmxvi8ger4spp {mma,quad}
 
   void __builtin_mma_xvbf16ger2 (v512 *, vuc, vuc);
-    XVBF16GER2 mma_xvbf16ger2 {mma}
+    XVBF16GER2 nothing {mma}
+
+  void __builtin_mma_xvbf16ger2_internal (v512 *, vuc, vuc);
+    XVBF16GER2_INTERNAL mma_xvbf16ger2 {mma}
 
   void __builtin_mma_xvbf16ger2nn (v512 *, vuc, vuc);
-    XVBF16GER2NN mma_xvbf16ger2nn {mma,quad}
+    XVBF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2nn_internal (v512 *, vuc, vuc);
+    XVBF16GER2NN_INTERNAL mma_xvbf16ger2nn {mma,quad}
 
   void __builtin_mma_xvbf16ger2np (v512 *, vuc, vuc);
-    XVBF16GER2NP mma_xvbf16ger2np {mma,quad}
+    XVBF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2np_internal (v512 *, vuc, vuc);
+    XVBF16GER2NP_INTERNAL mma_xvbf16ger2np {mma,quad}
 
   void __builtin_mma_xvbf16ger2pn (v512 *, vuc, vuc);
-    XVBF16GER2PN mma_xvbf16ger2pn {mma,quad}
+    XVBF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pn_internal (v512 *, vuc, vuc);
+    XVBF16GER2PN_INTERNAL mma_xvbf16ger2pn {mma,quad}
 
   void __builtin_mma_xvbf16ger2pp (v512 *, vuc, vuc);
-    XVBF16GER2PP mma_xvbf16ger2pp {mma,quad}
+    XVBF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvbf16ger2pp_internal (v512 *, vuc, vuc);
+    XVBF16GER2PP_INTERNAL mma_xvbf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf16ger2 (v512 *, vuc, vuc);
-    XVF16GER2 mma_xvf16ger2 {mma}
+    XVF16GER2 nothing {mma}
+
+  void __builtin_mma_xvf16ger2_internal (v512 *, vuc, vuc);
+    XVF16GER2_INTERNAL mma_xvf16ger2 {mma}
 
   void __builtin_mma_xvf16ger2nn (v512 *, vuc, vuc);
-    XVF16GER2NN mma_xvf16ger2nn {mma,quad}
+    XVF16GER2NN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2nn_internal (v512 *, vuc, vuc);
+    XVF16GER2NN_INTERNAL mma_xvf16ger2nn {mma,quad}
 
   void __builtin_mma_xvf16ger2np (v512 *, vuc, vuc);
-    XVF16GER2NP mma_xvf16ger2np {mma,quad}
+    XVF16GER2NP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2np_internal (v512 *, vuc, vuc);
+    XVF16GER2NP_INTERNAL mma_xvf16ger2np {mma,quad}
 
   void __builtin_mma_xvf16ger2pn (v512 *, vuc, vuc);
-    XVF16GER2PN mma_xvf16ger2pn {mma,quad}
+    XVF16GER2PN nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pn_internal (v512 *, vuc, vuc);
+    XVF16GER2PN_INTERNAL mma_xvf16ger2pn {mma,quad}
 
   void __builtin_mma_xvf16ger2pp (v512 *, vuc, vuc);
-    XVF16GER2PP mma_xvf16ger2pp {mma,quad}
+    XVF16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvf16ger2pp_internal (v512 *, vuc, vuc);
+    XVF16GER2PP_INTERNAL mma_xvf16ger2pp {mma,quad}
 
   void __builtin_mma_xvf32ger (v512 *, vuc, vuc);
-    XVF32GER mma_xvf32ger {mma}
+    XVF32GER nothing {mma}
+
+  void __builtin_mma_xvf32ger_internal (v512 *, vuc, vuc);
+    XVF32GER_INTERNAL mma_xvf32ger {mma}
 
   void __builtin_mma_xvf32gernn (v512 *, vuc, vuc);
-    XVF32GERNN mma_xvf32gernn {mma,quad}
+    XVF32GERNN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernn_internal (v512 *, vuc, vuc);
+    XVF32GERNN_INTERNAL mma_xvf32gernn {mma,quad}
 
   void __builtin_mma_xvf32gernp (v512 *, vuc, vuc);
-    XVF32GERNP mma_xvf32gernp {mma,quad}
+    XVF32GERNP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gernp_internal (v512 *, vuc, vuc);
+    XVF32GERNP_INTERNAL mma_xvf32gernp {mma,quad}
 
   void __builtin_mma_xvf32gerpn (v512 *, vuc, vuc);
-    XVF32GERPN mma_xvf32gerpn {mma,quad}
+    XVF32GERPN nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpn_internal (v512 *, vuc, vuc);
+    XVF32GERPN_INTERNAL mma_xvf32gerpn {mma,quad}
 
   void __builtin_mma_xvf32gerpp (v512 *, vuc, vuc);
-    XVF32GERPP mma_xvf32gerpp {mma,quad}
+    XVF32GERPP nothing {mma,quad}
+
+  void __builtin_mma_xvf32gerpp_internal (v512 *, vuc, vuc);
+    XVF32GERPP_INTERNAL mma_xvf32gerpp {mma,quad}
 
   void __builtin_mma_xvf64ger (v512 *, v256, vuc);
-    XVF64GER mma_xvf64ger {mma,pair}
+    XVF64GER nothing {mma,pair}
+
+  void __builtin_mma_xvf64ger_internal (v512 *, v256, vuc);
+    XVF64GER_INTERNAL mma_xvf64ger {mma,pair}
 
   void __builtin_mma_xvf64gernn (v512 *, v256, vuc);
-    XVF64GERNN mma_xvf64gernn {mma,pair,quad}
+    XVF64GERNN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernn_internal (v512 *, v256, vuc);
+    XVF64GERNN_INTERNAL mma_xvf64gernn {mma,pair,quad}
 
   void __builtin_mma_xvf64gernp (v512 *, v256, vuc);
-    XVF64GERNP mma_xvf64gernp {mma,pair,quad}
+    XVF64GERNP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gernp_internal (v512 *, v256, vuc);
+    XVF64GERNP_INTERNAL mma_xvf64gernp {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpn (v512 *, v256, vuc);
-    XVF64GERPN mma_xvf64gerpn {mma,pair,quad}
+    XVF64GERPN nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpn_internal (v512 *, v256, vuc);
+    XVF64GERPN_INTERNAL mma_xvf64gerpn {mma,pair,quad}
 
   void __builtin_mma_xvf64gerpp (v512 *, v256, vuc);
-    XVF64GERPP mma_xvf64gerpp {mma,pair,quad}
+    XVF64GERPP nothing {mma,pair,quad}
+
+  void __builtin_mma_xvf64gerpp_internal (v512 *, v256, vuc);
+    XVF64GERPP_INTERNAL mma_xvf64gerpp {mma,pair,quad}
 
   void __builtin_mma_xvi16ger2 (v512 *, vuc, vuc);
-    XVI16GER2 mma_xvi16ger2 {mma}
+    XVI16GER2 nothing {mma}
+
+  void __builtin_mma_xvi16ger2_internal (v512 *, vuc, vuc);
+    XVI16GER2_INTERNAL mma_xvi16ger2 {mma}
 
   void __builtin_mma_xvi16ger2pp (v512 *, vuc, vuc);
-    XVI16GER2PP mma_xvi16ger2pp {mma,quad}
+    XVI16GER2PP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2pp_internal (v512 *, vuc, vuc);
+    XVI16GER2PP_INTERNAL mma_xvi16ger2pp {mma,quad}
 
   void __builtin_mma_xvi16ger2s (v512 *, vuc, vuc);
-    XVI16GER2S mma_xvi16ger2s {mma}
+    XVI16GER2S nothing {mma}
+
+  void __builtin_mma_xvi16ger2s_internal (v512 *, vuc, vuc);
+    XVI16GER2S_INTERNAL mma_xvi16ger2s {mma}
 
   void __builtin_mma_xvi16ger2spp (v512 *, vuc, vuc);
-    XVI16GER2SPP mma_xvi16ger2spp {mma,quad}
+    XVI16GER2SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi16ger2spp_internal (v512 *, vuc, vuc);
+    XVI16GER2SPP_INTERNAL mma_xvi16ger2spp {mma,quad}
 
   void __builtin_mma_xvi4ger8 (v512 *, vuc, vuc);
-    XVI4GER8 mma_xvi4ger8 {mma}
+    XVI4GER8 nothing {mma}
+
+  void __builtin_mma_xvi4ger8_internal (v512 *, vuc, vuc);
+    XVI4GER8_INTERNAL mma_xvi4ger8 {mma}
 
   void __builtin_mma_xvi4ger8pp (v512 *, vuc, vuc);
-    XVI4GER8PP mma_xvi4ger8pp {mma,quad}
+    XVI4GER8PP nothing {mma,quad}
+
+  void __builtin_mma_xvi4ger8pp_internal (v512 *, vuc, vuc);
+    XVI4GER8PP_INTERNAL mma_xvi4ger8pp {mma,quad}
 
   void __builtin_mma_xvi8ger4 (v512 *, vuc, vuc);
-    XVI8GER4 mma_xvi8ger4 {mma}
+    XVI8GER4 nothing {mma}
+
+  void __builtin_mma_xvi8ger4_internal (v512 *, vuc, vuc);
+    XVI8GER4_INTERNAL mma_xvi8ger4 {mma}
 
   void __builtin_mma_xvi8ger4pp (v512 *, vuc, vuc);
-    XVI8GER4PP mma_xvi8ger4pp {mma,quad}
+    XVI8GER4PP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4pp_internal (v512 *, vuc, vuc);
+    XVI8GER4PP_INTERNAL mma_xvi8ger4pp {mma,quad}
 
   void __builtin_mma_xvi8ger4spp (v512 *, vuc, vuc);
-    XVI8GER4SPP mma_xvi8ger4spp {mma,quad}
+    XVI8GER4SPP nothing {mma,quad}
+
+  void __builtin_mma_xvi8ger4spp_internal (v512 *, vuc, vuc);
+    XVI8GER4SPP_INTERNAL mma_xvi8ger4spp {mma,quad}
 
   void __builtin_mma_xxmfacc (v512 *);
-    XXMFACC mma_xxmfacc {mma,quad}
+    XXMFACC nothing {mma,quad}
+
+  void __builtin_mma_xxmfacc_internal (v512 *);
+    XXMFACC_INTERNAL mma_xxmfacc {mma,quad}
 
   void __builtin_mma_xxmtacc (v512 *);
-    XXMTACC mma_xxmtacc {mma,quad}
+    XXMTACC nothing {mma,quad}
+
+  void __builtin_mma_xxmtacc_internal (v512 *);
+    XXMTACC_INTERNAL mma_xxmtacc {mma,quad}
 
   void __builtin_mma_xxsetaccz (v512 *);
-    XXSETACCZ mma_xxsetaccz {mma}
+    XXSETACCZ nothing {mma}
+
+  void __builtin_mma_xxsetaccz_internal (v512 *);
+    XXSETACCZ_INTERNAL mma_xxsetaccz {mma}
 
 
 
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index c8ef4dc5229..b8da23c1f0a 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -14602,7 +14602,7 @@ altivec_init_builtins (void)
     {
       def_builtin ("__builtin_altivec_stxvl", void_ftype_v16qi_pvoid_long,
 		   P9V_BUILTIN_STXVL);
-      def_builtin ("__builtin_xst_len_r", void_ftype_v16qi_pvoid_long,
+      def_builtin ("__builtin_altivec_xst_len_r", void_ftype_v16qi_pvoid_long,
 		   P9V_BUILTIN_XST_LEN_R);
     }


^ permalink raw reply	[flat|nested] 19+ messages in thread

* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins
@ 2020-08-30 15:28 William Schmidt
  0 siblings, 0 replies; 19+ messages in thread
From: William Schmidt @ 2020-08-30 15:28 UTC (permalink / raw)
  To: gcc-cvs

https://gcc.gnu.org/g:8dd43b49d94e0482de1c716ae9ee9dcb005fc198

commit 8dd43b49d94e0482de1c716ae9ee9dcb005fc198
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date:   Sun Aug 30 10:28:28 2020 -0500

    rs6000: Fix more inconsistencies between old and new builtins
    
    2020-08-30  Bill Schmidt  <wschmidt@linux.ibm.com>
    
    gcc/
            * config/rs6000/rs6000-builtin-new.def: Numerous repairs.

Diff:
---
 gcc/config/rs6000/rs6000-builtin-new.def | 127 ++++++++++++++++++-------------
 1 file changed, 72 insertions(+), 55 deletions(-)

diff --git a/gcc/config/rs6000/rs6000-builtin-new.def b/gcc/config/rs6000/rs6000-builtin-new.def
index c0f8821be64..565c14f9f4c 100644
--- a/gcc/config/rs6000/rs6000-builtin-new.def
+++ b/gcc/config/rs6000/rs6000-builtin-new.def
@@ -321,9 +321,7 @@
   pure vuc __builtin_altivec_lvsr (signed long long, void *);
     LVSR altivec_lvsr {ldvec}
 
-; Following LVX one is redundant, and I don't think we need to
-; keep it.  It only maps to LVX_V4SI.  Probably remove.
-  pure vop __builtin_altivec_lvx (signed long long, void *);
+  pure vsi __builtin_altivec_lvx (signed long long, void *);
     LVX altivec_lvx_v4si {ldvec}
 
   pure vsc __builtin_altivec_lvx_v16qi (signed long long, void *);
@@ -395,7 +393,8 @@
   void __builtin_altivec_stvrxl (vop, signed long long, void *);
     STVRXL altivec_stvrxl {stvec}
 
-; Skipping the STVX one that maps to STVX_V4SI (see above for LVX)
+  void __builtin_altivec_stvx (vsi, signed long long, void *);
+    STVX altivec_stvx_v4si {stvec}
 
   void __builtin_altivec_stvx_v16qi (vsc, signed long long, void *);
     STVX_V16QI altivec_stvx_v16qi {stvec}
@@ -409,7 +408,8 @@
   void __builtin_altivec_stvx_v8hi (vss, signed long long, void *);
     STVX_V8HI altivec_stvx_v8hi {stvec}
 
-; Skipping the STVXL one that maps to STVXL_V4SI (see above for LVX)
+  void __builtin_altivec_stvxl (vsi, signed long long, void *);
+    STVXL altivec_stvxl_v4si {stvec}
 
   void __builtin_altivec_stvxl_v16qi (vsc, signed long long, void *);
     STVXL_V16QI altivec_stvxl_v16qi {stvec}
@@ -1206,6 +1206,33 @@
   const vull __builtin_altivec_vxor_v2di_uns (vull, vull);
     VXOR_V2DI_UNS xorv2di3 {}
 
+  const signed __int128 __builtin_vec_ext_v1ti (vsq, signed int);
+    VEC_EXT_V1TI nothing {extract}
+
+  const double __builtin_vec_ext_v2df (vd, signed int);
+    VEC_EXT_V2DF nothing {extract}
+
+  const signed long long __builtin_vec_ext_v2di (vsll, signed int);
+    VEC_EXT_V2DI nothing {extract}
+
+  const vsq __builtin_vec_init_v1ti (signed __int128);
+    VEC_INIT_V1TI nothing {init}
+
+  const vd __builtin_vec_init_v2df (double, double);
+    VEC_INIT_V2DF nothing {init}
+
+  const vsll __builtin_vec_init_v2di (signed long long, signed long long);
+    VEC_INIT_V2DI nothing {init}
+
+  const vsq __builtin_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
+    VEC_SET_V1TI nothing {set}
+
+  const vd __builtin_vec_set_v2df (vd, double, const int<1>);
+    VEC_SET_V2DF nothing {set}
+
+  const vsll __builtin_vec_set_v2di (vsll, signed long long, const int<1>);
+    VEC_SET_V2DI nothing {set}
+
   const vbc __builtin_vsx_cmpge_16qi (vsc, vsc);
     CMPGE_16QI vector_nltv16qi {}
 
@@ -1348,7 +1375,7 @@
   pure vsll __builtin_vsx_lxvd2x_v2di (signed long long, void *);
     LXVD2X_V2DI vsx_load_v2di {ldvec}
 
-  pure vsc __builtin_vsx_lxvw4x_16qi (signed long long, void *);
+  pure vsc __builtin_vsx_lxvw4x_v16qi (signed long long, void *);
     LXVW4X_V16QI vsx_load_v16qi {ldvec}
 
   pure vf __builtin_vsx_lxvw4x_v4sf (signed long long, void *);
@@ -1467,33 +1494,6 @@
   const vf __builtin_vsx_uns_floato_v2di (vull);
     UNS_FLOATO_V2DI unsfloatov2di {}
 
-  const signed __int128 __builtin_vsx_vec_ext_v1ti (vsq, signed int);
-    VEC_EXT_V1TI nothing {extract}
-
-  const double __builtin_vsx_vec_ext_v2df (vd, signed int);
-    VEC_EXT_V2DF nothing {extract}
-
-  const signed long long __builtin_vsx_vec_ext_v2di (vsll, signed int);
-    VEC_EXT_V2DI nothing {extract}
-
-  const vsq __builtin_vsx_vec_init_v1ti (signed __int128);
-    VEC_INIT_V1TI nothing {init}
-
-  const vd __builtin_vsx_vec_init_v2df (double, double);
-    VEC_INIT_V2DF nothing {init}
-
-  const vsll __builtin_vsx_vec_init_v2di (signed long long, signed long long);
-    VEC_INIT_V2DI nothing {init}
-
-  const vsq __builtin_vsx_vec_set_v1ti (vsq, signed __int128, const int<0,0>);
-    VEC_SET_V1TI nothing {set}
-
-  const vd __builtin_vsx_vec_set_v2df (vd, double, const int<1>);
-    VEC_SET_V2DF nothing {set}
-
-  const vsll __builtin_vsx_vec_set_v2di (vsll, signed long long, const int<1>);
-    VEC_SET_V2DI nothing {set}
-
   const vsll __builtin_vsx_vsigned_v2df (vd);
     VEC_VSIGNED_V2DF vsx_xvcvdpsxds {}
 
@@ -2753,24 +2753,18 @@
 
 ; Builtins requiring hardware support for IEEE-128 floating-point.
 [ieee128-hw]
-  fpmath _Float128 __builtin_vsx_addf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_addf128_round_to_odd (_Float128, _Float128);
     ADDF128_ODD addkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_divf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_divf128_round_to_odd (_Float128, _Float128);
     DIVF128_ODD divkf3_odd {}
 
-  fpmath _Float128 __builtin_vsx_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
+  fpmath _Float128 __builtin_fmaf128_round_to_odd (_Float128, _Float128, _Float128);
     FMAF128_ODD fmakf4_odd {}
 
-  fpmath _Float128 __builtin_vsx_mulf128_round_to_odd (_Float128, _Float128);
+  fpmath _Float128 __builtin_mulf128_round_to_odd (_Float128, _Float128);
     MULF128_ODD mulkf3_odd {}
 
-  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
-    VSEEQP xsxexpqp_kf {}
-
-  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
-    VSESQP xsxsigqp_kf {}
-
   const signed int __builtin_vsx_scalar_cmp_exp_qp_eq (_Float128, _Float128);
     VSCEQPEQ xscmpexpqp_eq_kf {}
 
@@ -2783,6 +2777,21 @@
   const signed int __builtin_vsx_scalar_cmp_exp_qp_unordered (_Float128, _Float128);
     VSCEQPUO xscmpexpqp_unordered_kf {}
 
+  fpmath _Float128 __builtin_sqrtf128_round_to_odd (_Float128);
+    SQRTF128_ODD sqrtkf2_odd {}
+
+  fpmath _Float128 __builtin_subf128_round_to_odd (_Float128, _Float128);
+    SUBF128_ODD subkf3_odd {}
+
+  fpmath double __builtin_truncf128_round_to_odd (_Float128);
+    TRUNCF128_ODD trunckfdf2_odd {}
+
+  const unsigned long long __builtin_vsx_scalar_extract_expq (_Float128);
+    VSEEQP xsxexpqp_kf {}
+
+  const unsigned __int128 __builtin_vsx_scalar_extract_sigq (_Float128);
+    VSESQP xsxsigqp_kf {}
+
   const _Float128 __builtin_vsx_scalar_insert_exp_q (unsigned __int128, unsigned long long);
     VSIEQP xsiexpqp_kf {}
 
@@ -2795,15 +2804,6 @@
   const unsigned int __builtin_vsx_scalar_test_neg_qp (_Float128);
     VSTDCNQP xststdcnegqp_kf {}
 
-  fpmath _Float128 __builtin_vsx_sqrtf128_round_to_odd (_Float128);
-    SQRTF128_ODD sqrtkf2_odd {}
-
-  fpmath _Float128 __builtin_vsx_subf128_round_to_odd (_Float128, _Float128);
-    SUBF128_ODD subkf3_odd {}
-
-  fpmath double __builtin_vsx_truncf128_round_to_odd (_Float128);
-    TRUNCF128_ODD trunckfdf2_odd {}
-
 
 
 ; Decimal floating-point builtins.
@@ -3132,12 +3132,26 @@
     PEXTD pextd {}
 
 
+; TODO: Land-mine alert.
+;
+; The original built-in support has code that assumes the internal
+; copy of an MMA built-in function appears immediately after the
+; external copy in the built-in table.  This is fragile.  For the
+; new support, we should transition this to do a name lookup in
+; the built-in hash table, but to start with we will honor the
+; positioning of the built-ins in the table.
 [mma]
   void __builtin_mma_assemble_acc (v512 *, vuc, vuc, vuc, vuc);
-    ASSEMBLE_ACC mma_assemble_acc {mma}
+    ASSEMBLE_ACC nothing {mma}
+
+  void __builtin_mma_assemble_acc_internal (v512 *, vuc, vuc, vuc, vuc);
+    ASSEMBLE_ACC_INTERNAL mma_assemble_acc {mma}
 
   void __builtin_mma_assemble_pair (v256 *, vuc, vuc);
-    ASSEMBLE_PAIR mma_assemble_pair {mma}
+    ASSEMBLE_PAIR nothing {mma}
+
+  void __builtin_mma_assemble_pair_internal (v256 *, vuc, vuc);
+    ASSEMBLE_PAIR_INTERNAL mma_assemble_pair {mma}
 
   void __builtin_mma_disassemble_acc (void *, v512 *);
     DISASSEMBLE_ACC nothing {mma,quad}
@@ -3146,7 +3160,10 @@
     DISASSEMBLE_PAIR nothing {mma,pair}
 
   void __builtin_mma_pmxvbf16ger2 (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
-    PMXVBF16GER2 mma_pmxvbf16ger2 {mma}
+    PMXVBF16GER2 nothing {mma}
+
+  void __builtin_mma_pmxvbf16ger2_internal (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
+    PMXVBF16GER2_INTERNAL mma_pmxvbf16ger2 {mma}
 
   void __builtin_mma_pmxvbf16ger2nn (v512 *, vuc, vuc, const int<4>, const int<4>, const int<2>);
     PMXVBF16GER2NN mma_pmxvbf16ger2nn {mma,quad}


^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2020-10-29 19:53 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-09-16 21:32 [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Fix more inconsistencies between old and new builtins William Schmidt
  -- strict thread matches above, loose matches on Subject: below --
2020-10-29 19:53 William Schmidt
2020-10-29 19:53 William Schmidt
2020-10-29 19:53 William Schmidt
2020-10-29 19:53 William Schmidt
2020-10-27 16:30 William Schmidt
2020-10-27 16:30 William Schmidt
2020-10-27 16:30 William Schmidt
2020-10-27 16:30 William Schmidt
2020-09-16 21:32 William Schmidt
2020-09-16 21:32 William Schmidt
2020-09-16 21:31 William Schmidt
2020-09-15 16:54 William Schmidt
2020-09-14 14:00 William Schmidt
2020-09-14 14:00 William Schmidt
2020-09-14 14:00 William Schmidt
2020-09-13 23:21 William Schmidt
2020-09-13 16:07 William Schmidt
2020-08-30 15:28 William Schmidt

This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox,
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).