public inbox for gcc-cvs@sourceware.org
help / color / mirror / Atom feed
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-07-17 17:24 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-07-17 17:24 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d9f7f25c345d077f3e906ad347aeea746e88b951
commit d9f7f25c345d077f3e906ad347aeea746e88b951
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
2020-06-17 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (rs6000_expand_builtin): Support two
kinds of masks and icodes; use a new algorithm under control of
new_builtins_are_live, while still supporting the old one.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 644 ++++++++++++++++++++++++++++++----------
1 file changed, 481 insertions(+), 163 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 6463bfc77c6..a7320712ad3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9736,7 +9736,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -9861,6 +9860,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -12714,6 +12817,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -12721,9 +12828,13 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].mask
+ : rs6000_builtin_info[uns_fcode].mask);
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -12754,7 +12865,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -12792,207 +12903,414 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
-
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-10-29 19:52 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-10-29 19:52 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:254a8a388021d60af4360aa0340d0171d7ea60fa
commit 254a8a388021d60af4360aa0340d0171d7ea60fa
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 731 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 157 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index c40421b15e7..fe37e45512c 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -10132,7 +10132,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10257,6 +10256,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13201,6 +13304,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13208,9 +13315,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13241,7 +13357,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13279,201 +13395,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- /* We need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13702,7 +14116,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-10-27 16:29 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-10-27 16:29 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2dadf9a664fc6aabe359da51cd98846ab55468c6
commit 2dadf9a664fc6aabe359da51cd98846ab55468c6
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 731 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 157 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index c40421b15e7..fe37e45512c 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -10132,7 +10132,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10257,6 +10256,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13201,6 +13304,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13208,9 +13315,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13241,7 +13357,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13279,201 +13395,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- /* We need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13702,7 +14116,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-09-16 21:31 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-09-16 21:31 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:efeb781287d66055fa3717ca97ee7641212092ba
commit efeb781287d66055fa3717ca97ee7641212092ba
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 731 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 157 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index d25722c8abf..b8d9074ef2d 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9977,7 +9977,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10102,6 +10101,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13012,6 +13115,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13019,9 +13126,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13052,7 +13168,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13090,201 +13206,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- /* We need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13515,7 +13929,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-09-14 13:59 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-09-14 13:59 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:f0c762793de627d52d2c27b0814a8fe2d21a587e
commit f0c762793de627d52d2c27b0814a8fe2d21a587e
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 731 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 157 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 842c396db2f..8aa3cd762f2 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9977,7 +9977,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10102,6 +10101,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13012,6 +13115,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13019,9 +13126,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13052,7 +13168,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13090,201 +13206,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- /* We need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13515,7 +13929,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-08-28 20:09 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-08-28 20:09 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:d15c024cd811d91c3c4c75ed1f4924ed71e0a656
commit d15c024cd811d91c3c4c75ed1f4924ed71e0a656
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 737 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 163 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 6b2723884af..37e69bcf650 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9977,7 +9977,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10102,6 +10101,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13016,6 +13119,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13023,9 +13130,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13056,7 +13172,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13094,207 +13210,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13525,7 +13933,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-08-20 16:40 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-08-20 16:40 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:e04aa1d35f37e11a62eb51e884cbfc1eff8fedcb
commit e04aa1d35f37e11a62eb51e884cbfc1eff8fedcb
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 737 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 163 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 6b2723884af..37e69bcf650 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9977,7 +9977,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10102,6 +10101,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13016,6 +13119,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13023,9 +13130,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13056,7 +13172,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13094,207 +13210,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13525,7 +13933,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-08-18 18:46 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-08-18 18:46 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:8812f83bb3671a8553cc19a0c693d2a9c781c279
commit 8812f83bb3671a8553cc19a0c693d2a9c781c279
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 737 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 163 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index d46efce06bf..e9fc4a3d9df 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9977,7 +9977,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -10102,6 +10101,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -13016,6 +13119,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -13023,9 +13130,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -13056,7 +13172,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -13094,207 +13210,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13525,7 +13933,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-07-27 18:49 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-07-27 18:49 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:2cb1b8f3b01aa3b39798817c7a5a7e6ae88dace9
commit 2cb1b8f3b01aa3b39798817c7a5a7e6ae88dace9
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
Note: For future rebasing, put new_mma_expand_builtin into the
previous patch.
2020-07-26 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (new_mma_expand_builtin): New.
(rs6000_expand_builtin): Support two kinds of masks and icodes;
use a new algorithm under control of new_builtins_are_live,
while still supporting the old one; do lazy enablement to
support #pragma target.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 737 +++++++++++++++++++++++++++++++---------
1 file changed, 574 insertions(+), 163 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 6463bfc77c6..35d963284dd 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -9736,7 +9736,6 @@ altivec_expand_stv_builtin (enum insn_code icode, tree exp)
/* Expand the MMA built-in in EXP.
Store true in *EXPANDEDP if we found a built-in to expand. */
-
static rtx
mma_expand_builtin (tree exp, rtx target, bool *expandedp)
{
@@ -9861,6 +9860,110 @@ mma_expand_builtin (tree exp, rtx target, bool *expandedp)
return target;
}
+/* Expand the MMA built-in in EXP. */
+static rtx
+new_mma_expand_builtin (tree exp, rtx target, rs6000_builtins fcode,
+ insn_code icode)
+{
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg;
+ call_expr_arg_iterator iter;
+ const struct insn_operand_data *insn_op;
+ rtx op[MAX_MMA_OPERANDS];
+ unsigned nopnds = 0;
+ /* ### This attr/void_func business needs replacing. */
+ unsigned attr = rs6000_builtin_info[fcode].attr;
+ bool void_func = (attr & RS6000_BTC_VOID);
+ machine_mode tmode = VOIDmode;
+
+ if (TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node)
+ {
+ tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ op[nopnds++] = target;
+ }
+ else
+ target = const0_rtx;
+
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ if (arg == error_mark_node)
+ return const0_rtx;
+
+ rtx opnd;
+ insn_op = &insn_data[icode].operand[nopnds];
+ if (TREE_CODE (arg) == ADDR_EXPR
+ && MEM_P (DECL_RTL (TREE_OPERAND (arg, 0))))
+ opnd = DECL_RTL (TREE_OPERAND (arg, 0));
+ else
+ opnd = expand_normal (arg);
+
+ if (!(*insn_op->predicate) (opnd, insn_op->mode))
+ {
+ if (!strcmp (insn_op->constraint, "n"))
+ {
+ if (!CONST_INT_P (opnd))
+ error ("argument %d must be an unsigned literal", nopnds);
+ else
+ error ("argument %d is an unsigned literal that is "
+ "out of range", nopnds);
+ return const0_rtx;
+ }
+ opnd = copy_to_mode_reg (insn_op->mode, opnd);
+ }
+
+ /* Some MMA instructions have INOUT accumulator operands, so force
+ their target register to be the same as their input register. */
+ /* ### The void_func test needs to happen some other way,
+ probably with TREE_TYPE (TREE_TYPE (fndecl)) as earlier. */
+ if (!void_func
+ && nopnds == 1
+ && !strcmp (insn_op->constraint, "0")
+ && insn_op->mode == tmode
+ && REG_P (opnd)
+ && (*insn_data[icode].operand[0].predicate) (opnd, tmode))
+ target = op[0] = opnd;
+
+ op[nopnds++] = opnd;
+ }
+
+ rtx pat;
+ switch (nopnds)
+ {
+ case 1:
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
+ break;
+ case 6:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
+ break;
+ case 7:
+ pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5], op[6]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ return target;
+}
+
static rtx
stv_expand_builtin (insn_code icode, rtx *op,
machine_mode tmode, machine_mode smode)
@@ -12714,6 +12817,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -12721,9 +12828,18 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? 0
+ : rs6000_builtin_info[uns_fcode].mask);
+ /*
+ bif_enable enable = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].enable
+ : (bif_enable) 0);
+ */
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -12754,7 +12870,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -12792,207 +12908,499 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
-
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+ /* In case of "#pragma target" changes, we initialize all builtins
+ but check for actual availability during expand time. */
+ switch (bifaddr->enable)
+ {
+ default:
+ gcc_unreachable ();
+ case ENB_ALWAYS:
+ break;
+ case ENB_P5:
+ if (!TARGET_POPCNTB)
+ return const0_rtx;
+ break;
+ case ENB_P6:
+ if (!TARGET_CMPB)
+ return const0_rtx;
+ break;
+ case ENB_ALTIVEC:
+ if (!TARGET_ALTIVEC)
+ return const0_rtx;
+ break;
+ case ENB_VSX:
+ if (!TARGET_VSX)
+ return const0_rtx;
+ break;
+ case ENB_P7:
+ if (!TARGET_POPCNTD)
+ return const0_rtx;
+ break;
+ case ENB_P7_64:
+ if (!TARGET_POPCNTD || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P8:
+ if (!TARGET_DIRECT_MOVE)
+ return const0_rtx;
+ break;
+ case ENB_P8V:
+ if (!TARGET_P8_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_P9:
+ if (!TARGET_MODULO)
+ return const0_rtx;
+ break;
+ case ENB_P9_64:
+ if (!TARGET_MODULO || !TARGET_POWERPC64)
+ return const0_rtx;
+ break;
+ case ENB_P9V:
+ if (!TARGET_P9_VECTOR)
+ return const0_rtx;
+ break;
+ case ENB_IEEE128_HW:
+ if (!TARGET_FLOAT128_HW)
+ return const0_rtx;
+ break;
+ case ENB_DFP:
+ if (!TARGET_DFP)
+ return const0_rtx;
+ break;
+ case ENB_CRYPTO:
+ if (!TARGET_CRYPTO)
+ return const0_rtx;
+ break;
+ case ENB_HTM:
+ if (!TARGET_HTM)
+ return const0_rtx;
+ break;
+ case ENB_P10:
+ if (!TARGET_POWER10)
+ return const0_rtx;
+ break;
+ case ENB_MMA:
+ if (!TARGET_MMA)
+ return const0_rtx;
+ break;
+ };
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (bif_is_mma (*bifaddr))
+ return new_mma_expand_builtin (exp, target, fcode, icode);
+
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_MMA)
- {
- ret = mma_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
- if (success)
- return ret;
- }
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
- if (success)
- return ret;
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_OPND_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_MMA)
+ {
+ ret = mma_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_QUATERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
@@ -13223,7 +13631,10 @@ rs6000_init_builtins (void)
general purpose extensions (970 and newer) to allow the use of
the target attribute. */
if (TARGET_EXTRA_BUILTINS)
- altivec_init_builtins ();
+ {
+ altivec_init_builtins ();
+ mma_init_builtins ();
+ }
if (TARGET_HTM)
htm_init_builtins ();
^ permalink raw reply [flat|nested] 10+ messages in thread
* [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms
@ 2020-06-17 20:06 William Schmidt
0 siblings, 0 replies; 10+ messages in thread
From: William Schmidt @ 2020-06-17 20:06 UTC (permalink / raw)
To: gcc-cvs
https://gcc.gnu.org/g:65a24a885f2ff5acfe26e71e986db00b3d0ba2fb
commit 65a24a885f2ff5acfe26e71e986db00b3d0ba2fb
Author: Bill Schmidt <wschmidt@linux.ibm.com>
Date: Wed Jun 17 12:10:56 2020 -0500
rs6000: Support two builtin expansion algorithms
2020-06-17 Bill Schmidt <wschmidt@linux.ibm.com>
* config/rs6000/rs6000-call.c (rs6000_expand_builtin): Support two
kinds of masks and icodes; use a new algorithm under control of
new_builtins_are_live, while still supporting the old one.
Diff:
---
gcc/config/rs6000/rs6000-call.c | 530 ++++++++++++++++++++++++++++------------
1 file changed, 374 insertions(+), 156 deletions(-)
diff --git a/gcc/config/rs6000/rs6000-call.c b/gcc/config/rs6000/rs6000-call.c
index 1c56540ca86..6848b5c8be3 100644
--- a/gcc/config/rs6000/rs6000-call.c
+++ b/gcc/config/rs6000/rs6000-call.c
@@ -12303,6 +12303,10 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED)
{
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ /* #### This needs to be rs6000_gen_builtins now. Can't make this
+ whole until the full set of builtins has been added, and there
+ is no overlap between the two enumerations, so we can run the
+ two in parallel. */
enum rs6000_builtins fcode
= (enum rs6000_builtins) DECL_MD_FUNCTION_CODE (fndecl);
size_t uns_fcode = (size_t)fcode;
@@ -12310,9 +12314,13 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
size_t i;
rtx ret;
bool success;
- HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
+ HOST_WIDE_INT mask = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].mask
+ : rs6000_builtin_info[uns_fcode].mask);
bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
- enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
+ enum insn_code icode = (new_builtins_are_live
+ ? rs6000_builtin_info_x[uns_fcode].icode
+ : rs6000_builtin_info[uns_fcode].icode);
/* We have two different modes (KFmode, TFmode) that are the IEEE 128-bit
floating point type, depending on whether long double is the IBM extended
@@ -12343,7 +12351,7 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case CODE_FOR_xststdcqp_kf: icode = CODE_FOR_xststdcqp_tf; break;
}
- if (TARGET_DEBUG_BUILTIN)
+ if (!new_builtins_are_live && TARGET_DEBUG_BUILTIN)
{
const char *name1 = rs6000_builtin_info[uns_fcode].name;
const char *name2 = (icode != CODE_FOR_nothing)
@@ -12381,200 +12389,410 @@ rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
return expand_call (exp, target, ignore);
}
- switch (fcode)
+ if (new_builtins_are_live)
{
- case RS6000_BUILTIN_RECIP:
- return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
-
- case RS6000_BUILTIN_RECIPF:
- return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
-
- case RS6000_BUILTIN_RSQRTF:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
-
- case RS6000_BUILTIN_RSQRT:
- return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
+ bifdata *bifaddr = &rs6000_builtin_info_x[uns_fcode];
- case POWER7_BUILTIN_BPERMD:
- return rs6000_expand_binop_builtin (((TARGET_64BIT)
- ? CODE_FOR_bpermd_di
- : CODE_FOR_bpermd_si), exp, target);
-
- case RS6000_BUILTIN_GET_TB:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
- target);
-
- case RS6000_BUILTIN_MFTB:
- return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
- ? CODE_FOR_rs6000_mftb_di
- : CODE_FOR_rs6000_mftb_si),
- target);
-
- case RS6000_BUILTIN_MFFS:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
-
- case RS6000_BUILTIN_MTFSB0:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+ if (bif_is_nosoft (*bifaddr)
+ && rs6000_isa_flags & OPTION_MASK_SOFT_FLOAT)
+ {
+ error ("%<%s%> not supported with %<-msoft-float%>",
+ bifaddr->bifname);
+ return const0_rtx;
+ }
- case RS6000_BUILTIN_MTFSB1:
- return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+ if (bif_is_no32bit (*bifaddr) && TARGET_32BIT)
+ fatal_error (input_location,
+ "%<%s%> is not supported in 32-bit mode",
+ bifaddr->bifname);
- case RS6000_BUILTIN_SET_FPSCR_RN:
- return rs6000_expand_set_fpscr_rn_builtin (CODE_FOR_rs6000_set_fpscr_rn,
- exp);
+ if (bif_is_cpu (*bifaddr))
+ return cpu_expand_builtin (fcode, exp, target);
- case RS6000_BUILTIN_SET_FPSCR_DRN:
- return
- rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
- exp);
+ if (bif_is_init (*bifaddr))
+ return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
- case RS6000_BUILTIN_MFFSL:
- return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+ if (bif_is_set (*bifaddr))
+ return altivec_expand_vec_set_builtin (exp);
- case RS6000_BUILTIN_MTFSF:
- return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+ if (bif_is_extract (*bifaddr))
+ return altivec_expand_vec_ext_builtin (exp, target);
- case RS6000_BUILTIN_CPU_INIT:
- case RS6000_BUILTIN_CPU_IS:
- case RS6000_BUILTIN_CPU_SUPPORTS:
- return cpu_expand_builtin (fcode, exp, target);
+ if (bif_is_predicate (*bifaddr))
+ return altivec_expand_predicate_builtin (icode, exp, target);
- case MISC_BUILTIN_SPEC_BARRIER:
- {
- emit_insn (gen_speculation_barrier ());
- return NULL_RTX;
- }
+ if (bif_is_htm (*bifaddr))
+ return new_htm_expand_builtin (bifaddr, fcode, exp, target);
- case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
- case ALTIVEC_BUILTIN_MASK_FOR_STORE:
- {
- int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
- : (int) CODE_FOR_altivec_lvsl_direct);
- machine_mode tmode = insn_data[icode2].operand[0].mode;
- machine_mode mode = insn_data[icode2].operand[1].mode;
- tree arg;
- rtx op, addr, pat;
+ rtx pat;
+ const int MAX_BUILTIN_ARGS = 5;
+ tree arg[MAX_BUILTIN_ARGS];
+ rtx op[MAX_BUILTIN_ARGS];
+ machine_mode mode[MAX_BUILTIN_ARGS + 1];
- gcc_assert (TARGET_ALTIVEC);
+ int nargs = bifaddr->nargs;
+ gcc_assert (nargs <= MAX_BUILTIN_ARGS);
- arg = CALL_EXPR_ARG (exp, 0);
- gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
- op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
- addr = memory_address (mode, op);
- if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
- op = addr;
- else
- {
- /* For the load case need to negate the address. */
- op = gen_reg_rtx (GET_MODE (addr));
- emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
- }
- op = gen_rtx_MEM (mode, op);
+ mode[0] = insn_data[icode].operand[0].mode;
+ for (int i = 0; i < nargs; i++)
+ {
+ arg[i] = CALL_EXPR_ARG (exp, i);
+ if (arg[i] == error_mark_node)
+ return const0_rtx;
+ op[i] = expand_normal (arg[i]);
+ mode[i+1] = insn_data[icode].operand[i+1].mode;
+ }
- if (target == 0
- || GET_MODE (target) != tmode
- || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
- target = gen_reg_rtx (tmode);
+ /* Check for restricted constant arguments. */
+ for (int i = 0; i < 2; i++)
+ {
+ switch (bifaddr->restr[i])
+ {
+ default:
+ case RES_NONE:
+ break;
+ case RES_BITS:
+ {
+ size_t mask = (1 << bifaddr->restr_val1[i]) - 1;
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || TREE_INT_CST_LOW (restr_arg) & ~mask)
+ {
+ error ("argument %d must be a %d-bit unsigned literal",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a literal between %d and %d,"
+ " inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VAR_RANGE:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) == INTEGER_CST
+ && !IN_RANGE (tree_to_shwi (restr_arg),
+ bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be a variable or a literal "
+ "between %d and %d, inclusive",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ case RES_VALUES:
+ {
+ tree restr_arg = arg[bifaddr->restr_opnd[i] - 1];
+ STRIP_NOPS (restr_arg);
+ if (TREE_CODE (restr_arg) != INTEGER_CST
+ || (tree_to_shwi (restr_arg) != bifaddr->restr_val1[i]
+ && tree_to_shwi (restr_arg) != bifaddr->restr_val2[i]))
+ {
+ error ("argument %d must be either a literal %d or a "
+ "literal %d",
+ bifaddr->restr_opnd[i], bifaddr->restr_val1[i],
+ bifaddr->restr_val2[i]);
+ return CONST0_RTX (mode[0]);
+ }
+ break;
+ }
+ }
+ }
- pat = GEN_FCN (icode2) (target, op);
- if (!pat)
- return 0;
- emit_insn (pat);
+ if (bif_is_ldstmask (*bifaddr))
+ return rs6000_expand_ldst_mask (target, fcode, arg[0]);
- return target;
- }
+ if (bif_is_stvec (*bifaddr))
+ {
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return stv_expand_builtin (icode, op, mode[0], mode[1]);
+ }
- case ALTIVEC_BUILTIN_VCFUX:
- case ALTIVEC_BUILTIN_VCFSX:
- case ALTIVEC_BUILTIN_VCTUXS:
- case ALTIVEC_BUILTIN_VCTSXS:
- /* FIXME: There's got to be a nicer way to handle this case than
- constructing a new CALL_EXPR. */
- if (call_expr_nargs (exp) == 1)
+ if (bif_is_ldvec (*bifaddr))
{
- exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
- 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
+ if (bif_is_reve (*bifaddr))
+ icode = elemrev_icode (fcode);
+ return ldv_expand_builtin (target, icode, op, mode[0]);
}
- break;
- /* For the pack and unpack int128 routines, fix up the builtin so it
- uses the correct IBM128 type. */
- case MISC_BUILTIN_PACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ if (fcode == MISC_BUILTIN_PACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_packtf;
fcode = MISC_BUILTIN_PACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
-
- case MISC_BUILTIN_UNPACK_IF:
- if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ else if (fcode == MISC_BUILTIN_UNPACK_IF
+ && TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
{
icode = CODE_FOR_unpacktf;
fcode = MISC_BUILTIN_UNPACK_TF;
uns_fcode = (size_t)fcode;
}
- break;
- default:
- break;
- }
+ if (target == 0
+ || GET_MODE (target) != mode[0]
+ || !(*insn_data[icode].operand[0].predicate) (target, mode[0]))
+ target = gen_reg_rtx (mode[0]);
- if (TARGET_ALTIVEC)
- {
- ret = altivec_expand_builtin (exp, target, &success);
+ for (int i = 0; i < nargs; i++)
+ if (! (*insn_data[icode].operand[i+1].predicate) (op[i], mode[i+1]))
+ op[i] = copy_to_mode_reg (mode[i+1], op[i]);
+
+ switch (nargs)
+ {
+ default:
+ gcc_assert (MAX_BUILTIN_ARGS == 5);
+ gcc_unreachable ();
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ pat = GEN_FCN (icode) (target, op[0]);
+ break;
+ case 2:
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ break;
+ case 3:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
+ break;
+ case 4:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
+ break;
+ case 5:
+ pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
+ break;
+ }
+
+ if (!pat)
+ return 0;
+ emit_insn (pat);
- if (success)
- return ret;
+ return target;
}
- if (TARGET_HTM)
+ else
{
- ret = htm_expand_builtin (exp, target, &success);
+ switch (fcode)
+ {
+ case RS6000_BUILTIN_RECIP:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
- if (success)
- return ret;
- }
+ case RS6000_BUILTIN_RECIPF:
+ return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
- unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
- /* RS6000_BTC_SPECIAL represents no-operand operators. */
- gcc_assert (attr == RS6000_BTC_UNARY
- || attr == RS6000_BTC_BINARY
- || attr == RS6000_BTC_TERNARY
- || attr == RS6000_BTC_QUATERNARY
- || attr == RS6000_BTC_SPECIAL);
-
- /* Handle simple unary operations. */
- d = bdesc_1arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_unop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRTF:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
- /* Handle simple binary operations. */
- d = bdesc_2arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_binop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_RSQRT:
+ return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
- /* Handle simple ternary operations. */
- d = bdesc_3arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_ternop_builtin (icode, exp, target);
+ case POWER7_BUILTIN_BPERMD:
+ return rs6000_expand_binop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_bpermd_di
+ : CODE_FOR_bpermd_si),
+ exp, target);
- /* Handle simple quaternary operations. */
- d = bdesc_4arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_quaternop_builtin (icode, exp, target);
+ case RS6000_BUILTIN_GET_TB:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
+ target);
- /* Handle simple no-argument operations. */
- d = bdesc_0arg;
- for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
- if (d->code == fcode)
- return rs6000_expand_zeroop_builtin (icode, target);
+ case RS6000_BUILTIN_MFTB:
+ return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
+ ? CODE_FOR_rs6000_mftb_di
+ : CODE_FOR_rs6000_mftb_si),
+ target);
- gcc_unreachable ();
+ case RS6000_BUILTIN_MFFS:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
+
+ case RS6000_BUILTIN_MTFSB0:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb0, exp);
+
+ case RS6000_BUILTIN_MTFSB1:
+ return rs6000_expand_mtfsb_builtin (CODE_FOR_rs6000_mtfsb1, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_RN:
+ return rs6000_expand_set_fpscr_rn_builtin
+ (CODE_FOR_rs6000_set_fpscr_rn, exp);
+
+ case RS6000_BUILTIN_SET_FPSCR_DRN:
+ return
+ rs6000_expand_set_fpscr_drn_builtin (CODE_FOR_rs6000_set_fpscr_drn,
+ exp);
+
+ case RS6000_BUILTIN_MFFSL:
+ return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffsl, target);
+
+ case RS6000_BUILTIN_MTFSF:
+ return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
+
+ case RS6000_BUILTIN_CPU_INIT:
+ case RS6000_BUILTIN_CPU_IS:
+ case RS6000_BUILTIN_CPU_SUPPORTS:
+ return cpu_expand_builtin (fcode, exp, target);
+
+ case MISC_BUILTIN_SPEC_BARRIER:
+ {
+ emit_insn (gen_speculation_barrier ());
+ return NULL_RTX;
+ }
+
+ case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
+ case ALTIVEC_BUILTIN_MASK_FOR_STORE:
+ {
+ int icode2 = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
+ : (int) CODE_FOR_altivec_lvsl_direct);
+ machine_mode tmode = insn_data[icode2].operand[0].mode;
+ machine_mode mode = insn_data[icode2].operand[1].mode;
+ tree arg;
+ rtx op, addr, pat;
+
+ gcc_assert (TARGET_ALTIVEC);
+
+ arg = CALL_EXPR_ARG (exp, 0);
+ gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
+ op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
+ addr = memory_address (mode, op);
+ if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
+ op = addr;
+ else
+ {
+ /* For the load case need to negate the address. */
+ op = gen_reg_rtx (GET_MODE (addr));
+ emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr),
+ addr)));
+ }
+ op = gen_rtx_MEM (mode, op);
+
+ if (target == 0
+ || GET_MODE (target) != tmode
+ || ! (*insn_data[icode2].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+
+ pat = GEN_FCN (icode2) (target, op);
+ if (!pat)
+ return 0;
+ emit_insn (pat);
+
+ return target;
+ }
+
+ case ALTIVEC_BUILTIN_VCFUX:
+ case ALTIVEC_BUILTIN_VCFSX:
+ case ALTIVEC_BUILTIN_VCTUXS:
+ case ALTIVEC_BUILTIN_VCTSXS:
+ /* #### Replace this nonsense with a separate built-in for the
+ vectorizer to use, which I believe is the only way we get
+ into this situation. */
+ /* FIXME: There's got to be a nicer way to handle this case than
+ constructing a new CALL_EXPR. */
+ if (call_expr_nargs (exp) == 1)
+ {
+ exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
+ 2, CALL_EXPR_ARG (exp, 0),
+ integer_zero_node);
+ }
+ break;
+
+ /* For the pack and unpack int128 routines, fix up the builtin so it
+ uses the correct IBM128 type. */
+ case MISC_BUILTIN_PACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_packtf;
+ fcode = MISC_BUILTIN_PACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ case MISC_BUILTIN_UNPACK_IF:
+ if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
+ {
+ icode = CODE_FOR_unpacktf;
+ fcode = MISC_BUILTIN_UNPACK_TF;
+ uns_fcode = (size_t)fcode;
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ if (TARGET_ALTIVEC)
+ {
+ ret = altivec_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+ if (TARGET_HTM)
+ {
+ ret = htm_expand_builtin (exp, target, &success);
+
+ if (success)
+ return ret;
+ }
+
+ unsigned attr = (rs6000_builtin_info[uns_fcode].attr
+ & RS6000_BTC_TYPE_MASK);
+ /* RS6000_BTC_SPECIAL represents no-operand operators. */
+ gcc_assert (attr == RS6000_BTC_UNARY
+ || attr == RS6000_BTC_BINARY
+ || attr == RS6000_BTC_TERNARY
+ || attr == RS6000_BTC_SPECIAL);
+
+ /* Handle simple unary operations. */
+ d = bdesc_1arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_unop_builtin (icode, exp, target);
+
+ /* Handle simple binary operations. */
+ d = bdesc_2arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_binop_builtin (icode, exp, target);
+
+ /* Handle simple ternary operations. */
+ d = bdesc_3arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_ternop_builtin (icode, exp, target);
+
+ /* Handle simple quaternary operations. */
+ d = bdesc_4arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_4arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_quaternop_builtin (icode, exp, target);
+
+ /* Handle simple no-argument operations. */
+ d = bdesc_0arg;
+ for (i = 0; i < ARRAY_SIZE (bdesc_0arg); i++, d++)
+ if (d->code == fcode)
+ return rs6000_expand_zeroop_builtin (icode, target);
+
+ gcc_unreachable ();
+ }
}
/* Create a builtin vector type with a name. Taking care not to give
^ permalink raw reply [flat|nested] 10+ messages in thread
end of thread, other threads:[~2020-10-29 19:52 UTC | newest]
Thread overview: 10+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2020-07-17 17:24 [gcc(refs/users/wschmidt/heads/builtins3)] rs6000: Support two builtin expansion algorithms William Schmidt
-- strict thread matches above, loose matches on Subject: below --
2020-10-29 19:52 William Schmidt
2020-10-27 16:29 William Schmidt
2020-09-16 21:31 William Schmidt
2020-09-14 13:59 William Schmidt
2020-08-28 20:09 William Schmidt
2020-08-20 16:40 William Schmidt
2020-08-18 18:46 William Schmidt
2020-07-27 18:49 William Schmidt
2020-06-17 20:06 William Schmidt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).