From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1729) id 305BD38582BF; Fri, 9 Sep 2022 13:13:48 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 305BD38582BF DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1662729228; bh=+5PF0irR2syhkfsZxrMW/YEaSQnQma8iRe4JGHv9bCQ=; h=From:To:Subject:Date:From; b=Z/8CWQi6eV41LRV4muYa+MU4cnycPW83TKBAcIhL/nRwJxBJg4V/1U3be50QcQLH2 N4nC7HUOROrag7KjAouBeGprfhGleUHdLctstscAnzdoWO26npbZQWl2oxas0e7EM3 DSGbVibGtvUmx3gbjwVQkLWj0yCwFISR+73QOwv4= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: Kwok Yeung To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2561] amdgcn: Add support for additional natively supported floating-point operations X-Act-Checkin: gcc X-Git-Author: Kwok Cheung Yeung X-Git-Refname: refs/heads/master X-Git-Oldrev: a8b0b13da7379feb31950a9d2ad74b98a29c547f X-Git-Newrev: eff73c104a3db882f3bc7f567f322e40470c7571 Message-Id: <20220909131348.305BD38582BF@sourceware.org> Date: Fri, 9 Sep 2022 13:13:48 +0000 (GMT) List-Id: https://gcc.gnu.org/g:eff73c104a3db882f3bc7f567f322e40470c7571 commit r13-2561-geff73c104a3db882f3bc7f567f322e40470c7571 Author: Kwok Cheung Yeung Date: Fri Sep 9 13:10:07 2022 +0000 amdgcn: Add support for additional natively supported floating-point operations This adds support for the following natively supported floating-point operations, in scalar and vectorized modes: floor, ceil, exp2*, log2*, sin*, cos*, ldexp, frexp * These operations are single-precision float only and are only active if unsafe_math_optimizations are enabled (due to potential numerical precision issues). 2022-09-09 Kwok Cheung Yeung gcc/ * config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP, FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins. * config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype. 
* config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG, MATH_UNOP_TRIG): New iterators. (math_unop): New attributes. (<math_unop><mode>2, <math_unop><mode>2<exec>, <math_unop><mode>2, <math_unop><mode>2<exec>, *<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn, ldexp<mode>3, ldexp<mode>3<exec>, frexp<mode>_exp2, frexp<mode>_mant2, frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions. (<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders. * config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of dconst1over2pi. (gcn_dconst1over2pi): New. (gcn_builtin_type_index): Add entry for v64df type. (v64df_type_node): New. (gcn_init_builtin_types): Initialize v64df_type_node. (gcn_expand_builtin_1): Expand new builtins to instructions. (print_operand): Fix assembler output for 1/(2*PI) constant. * config/gcn/gcn.md (unspec): Add new entries. Diff: --- gcc/config/gcn/gcn-builtins.def | 35 ++++++++ gcc/config/gcn/gcn-protos.h | 1 + gcc/config/gcn/gcn-valu.md | 181 ++++++++++++++++++++++++++++++++++++++++ gcc/config/gcn/gcn.cc | 114 ++++++++++++++++++++++++- gcc/config/gcn/gcn.md | 4 +- 5 files changed, 332 insertions(+), 3 deletions(-) diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def index 54e4ea4e953..27691909925 100644 --- a/gcc/config/gcn/gcn-builtins.def +++ b/gcc/config/gcn/gcn-builtins.def @@ -59,6 +59,41 @@ DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */, _A2 (GCN_BTI_SF, GCN_BTI_SF), gcn_expand_builtin_1) +DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */, + "fabsvf", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */, + "ldexpvf", B_INSN, + _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */, + "ldexpv", B_INSN, + _A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */, + "frexpvf_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */, + "frexpvf_mant", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) +
+DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */, + "frexpv_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64DF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */, + "frexpv_mant", B_INSN, + _A2 (GCN_BTI_V64DF, GCN_BTI_V64DF), + gcn_expand_builtin_1) + DEF_BUILTIN (CMP_SWAP, -1, "cmp_swap", B_INSN, _A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT), diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 38197b929fd..ca804609c09 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -54,6 +54,7 @@ extern int gcn_hard_regno_nregs (int regno, machine_mode mode); extern void gcn_hsa_declare_function_name (FILE *file, const char *name, tree decl); extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int); +extern REAL_VALUE_TYPE gcn_dconst1over2pi (void); extern bool gcn_inline_constant64_p (rtx, bool); extern bool gcn_inline_constant_p (rtx); extern int gcn_inline_fp_constant_p (rtx, bool); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 8c33ae0c717..3bfdf8213fc 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -2290,6 +2290,187 @@ [(set_attr "type" "vop1") (set_attr "length" "8")]) +; These FP unops have f64, f32 and f16 versions. +(define_int_iterator MATH_UNOP_1OR2REG + [UNSPEC_FLOOR UNSPEC_CEIL]) + +; These FP unops only have f16/f32 versions. 
+(define_int_iterator MATH_UNOP_1REG + [UNSPEC_EXP2 UNSPEC_LOG2]) + +(define_int_iterator MATH_UNOP_TRIG + [UNSPEC_SIN UNSPEC_COS]) + +(define_int_attr math_unop + [(UNSPEC_FLOOR "floor") + (UNSPEC_CEIL "ceil") + (UNSPEC_EXP2 "exp2") + (UNSPEC_LOG2 "log2") + (UNSPEC_SIN "sin") + (UNSPEC_COS "cos")]) + +(define_insn "2" + [(set (match_operand:FP 0 "register_operand" "= v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "2" + [(set (match_operand:V_FP 0 "register_operand" "= v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "2" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + (unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + "flag_unsafe_math_optimizations" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "2" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + "flag_unsafe_math_optimizations" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*2_insn" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + (unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*2_insn" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +; Trigonometric functions need their input scaled by 1/(2*PI) 
first. + +(define_expand "2" + [(set (match_dup 2) + (mult:FP_1REG + (match_dup 3) + (match_operand:FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:FP_1REG 0 "register_operand") + (unspec:FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (mode); + operands[3] = const_double_from_real_value (gcn_dconst1over2pi (), + mode); + }) + +(define_expand "2" + [(set (match_dup 2) + (mult:V_FP_1REG + (match_dup 3) + (match_operand:V_FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:V_FP_1REG 0 "register_operand") + (unspec:V_FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (mode); + operands[3] = + gcn_vec_constant (mode, + const_double_from_real_value (gcn_dconst1over2pi (), + mode)); + }) + +; Implement ldexp pattern + +(define_insn "ldexp3" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB") + (match_operand:SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "ldexp3" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vB") + (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +; Implement frexp patterns + +(define_insn "frexp_exp2" + [(set (match_operand:SI 0 "register_operand" "=v") + (unspec:SI + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp_mant2" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + 
+(define_insn "frexp_exp2" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (unspec:V64SI + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp_mant2" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + ;; }}} ;; {{{ FP fused multiply and add diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 82667556512..eb822e20dd1 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -779,12 +779,20 @@ init_ext_gcn_constants (void) /* FIXME: this constant probably does not match what hardware really loads. Reality check it eventually. */ real_from_string (&dconst1over2pi, - "0.1591549430918953357663423455968866839"); + "0.15915494309189532"); real_convert (&dconst1over2pi, SFmode, &dconst1over2pi); ext_gcn_constants_init = 1; } +REAL_VALUE_TYPE +gcn_dconst1over2pi (void) +{ + if (!ext_gcn_constants_init) + init_ext_gcn_constants (); + return dconst1over2pi; +} + /* Return non-zero if X is a constant that can appear as an inline operand. This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) Or a vector of those. 
@@ -3605,6 +3613,7 @@ enum gcn_builtin_type_index GCN_BTI_SF, GCN_BTI_V64SI, GCN_BTI_V64SF, + GCN_BTI_V64DF, GCN_BTI_V64PTR, GCN_BTI_SIPTR, GCN_BTI_SFPTR, @@ -3621,6 +3630,7 @@ static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX]; #define sf_type_node (gcn_builtin_types[GCN_BTI_SF]) #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI]) #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF]) +#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF]) #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR]) #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR]) #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR]) @@ -3710,6 +3720,7 @@ gcn_init_builtin_types (void) sf_type_node = float32_type_node; v64si_type_node = build_vector_type (intSI_type_node, 64); v64sf_type_node = build_vector_type (float_type_node, 64); + v64df_type_node = build_vector_type (double_type_node, 64); v64ptr_type_node = build_vector_type (unsigned_intDI_type_node /*build_pointer_type (integer_type_node) */ @@ -3977,6 +3988,105 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , emit_insn (gen_sqrtsf2 (target, arg)); return target; } + case GCN_BUILTIN_FABSVF: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_absv64sf2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPVF: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg1 = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64sf3_exec + (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPV: + { + if (ignore) + return target; + rtx exec = 
gcn_full_exec_reg (); + rtx arg1 = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64df3_exec + (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_mant2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_mant2_exec + (target, arg, gcn_gen_undef (V64DFmode), exec)); + return target; + } case GCN_BUILTIN_OMP_DIM_SIZE: { if (ignore) @@ -6476,7 +6586,7 @@ print_operand (FILE *file, rtx x, int code) str = "-4.0"; break; case 248: - str = "1/pi"; + str = "0.15915494"; break; default: rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 
7805e867901..a3c9523cd6d 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -82,7 +82,9 @@ UNSPEC_GATHER UNSPEC_SCATTER UNSPEC_RCP - UNSPEC_FLBIT_INT]) + UNSPEC_FLBIT_INT + UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2 + UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT]) ;; }}} ;; {{{ Attributes