From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 1534) id 4842C3857354; Fri, 9 Sep 2022 13:37:51 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 4842C3857354 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1662730671; bh=DY+WMZ78mwvp6ycEV4fauUxKxRrr0zHs+O9gVDLdCuQ=; h=From:To:Subject:Date:From; b=ND03ZidjBShx60bef+Y5PRzVzfh09lcWFyDG5l7nQVRwxATGDAs/qqA/BrgQy7nXY +nbZFJ+KQrmMYF6piCtnyFQcVgRykwsVj24Feol7vJIivePTR6RjlXrcXj9wlxYcpR UJ8lEJM6s8PaVj9tSSLSsC1kI/8IJulatIQ15PcY= Content-Type: text/plain; charset="us-ascii" MIME-Version: 1.0 Content-Transfer-Encoding: 7bit From: Tobias Burnus To: gcc-cvs@gcc.gnu.org Subject: [gcc/devel/omp/gcc-12] amdgcn: Add support for additional natively supported floating-point operations X-Act-Checkin: gcc X-Git-Author: Kwok Cheung Yeung X-Git-Refname: refs/heads/devel/omp/gcc-12 X-Git-Oldrev: 1aa03128944dad6b3ad094b1320ed950f62d9cd7 X-Git-Newrev: de28004fa5f808cf6d9fca0366468204599256b6 Message-Id: <20220909133751.4842C3857354@sourceware.org> Date: Fri, 9 Sep 2022 13:37:51 +0000 (GMT) List-Id: https://gcc.gnu.org/g:de28004fa5f808cf6d9fca0366468204599256b6 commit de28004fa5f808cf6d9fca0366468204599256b6 Author: Kwok Cheung Yeung Date: Fri Sep 9 15:36:42 2022 +0200 amdgcn: Add support for additional natively supported floating-point operations This adds support for the following natively supported floating-point operations, in scalar and vectorized modes: floor, ceil, exp2*, log2*, sin*, cos*, ldexp, frexp * These operations are single-precision float only and are only active if unsafe_math_optimizations are enabled (due to potential numerical precision issues). 2022-09-09 Kwok Cheung Yeung gcc/ * config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP, FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins. * config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype. 
* config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG, MATH_UNOP_TRIG): New iterators. (math_unop): New attributes. (<math_unop><mode>2, <math_unop><mode>2<exec>, <math_unop><mode>2, <math_unop><mode>2<exec>, *<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn, ldexp<mode>3, ldexp<mode>3<exec>, frexp<mode>_exp2, frexp<mode>_mant2, frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions. (<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders. * config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of dconst1over2pi. (gcn_dconst1over2pi): New. (gcn_builtin_type_index): Add entry for v64df type. (v64df_type_node): New. (gcn_init_builtin_types): Initialize v64df_type_node. (gcn_expand_builtin_1): Expand new builtins to instructions. (print_operand): Fix assembler output for 1/(2*PI) constant. * config/gcn/gcn.md (unspec): Add new entries. (cherry picked from commit eff73c104a3db882f3bc7f567f322e40470c7571) Diff: --- gcc/ChangeLog.omp | 28 +++++++ gcc/config/gcn/gcn-builtins.def | 35 ++++++++ gcc/config/gcn/gcn-protos.h | 1 + gcc/config/gcn/gcn-valu.md | 181 ++++++++++++++++++++++++++++++++++++++++ gcc/config/gcn/gcn.cc | 114 ++++++++++++++++++++++++- gcc/config/gcn/gcn.md | 4 +- 6 files changed, 360 insertions(+), 3 deletions(-) diff --git a/gcc/ChangeLog.omp b/gcc/ChangeLog.omp index ae066d893cc..0ad8f78103c 100644 --- a/gcc/ChangeLog.omp +++ b/gcc/ChangeLog.omp @@ -1,3 +1,31 @@ +2022-09-09 Tobias Burnus + + Backport from mainline: + 2022-09-09 Kwok Cheung Yeung + + * config/gcn/gcn-builtins.def (FABSVF, LDEXPVF, LDEXPV, FREXPVF_EXP, + FREXPVF_MANT, FREXPV_EXP, FREXPV_MANT): Add new builtins. + * config/gcn/gcn-protos.h (gcn_dconst1over2pi): New prototype. + * config/gcn/gcn-valu.md (MATH_UNOP_1OR2REG, MATH_UNOP_1REG, + MATH_UNOP_TRIG): New iterators. + (math_unop): New attributes. + (<math_unop><mode>2, <math_unop><mode>2<exec>, + <math_unop><mode>2, <math_unop><mode>2<exec>, + *<math_unop><mode>2_insn, *<math_unop><mode>2<exec>_insn, + ldexp<mode>3, ldexp<mode>3<exec>, + frexp<mode>_exp2, frexp<mode>_mant2, + frexp<mode>_exp2<exec>, frexp<mode>_mant2<exec>): New instructions. + (<math_unop><mode>2, <math_unop><mode>2<exec>): New expanders. + * config/gcn/gcn.cc (init_ext_gcn_constants): Update definition of + dconst1over2pi. + (gcn_dconst1over2pi): New. + (gcn_builtin_type_index): Add entry for v64df type. + (v64df_type_node): New. 
+ (gcn_init_builtin_types): Initialize v64df_type_node. + (gcn_expand_builtin_1): Expand new builtins to instructions. + (print_operand): Fix assembler output for 1/(2*PI) constant. + * config/gcn/gcn.md (unspec): Add new entries. + 2022-09-08 Tobias Burnus Backport from mainline: diff --git a/gcc/config/gcn/gcn-builtins.def b/gcc/config/gcn/gcn-builtins.def index 54e4ea4e953..27691909925 100644 --- a/gcc/config/gcn/gcn-builtins.def +++ b/gcc/config/gcn/gcn-builtins.def @@ -59,6 +59,41 @@ DEF_BUILTIN (SQRTF, 3 /*CODE_FOR_sqrtf */, _A2 (GCN_BTI_SF, GCN_BTI_SF), gcn_expand_builtin_1) +DEF_BUILTIN (FABSVF, 3 /*CODE_FOR_fabsvf */, + "fabsvf", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPVF, 3 /*CODE_FOR_ldexpvf */, + "ldexpvf", B_INSN, + _A3 (GCN_BTI_V64SF, GCN_BTI_V64SF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (LDEXPV, 3 /*CODE_FOR_ldexpv */, + "ldexpv", B_INSN, + _A3 (GCN_BTI_V64DF, GCN_BTI_V64DF, GCN_BTI_V64SI), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_EXP, 3 /*CODE_FOR_frexpvf_exp */, + "frexpvf_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPVF_MANT, 3 /*CODE_FOR_frexpvf_mant */, + "frexpvf_mant", B_INSN, + _A2 (GCN_BTI_V64SF, GCN_BTI_V64SF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPV_EXP, 3 /*CODE_FOR_frexpv_exp */, + "frexpv_exp", B_INSN, + _A2 (GCN_BTI_V64SI, GCN_BTI_V64DF), + gcn_expand_builtin_1) + +DEF_BUILTIN (FREXPV_MANT, 3 /*CODE_FOR_frexpv_mant */, + "frexpv_mant", B_INSN, + _A2 (GCN_BTI_V64DF, GCN_BTI_V64DF), + gcn_expand_builtin_1) + DEF_BUILTIN (CMP_SWAP, -1, "cmp_swap", B_INSN, _A4 (GCN_BTI_UINT, GCN_BTI_VOIDPTR, GCN_BTI_UINT, GCN_BTI_UINT), diff --git a/gcc/config/gcn/gcn-protos.h b/gcc/config/gcn/gcn-protos.h index 38197b929fd..ca804609c09 100644 --- a/gcc/config/gcn/gcn-protos.h +++ b/gcc/config/gcn/gcn-protos.h @@ -54,6 +54,7 @@ extern int gcn_hard_regno_nregs (int regno, machine_mode mode); extern void 
gcn_hsa_declare_function_name (FILE *file, const char *name, tree decl); extern HOST_WIDE_INT gcn_initial_elimination_offset (int, int); +extern REAL_VALUE_TYPE gcn_dconst1over2pi (void); extern bool gcn_inline_constant64_p (rtx, bool); extern bool gcn_inline_constant_p (rtx); extern int gcn_inline_fp_constant_p (rtx, bool); diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index a3099f7db17..5c66f4f680b 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -2293,6 +2293,187 @@ [(set_attr "type" "vop1") (set_attr "length" "8")]) +; These FP unops have f64, f32 and f16 versions. +(define_int_iterator MATH_UNOP_1OR2REG + [UNSPEC_FLOOR UNSPEC_CEIL]) + +; These FP unops only have f16/f32 versions. +(define_int_iterator MATH_UNOP_1REG + [UNSPEC_EXP2 UNSPEC_LOG2]) + +(define_int_iterator MATH_UNOP_TRIG + [UNSPEC_SIN UNSPEC_COS]) + +(define_int_attr math_unop + [(UNSPEC_FLOOR "floor") + (UNSPEC_CEIL "ceil") + (UNSPEC_EXP2 "exp2") + (UNSPEC_LOG2 "log2") + (UNSPEC_SIN "sin") + (UNSPEC_COS "cos")]) + +(define_insn "<math_unop><mode>2" + [(set (match_operand:FP 0 "register_operand" "= v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2<exec>" + [(set (match_operand:V_FP 0 "register_operand" "= v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1OR2REG))] + "" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + (unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "<math_unop><mode>2<exec>" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_1REG))] + 
"flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*<math_unop><mode>2_insn" + [(set (match_operand:FP_1REG 0 "register_operand" "= v") + (unspec:FP_1REG + [(match_operand:FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "*<math_unop><mode>2<exec>_insn" + [(set (match_operand:V_FP_1REG 0 "register_operand" "= v") + (unspec:V_FP_1REG + [(match_operand:V_FP_1REG 1 "gcn_alu_operand" "vSvB")] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + "v_<math_unop>%i0\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +; Trigonometric functions need their input scaled by 1/(2*PI) first. + +(define_expand "<math_unop><mode>2" + [(set (match_dup 2) + (mult:FP_1REG + (match_dup 3) + (match_operand:FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:FP_1REG 0 "register_operand") + (unspec:FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = const_double_from_real_value (gcn_dconst1over2pi (), + <MODE>mode); + }) + +(define_expand "<math_unop><mode>2<exec>" + [(set (match_dup 2) + (mult:V_FP_1REG + (match_dup 3) + (match_operand:V_FP_1REG 1 "gcn_alu_operand"))) + (set (match_operand:V_FP_1REG 0 "register_operand") + (unspec:V_FP_1REG + [(match_dup 2)] + MATH_UNOP_TRIG))] + "flag_unsafe_math_optimizations" + { + operands[2] = gen_reg_rtx (<MODE>mode); + operands[3] = + gcn_vec_constant (<MODE>mode, + const_double_from_real_value (gcn_dconst1over2pi (), + <SCALAR_MODE>mode)); + }) + +; Implement ldexp pattern + +(define_insn "ldexp<mode>3" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB") + (match_operand:SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +(define_insn "ldexp<mode>3<exec>" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + 
[(match_operand:V_FP 1 "gcn_alu_operand" "vB") + (match_operand:V64SI 2 "gcn_alu_operand" "vSvA")] + UNSPEC_LDEXP))] + "" + "v_ldexp%i0\t%0, %1, %2" + [(set_attr "type" "vop3a") + (set_attr "length" "8")]) + +; Implement frexp patterns + +(define_insn "frexp<mode>_exp2" + [(set (match_operand:SI 0 "register_operand" "=v") + (unspec:SI + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_mant2" + [(set (match_operand:FP 0 "register_operand" "=v") + (unspec:FP + [(match_operand:FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_exp2<exec>" + [(set (match_operand:V64SI 0 "register_operand" "=v") + (unspec:V64SI + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_EXP))] + "" + "v_frexp_exp_i32%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + +(define_insn "frexp<mode>_mant2<exec>" + [(set (match_operand:V_FP 0 "register_operand" "=v") + (unspec:V_FP + [(match_operand:V_FP 1 "gcn_alu_operand" "vB")] + UNSPEC_FREXP_MANT))] + "" + "v_frexp_mant%i1\t%0, %1" + [(set_attr "type" "vop1") + (set_attr "length" "8")]) + ;; }}} ;; {{{ FP fused multiply and add diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 557e051603c..ef6bca53d9d 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -789,12 +789,20 @@ init_ext_gcn_constants (void) /* FIXME: this constant probably does not match what hardware really loads. Reality check it eventually. 
*/ real_from_string (&dconst1over2pi, - "0.1591549430918953357663423455968866839"); + "0.15915494309189532"); real_convert (&dconst1over2pi, SFmode, &dconst1over2pi); ext_gcn_constants_init = 1; } +REAL_VALUE_TYPE +gcn_dconst1over2pi (void) +{ + if (!ext_gcn_constants_init) + init_ext_gcn_constants (); + return dconst1over2pi; +} + /* Return non-zero if X is a constant that can appear as an inline operand. This is 0, 0.5, -0.5, 1, -1, 2, -2, 4,-4, 1/(2*pi) Or a vector of those. @@ -3636,6 +3644,7 @@ enum gcn_builtin_type_index GCN_BTI_SF, GCN_BTI_V64SI, GCN_BTI_V64SF, + GCN_BTI_V64DF, GCN_BTI_V64PTR, GCN_BTI_SIPTR, GCN_BTI_SFPTR, @@ -3652,6 +3661,7 @@ static GTY(()) tree gcn_builtin_types[GCN_BTI_MAX]; #define sf_type_node (gcn_builtin_types[GCN_BTI_SF]) #define v64si_type_node (gcn_builtin_types[GCN_BTI_V64SI]) #define v64sf_type_node (gcn_builtin_types[GCN_BTI_V64SF]) +#define v64df_type_node (gcn_builtin_types[GCN_BTI_V64DF]) #define v64ptr_type_node (gcn_builtin_types[GCN_BTI_V64PTR]) #define siptr_type_node (gcn_builtin_types[GCN_BTI_SIPTR]) #define sfptr_type_node (gcn_builtin_types[GCN_BTI_SFPTR]) @@ -3741,6 +3751,7 @@ gcn_init_builtin_types (void) sf_type_node = float32_type_node; v64si_type_node = build_vector_type (intSI_type_node, 64); v64sf_type_node = build_vector_type (float_type_node, 64); + v64df_type_node = build_vector_type (double_type_node, 64); v64ptr_type_node = build_vector_type (unsigned_intDI_type_node /*build_pointer_type (integer_type_node) */ @@ -4008,6 +4019,105 @@ gcn_expand_builtin_1 (tree exp, rtx target, rtx /*subtarget */ , emit_insn (gen_sqrtsf2 (target, arg)); return target; } + case GCN_BUILTIN_FABSVF: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_absv64sf2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPVF: + { + if 
(ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg1 = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64sf3_exec + (target, arg1, arg2, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_LDEXPV: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg1 = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64DFmode, /* Match arg1's V64DF mode.  */ + EXPAND_NORMAL)); + rtx arg2 = force_reg (V64SImode, + expand_expr (CALL_EXPR_ARG (exp, 1), NULL_RTX, + V64SImode, + EXPAND_NORMAL)); + emit_insn (gen_ldexpv64df3_exec + (target, arg1, arg2, gcn_gen_undef (V64DFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPVF_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64SFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64SFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64sf_mant2_exec + (target, arg, gcn_gen_undef (V64SFmode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_EXP: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG (exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_exp2_exec + (target, arg, gcn_gen_undef (V64SImode), exec)); + return target; + } + case GCN_BUILTIN_FREXPV_MANT: + { + if (ignore) + return target; + rtx exec = gcn_full_exec_reg (); + rtx arg = force_reg (V64DFmode, + expand_expr (CALL_EXPR_ARG 
(exp, 0), NULL_RTX, + V64DFmode, + EXPAND_NORMAL)); + emit_insn (gen_frexpv64df_mant2_exec + (target, arg, gcn_gen_undef (V64DFmode), exec)); + return target; + } case GCN_BUILTIN_OMP_DIM_SIZE: { if (ignore) @@ -6507,7 +6617,7 @@ print_operand (FILE *file, rtx x, int code) str = "-4.0"; break; case 248: - str = "1/pi"; + str = "0.15915494"; break; default: rtx ix = simplify_gen_subreg (GET_MODE (x) == DFmode diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index 70d88adc59a..d13e7d2678d 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -82,7 +82,9 @@ UNSPEC_GATHER UNSPEC_SCATTER UNSPEC_RCP - UNSPEC_FLBIT_INT]) + UNSPEC_FLBIT_INT + UNSPEC_FLOOR UNSPEC_CEIL UNSPEC_SIN UNSPEC_COS UNSPEC_EXP2 UNSPEC_LOG2 + UNSPEC_LDEXP UNSPEC_FREXP_EXP UNSPEC_FREXP_MANT]) ;; }}} ;; {{{ Attributes