From 5cd2a628086af3656b3242f0c4f41784646f52b1 Mon Sep 17 00:00:00 2001
From: Evandro Menezes
Date: Thu, 17 Mar 2016 14:44:55 -0500
Subject: [PATCH] [AArch64] Emit division using the Newton series

2016-03-17  Evandro Menezes

gcc/
        * config/aarch64/aarch64-tuning-flags.def
        (AARCH64_EXTRA_TUNE_APPROX_DIV_{SF,DF}): New tuning macros.
        * config/aarch64/aarch64-protos.h (AARCH64_EXTRA_TUNE_APPROX_DIV):
        New macro.
        (aarch64_emit_approx_div): Declare new function.
        * config/aarch64/aarch64.c (aarch64_emit_approx_div): Define new
        function.
        * config/aarch64/aarch64.md ("div<mode>3"): New expansion.
        * config/aarch64/aarch64-simd.md ("div<mode>3"): Likewise.
---
 gcc/config/aarch64/aarch64-protos.h         |  4 ++
 gcc/config/aarch64/aarch64-simd.md          | 14 +++++-
 gcc/config/aarch64/aarch64-tuning-flags.def |  3 +-
 gcc/config/aarch64/aarch64.c                | 86 +++++++++++++++++++++++++++++
 gcc/config/aarch64/aarch64.md               | 19 ++++++--
 5 files changed, 120 insertions(+), 6 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index dced209..52c4838 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -263,6 +263,9 @@ enum aarch64_extra_tuning_flags
 };
 #undef AARCH64_EXTRA_TUNING_OPTION
 
+#define AARCH64_EXTRA_TUNE_APPROX_DIV \
+  (AARCH64_EXTRA_TUNE_APPROX_DIV_DF | AARCH64_EXTRA_TUNE_APPROX_DIV_SF)
+
 extern struct tune_params aarch64_tune_params;
 
 HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
@@ -362,6 +365,7 @@ void aarch64_relayout_simd_types (void);
 void aarch64_reset_previous_fndecl (void);
 void aarch64_save_restore_target_globals (tree);
 void aarch64_emit_approx_rsqrt (rtx, rtx);
+bool aarch64_emit_approx_div (rtx, rtx, rtx);
 
 /* Initialize builtins for SIMD intrinsics.  */
 void init_aarch64_simd_builtins (void);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index bd73bce..99be92e 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1509,7 +1509,19 @@
   [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
 )
 
-(define_insn "div<mode>3"
+(define_expand "div<mode>3"
+ [(set (match_operand:VDQF 0 "register_operand")
+       (div:VDQF (match_operand:VDQF 1 "general_operand")
+                 (match_operand:VDQF 2 "register_operand")))]
+ "TARGET_SIMD"
+{
+  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
+    DONE;
+
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*div<mode>3"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
                  (match_operand:VDQF 2 "register_operand" "w")))]
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def
index 7e45a0c..ececdc1 100644
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -30,4 +30,5 @@
 
 AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
 AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
-
+AARCH64_EXTRA_TUNING_OPTION ("approx_div", APPROX_DIV_DF)
+AARCH64_EXTRA_TUNING_OPTION ("approx_divf", APPROX_DIV_SF)
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 12e498d..2c878ce 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7540,6 +7540,92 @@ aarch64_emit_approx_rsqrt (rtx dst, rtx src)
   emit_move_insn (dst, x0);
 }
 
+/* Emit the instruction sequence to compute an approximation of the division
+   NUM / DIV into QUO: a reciprocal estimate of DIV is refined over a few
+   iterations and then multiplied by NUM.  Return true if the sequence was
+   emitted; return false, emitting nothing, if the approximation must not be
+   used, so that the caller can emit the regular division instead.  */
+
+bool
+aarch64_emit_approx_div (rtx quo, rtx num, rtx div)
+{
+  machine_mode mode = GET_MODE (quo);
+
+  /* NOTE(review): the FRECPE and FRECPS patterns emitted below are presumed
+     to require TARGET_SIMD (confirm in aarch64-simd.md); the scalar expander
+     is enabled for TARGET_FLOAT, so bail out when SIMD is unavailable.  */
+  if (!TARGET_SIMD)
+    return false;
+
+  /* Approximate only under finite, non-trapping, unsafe math, when not
+     optimizing for size and when tuning enables it for the element mode.  */
+  if (!flag_finite_math_only
+      || flag_trapping_math
+      || !flag_unsafe_math_optimizations
+      || optimize_function_for_size_p (cfun)
+      || ((GET_MODE_INNER (mode) != SFmode
+           || !(aarch64_tune_params.extra_tuning_flags
+                & AARCH64_EXTRA_TUNE_APPROX_DIV_SF))
+          && (GET_MODE_INNER (mode) != DFmode
+              || !(aarch64_tune_params.extra_tuning_flags
+                   & AARCH64_EXTRA_TUNE_APPROX_DIV_DF))))
+    return false;
+
+  /* Estimate the approximate reciprocal.  */
+  rtx xrcp = gen_reg_rtx (mode);
+  switch (mode)
+    {
+      case SFmode:
+        emit_insn (gen_aarch64_frecpesf (xrcp, div)); break;
+      case V2SFmode:
+        emit_insn (gen_aarch64_frecpev2sf (xrcp, div)); break;
+      case V4SFmode:
+        emit_insn (gen_aarch64_frecpev4sf (xrcp, div)); break;
+      case DFmode:
+        emit_insn (gen_aarch64_frecpedf (xrcp, div)); break;
+      case V2DFmode:
+        emit_insn (gen_aarch64_frecpev2df (xrcp, div)); break;
+      default:
+        gcc_unreachable ();
+    }
+
+  /* Iterate over the series twice for SF and thrice for DF.  */
+  int iterations = (GET_MODE_INNER (mode) == DFmode) ? 3 : 2;
+
+  rtx xtmp = gen_reg_rtx (mode);
+  while (iterations--)
+    {
+      switch (mode)
+        {
+          case SFmode:
+            emit_insn (gen_aarch64_frecpssf (xtmp, xrcp, div)); break;
+          case V2SFmode:
+            emit_insn (gen_aarch64_frecpsv2sf (xtmp, xrcp, div)); break;
+          case V4SFmode:
+            emit_insn (gen_aarch64_frecpsv4sf (xtmp, xrcp, div)); break;
+          case DFmode:
+            emit_insn (gen_aarch64_frecpsdf (xtmp, xrcp, div)); break;
+          case V2DFmode:
+            emit_insn (gen_aarch64_frecpsv2df (xtmp, xrcp, div)); break;
+          default:
+            gcc_unreachable ();
+        }
+
+      emit_set_insn (xrcp, gen_rtx_MULT (mode, xrcp, xtmp));
+    }
+
+  /* Multiply the reciprocal by NUM, unless NUM is the constant 1.  */
+  if (num != CONST1_RTX (mode))
+    {
+      rtx xnum = force_reg (mode, num);
+      emit_set_insn (quo, gen_rtx_MULT (mode, xnum, xrcp));
+    }
+  else
+    emit_move_insn (quo, xrcp);
+
+  return true;
+}
+
 /* Return the number of instructions that can be issued per cycle.  */
 static int
 aarch64_sched_issue_rate (void)
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 68676c9..985915e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4647,11 +4647,22 @@
   [(set_attr "type" "fmul<s>")]
 )
 
-(define_insn "div<mode>3"
+(define_expand "div<mode>3"
+ [(set (match_operand:GPF 0 "register_operand")
+       (div:GPF (match_operand:GPF 1 "general_operand")
+                (match_operand:GPF 2 "register_operand")))]
+ "TARGET_FLOAT"
+{
+  if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
+    DONE;
+
+  operands[1] = force_reg (<MODE>mode, operands[1]);
+})
+
+(define_insn "*div<mode>3"
   [(set (match_operand:GPF 0 "register_operand" "=w")
-        (div:GPF
-         (match_operand:GPF 1 "register_operand" "w")
-         (match_operand:GPF 2 "register_operand" "w")))]
+        (div:GPF (match_operand:GPF 1 "register_operand" "w")
+                 (match_operand:GPF 2 "register_operand" "w")))]
   "TARGET_FLOAT"
   "fdiv\\t%<s>0, %<s>1, %<s>2"
   [(set_attr "type" "fdiv<s>")]
-- 
1.9.1