public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: hongtao Liu <liuhongt@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2694] Modernize ix86_builtin_vectorized_function with corresponding expanders. Date: Fri, 16 Sep 2022 07:45:08 +0000 (GMT) [thread overview] Message-ID: <20220916074508.563783954420@sourceware.org> (raw) https://gcc.gnu.org/g:3e8c4b925a9825fdb8c81f47b621f63108894362 commit r13-2694-g3e8c4b925a9825fdb8c81f47b621f63108894362 Author: liuhongt <hongtao.liu@intel.com> Date: Thu Sep 15 18:43:16 2022 +0800 Modernize ix86_builtin_vectorized_function with corresponding expanders. For ifloor/lfloor/iceil/lceil/irint/lrint/iround/lround, when the size of in_mode is not equal to out_mode, the vectorizer doesn't go the internal fn way, so that part is still left in ix86_builtin_vectorized_function. Remove the other builtins and add corresponding expanders. gcc/ChangeLog: PR target/106910 * config/i386/i386-builtins.cc (ix86_builtin_vectorized_function): Modernized with corresponding expanders. * config/i386/sse.md (lrint<mode><sseintvecmodelower>2): New expander. (floor<mode>2): Ditto. (lfloor<mode><sseintvecmodelower>2): Ditto. (ceil<mode>2): Ditto. (lceil<mode><sseintvecmodelower>2): Ditto. (btrunc<mode>2): Ditto. (lround<mode><sseintvecmodelower>2): Ditto. (exp2<mode>2): Ditto. 
Diff: --- gcc/config/i386/i386-builtins.cc | 185 +-------------------------------------- gcc/config/i386/sse.md | 80 +++++++++++++++++ 2 files changed, 84 insertions(+), 181 deletions(-) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 6a04fb57e65..af2faee245b 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -1540,21 +1540,16 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, switch (fn) { - CASE_CFN_EXP2: - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_EXP2PS); - } - break; - CASE_CFN_IFLOOR: CASE_CFN_LFLOOR: - CASE_CFN_LLFLOOR: /* The round insn does not trap on denormals. */ if (flag_trapping_math || !TARGET_SSE4_1) break; + /* PR106910, currently vectorizer doesn't go direct internal fn way + when out_n != in_n, so let's still keep this. + Otherwise, it relies on expander of + lceilmn2/lfloormn2/lroundmn2/lrintmn2. */ if (out_mode == SImode && in_mode == DFmode) { if (out_n == 4 && in_n == 2) @@ -1564,20 +1559,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); - } break; CASE_CFN_ICEIL: CASE_CFN_LCEIL: - CASE_CFN_LLCEIL: /* The round insn does not trap on denormals. 
*/ if (flag_trapping_math || !TARGET_SSE4_1) break; @@ -1591,20 +1576,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); - } break; CASE_CFN_IRINT: CASE_CFN_LRINT: - CASE_CFN_LLRINT: if (out_mode == SImode && in_mode == DFmode) { if (out_n == 4 && in_n == 2) @@ -1614,20 +1589,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); - } break; CASE_CFN_IROUND: CASE_CFN_LROUND: - CASE_CFN_LLROUND: /* The round insn does not trap on denormals. */ if (flag_trapping_math || !TARGET_SSE4_1) break; @@ -1641,150 +1606,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); - } break; - CASE_CFN_FLOOR: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH512); - } - break; - - CASE_CFN_CEIL: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_CEILPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_CEILPH512); - } - break; - - CASE_CFN_TRUNC: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH512); - } - break; - - CASE_CFN_FMA: - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); - } - break; default: break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d535c0af043..b60c0d34855 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -321,6 +321,11 @@ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) +(define_mode_iterator 
VF1_VF2_AVX512DQ + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL") + (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")]) + (define_mode_iterator VFH [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") @@ -23177,6 +23182,14 @@ "TARGET_SSE4_1" "operands[2] = GEN_INT (ROUND_MXCSR);") +;; Note vcvtpd2qq require avx512dq for all vector lengths. +(define_expand "lrint<mode><sseintvecmodelower>2" + [(set (match_operand:<sseintvecmode> 0 "register_operand") + (unspec:<sseintvecmode> + [(match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2") + (define_insn "<sse4_1>_round<ssemodesuffix><avxsizesuffix>" [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 @@ -23316,6 +23329,55 @@ (set_attr "prefix" "orig,orig,vex,evex") (set_attr "mode" "<MODE>")]) +(define_expand "floor<mode>2" + [(set (match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_FLOOR | ROUND_NO_EXC);") + +(define_expand "lfloor<mode><sseintvecmodelower>2" + [(match_operand:<sseintvecmode> 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_floor<mode>2 (tmp, operands[1])); + emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + DONE; +}) + +(define_expand "ceil<mode>2" + [(set (match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_CEIL | ROUND_NO_EXC);") + +(define_expand "lceil<mode><sseintvecmodelower>2" + [(match_operand:<sseintvecmode> 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 
"register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_ceil<mode>2 (tmp, operands[1])); + emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + DONE; +}) + +(define_expand "btrunc<mode>2" + [(set (match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_TRUNC | ROUND_NO_EXC);") + (define_expand "round<mode>2" [(set (match_dup 3) (plus:VF @@ -23350,6 +23412,17 @@ operands[4] = GEN_INT (ROUND_TRUNC); }) +(define_expand "lround<mode><sseintvecmodelower>2" + [(match_operand:<sseintvecmode> 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (<MODE>mode); + emit_insn (gen_round<mode>2 (tmp, operands[1])); + emit_insn (gen_fix_trunc<mode><sseintvecmodelower>2 (operands[0], tmp)); + DONE; +}) + (define_expand "round<mode>2_sfix" [(match_operand:<sseintvecmode> 0 "register_operand") (match_operand:VF1 1 "register_operand")] @@ -23868,6 +23941,13 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) +(define_expand "exp2<mode>2" + [(set (match_operand:VF_512 0 "register_operand") + (unspec:VF_512 + [(match_operand:VF_512 1 "vector_operand")] + UNSPEC_EXP2))] + "TARGET_AVX512ER") + (define_insn "avx512er_exp2<mode><mask_name><round_saeonly_name>" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512
reply other threads:[~2022-09-16 7:45 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20220916074508.563783954420@sourceware.org \ --to=liuhongt@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).