From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: Received: by sourceware.org (Postfix, from userid 2078) id 563783954420; Fri, 16 Sep 2022 07:45:08 +0000 (GMT) DKIM-Filter: OpenDKIM Filter v2.11.0 sourceware.org 563783954420 DKIM-Signature: v=1; a=rsa-sha256; c=relaxed/relaxed; d=gcc.gnu.org; s=default; t=1663314308; bh=v/1UTSv+8EWbsI8tpma6xSDbD72qJ6D/agFH8vmLldI=; h=From:To:Subject:Date:From; b=QOVb82DE01Vy8VQ0kQyIfNbGHxavMHm20596VD+FqBy+BBMXl7gnCL2zQKgZhbmMq 1ZOtYYUT9QTmrZxnzGzbdNOrHh7mgjd2X4Yg2B0Qm84fnu4OjDJl5V2hy+xLu1OZTa y4JdmoZ9SHoljIHoCWfi2SNooBqbQcvI2/afFCjc= MIME-Version: 1.0 Content-Transfer-Encoding: 7bit Content-Type: text/plain; charset="utf-8" From: hongtao Liu To: gcc-cvs@gcc.gnu.org Subject: [gcc r13-2694] Modernize ix86_builtin_vectorized_function with corresponding expanders. X-Act-Checkin: gcc X-Git-Author: liuhongt X-Git-Refname: refs/heads/master X-Git-Oldrev: d0fc05e86027463eef60ffe16fb8d1fe74acc4e8 X-Git-Newrev: 3e8c4b925a9825fdb8c81f47b621f63108894362 Message-Id: <20220916074508.563783954420@sourceware.org> Date: Fri, 16 Sep 2022 07:45:08 +0000 (GMT) List-Id: https://gcc.gnu.org/g:3e8c4b925a9825fdb8c81f47b621f63108894362 commit r13-2694-g3e8c4b925a9825fdb8c81f47b621f63108894362 Author: liuhongt Date: Thu Sep 15 18:43:16 2022 +0800 Modernize ix86_builtin_vectorized_function with corresponding expanders. For ifloor/lfloor/iceil/lceil/irint/lrint/iround/lround, when the size of in_mode is not equal to that of out_mode, the vectorizer doesn't go the internal fn way, so that part is still left in ix86_builtin_vectorized_function. Remove the other builtins and add corresponding expanders. gcc/ChangeLog: PR target/106910 * config/i386/i386-builtins.cc (ix86_builtin_vectorized_function): Modernized with corresponding expanders. * config/i386/sse.md (lrint<mode><sseintvecmodelower>2): New expander. (floor<mode>2): Ditto. (lfloor<mode><sseintvecmodelower>2): Ditto. (ceil<mode>2): Ditto. (lceil<mode><sseintvecmodelower>2): Ditto. (btrunc<mode>2): Ditto. (lround<mode><sseintvecmodelower>2): Ditto. (exp2<mode>2): Ditto. 
Diff: --- gcc/config/i386/i386-builtins.cc | 185 +-------------------------------------- gcc/config/i386/sse.md | 80 +++++++++++++++++ 2 files changed, 84 insertions(+), 181 deletions(-) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index 6a04fb57e65..af2faee245b 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -1540,21 +1540,16 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, switch (fn) { - CASE_CFN_EXP2: - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_EXP2PS); - } - break; - CASE_CFN_IFLOOR: CASE_CFN_LFLOOR: - CASE_CFN_LLFLOOR: /* The round insn does not trap on denormals. */ if (flag_trapping_math || !TARGET_SSE4_1) break; + /* PR106910, currently vectorizer doesn't go direct internal fn way + when out_n != in_n, so let's still keep this. + Otherwise, it relies on expander of + lceilmn2/lfloormn2/lroundmn2/lrintmn2. */ if (out_mode == SImode && in_mode == DFmode) { if (out_n == 4 && in_n == 2) @@ -1564,20 +1559,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_FLOORPD_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS_SFIX512); - } break; CASE_CFN_ICEIL: CASE_CFN_LCEIL: - CASE_CFN_LLCEIL: /* The round insn does not trap on denormals. 
*/ if (flag_trapping_math || !TARGET_SSE4_1) break; @@ -1591,20 +1576,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_CEILPD_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS_SFIX512); - } break; CASE_CFN_IRINT: CASE_CFN_LRINT: - CASE_CFN_LLRINT: if (out_mode == SImode && in_mode == DFmode) { if (out_n == 4 && in_n == 2) @@ -1614,20 +1589,10 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CVTPS2DQ512); - } break; CASE_CFN_IROUND: CASE_CFN_LROUND: - CASE_CFN_LLROUND: /* The round insn does not trap on denormals. */ if (flag_trapping_math || !TARGET_SSE4_1) break; @@ -1641,150 +1606,8 @@ ix86_builtin_vectorized_function (unsigned int fn, tree type_out, else if (out_n == 16 && in_n == 8) return ix86_get_builtin (IX86_BUILTIN_ROUNDPD_AZ_VEC_PACK_SFIX512); } - if (out_mode == SImode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_ROUNDPS_AZ_SFIX512); - } break; - CASE_CFN_FLOOR: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_FLOORPH512); - } - break; - - CASE_CFN_CEIL: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_CEILPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_CEILPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_CEILPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_CEILPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_CEILPH512); - } - break; - - CASE_CFN_TRUNC: - /* The round insn does not trap on denormals. 
*/ - if (flag_trapping_math || !TARGET_SSE4_1) - break; - - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD); - else if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD256); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPD512); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS); - else if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS256); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPS512); - } - if (out_mode == HFmode && in_mode == HFmode) - { - /* V8HF/V16HF is supported in ix86_vector_mode_supported_p - under TARGET_AVX512FP16, TARGET_AVX512VL is needed here. */ - if (out_n < 32 && !TARGET_AVX512VL) - break; - - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH); - else if (out_n == 16 && in_n == 16) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH256); - else if (out_n == 32 && in_n == 32) - return ix86_get_builtin (IX86_BUILTIN_TRUNCPH512); - } - break; - - CASE_CFN_FMA: - if (out_mode == DFmode && in_mode == DFmode) - { - if (out_n == 2 && in_n == 2) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD); - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPD256); - } - if (out_mode == SFmode && in_mode == SFmode) - { - if (out_n == 4 && in_n == 4) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS); - if (out_n == 8 && in_n == 8) - return ix86_get_builtin (IX86_BUILTIN_VFMADDPS256); - } - break; default: break; diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index d535c0af043..b60c0d34855 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -321,6 +321,11 @@ [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF (V8DF "TARGET_AVX512F") (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")]) +(define_mode_iterator 
VF1_VF2_AVX512DQ + [(V16SF "TARGET_AVX512F") (V8SF "TARGET_AVX") V4SF + (V8DF "TARGET_AVX512DQ") (V4DF "TARGET_AVX512DQ && TARGET_AVX512VL") + (V2DF "TARGET_AVX512DQ && TARGET_AVX512VL")]) + (define_mode_iterator VFH [(V32HF "TARGET_AVX512FP16") (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL") @@ -23177,6 +23182,14 @@ "TARGET_SSE4_1" "operands[2] = GEN_INT (ROUND_MXCSR);") +;; Note vcvtpd2qq require avx512dq for all vector lengths. +(define_expand "lrint2" + [(set (match_operand: 0 "register_operand") + (unspec: + [(match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + UNSPEC_FIX_NOTRUNC))] + "TARGET_SSE2") + (define_insn "_round" [(set (match_operand:VF_128_256 0 "register_operand" "=Yr,*x,x") (unspec:VF_128_256 @@ -23316,6 +23329,55 @@ (set_attr "prefix" "orig,orig,vex,evex") (set_attr "mode" "")]) +(define_expand "floor2" + [(set (match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_FLOOR | ROUND_NO_EXC);") + +(define_expand "lfloor2" + [(match_operand: 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_floor2 (tmp, operands[1])); + emit_insn (gen_fix_trunc2 (operands[0], tmp)); + DONE; +}) + +(define_expand "ceil2" + [(set (match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_CEIL | ROUND_NO_EXC);") + +(define_expand "lceil2" + [(match_operand: 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_ceil2 (tmp, operands[1])); + emit_insn (gen_fix_trunc2 (operands[0], tmp)); + DONE; +}) + +(define_expand "btrunc2" + [(set 
(match_operand:VFH 0 "register_operand") + (unspec:VFH + [(match_operand:VFH 1 "vector_operand") + (match_dup 2)] + UNSPEC_ROUND))] + "TARGET_SSE4_1 && !flag_trapping_math" + "operands[2] = GEN_INT (ROUND_TRUNC | ROUND_NO_EXC);") + (define_expand "round2" [(set (match_dup 3) (plus:VF @@ -23350,6 +23412,17 @@ operands[4] = GEN_INT (ROUND_TRUNC); }) +(define_expand "lround2" + [(match_operand: 0 "register_operand") + (match_operand:VF1_VF2_AVX512DQ 1 "register_operand")] + "TARGET_SSE4_1 && !flag_trapping_math" +{ + rtx tmp = gen_reg_rtx (mode); + emit_insn (gen_round2 (tmp, operands[1])); + emit_insn (gen_fix_trunc2 (operands[0], tmp)); + DONE; +}) + (define_expand "round2_sfix" [(match_operand: 0 "register_operand") (match_operand:VF1 1 "register_operand")] @@ -23868,6 +23941,13 @@ (set_attr "prefix" "evex") (set_attr "mode" "XI")]) +(define_expand "exp22" + [(set (match_operand:VF_512 0 "register_operand") + (unspec:VF_512 + [(match_operand:VF_512 1 "vector_operand")] + UNSPEC_EXP2))] + "TARGET_AVX512ER") + (define_insn "avx512er_exp2" [(set (match_operand:VF_512 0 "register_operand" "=v") (unspec:VF_512