public inbox for gcc-cvs@sourceware.org help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@gcc.gnu.org> To: gcc-cvs@gcc.gnu.org Subject: [gcc r14-2009] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F Date: Wed, 21 Jun 2023 06:03:24 +0000 (GMT) [thread overview] Message-ID: <20230621060324.D9C303858425@sourceware.org> (raw) https://gcc.gnu.org/g:864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8 commit r14-2009-g864c6471bdc6cdec6da60b66ac13e9fe3cd73fb8 Author: Jan Beulich <jbeulich@suse.com> Date: Wed Jun 21 08:03:05 2023 +0200 x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F There's no reason to constrain this to AVX512VL, unless instructed so by -mprefer-vector-width=, as the wider operation is unusable for more narrow operands only when the possible memory source is a non-broadcast one. This way even the scalar copysign<mode>3 can benefit from the operation being a single-insn one (leaving aside moves which the compiler decides to insert for unclear reasons, and leaving aside the fact that bcst_mem_operand() is too restrictive for broadcast to be embedded right into VPTERNLOG*). While there also bring *<avx512>_vternlog<mode>_all's in sync with that of the three splitters. Along with this also request value duplication in ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating excess space allocation in .rodata.*, filled with zeros which are never read. gcc/ * config/i386/i386-expand.cc (ix86_expand_copysign): Request value duplication by ix86_build_signbit_mask() when AVX512F and not HFmode. * config/i386/sse.md (*<avx512>_vternlog<mode>_all): Convert to 2-alternative form. Adjust "mode" attribute. Add "enabled" attribute. (*<avx512>_vpternlog<mode>_1): Also permit when TARGET_AVX512F && !TARGET_PREFER_AVX256. (*<avx512>_vpternlog<mode>_2): Likewise. (*<avx512>_vpternlog<mode>_3): Likewise. gcc/testsuite/ * gcc.target/i386/avx512f-copysign.c: New test. 
Diff: --- gcc/config/i386/i386-expand.cc | 2 +- gcc/config/i386/sse.md | 36 +++++++++++++++++------- gcc/testsuite/gcc.target/i386/avx512f-copysign.c | 32 +++++++++++++++++++++ 3 files changed, 59 insertions(+), 11 deletions(-) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ad2cd079fcb..7bb4d3912a9 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -2278,7 +2278,7 @@ ix86_expand_copysign (rtx operands[]) else dest = NULL_RTX; op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode); - mask = ix86_build_signbit_mask (vmode, 0, 0); + mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0); if (CONST_DOUBLE_P (operands[1])) { diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index b99becba516..f793258b6c2 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -12597,22 +12597,35 @@ (set_attr "mode" "<sseinsnmode>")]) (define_insn "*<avx512>_vternlog<mode>_all" - [(set (match_operand:V 0 "register_operand" "=v") + [(set (match_operand:V 0 "register_operand" "=v,v") (unspec:V - [(match_operand:V 1 "register_operand" "0") - (match_operand:V 2 "register_operand" "v") - (match_operand:V 3 "bcst_vector_operand" "vmBr") + [(match_operand:V 1 "register_operand" "0,0") + (match_operand:V 2 "register_operand" "v,v") + (match_operand:V 3 "bcst_vector_operand" "vBr,m") (match_operand:SI 4 "const_0_to_255_operand")] UNSPEC_VTERNLOG))] - "TARGET_AVX512F + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) /* Disallow embeded broadcast for vector HFmode since it's not real AVX512FP16 instruction. 
*/ && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4 || GET_CODE (operands[3]) != VEC_DUPLICATE)" - "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}" +{ + if (TARGET_AVX512VL) + return "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"; + else + return "vpternlog<ternlogsuffix>\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}"; +} [(set_attr "type" "sselog") (set_attr "prefix" "evex") - (set_attr "mode" "<sseinsnmode>")]) + (set (attr "mode") + (if_then_else (match_test "TARGET_AVX512VL") + (const_string "<sseinsnmode>") + (const_string "XI"))) + (set (attr "enabled") + (if_then_else (eq_attr "alternative" "1") + (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL") + (const_string "*")))]) ;; There must be lots of other combinations like ;; @@ -12641,7 +12654,8 @@ (any_logic2:V (match_operand:V 3 "regmem_or_bitnot_regmem_operand") (match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))] - "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -12725,7 +12739,8 @@ (match_operand:V 2 "regmem_or_bitnot_regmem_operand")) (match_operand:V 3 "regmem_or_bitnot_regmem_operand")) (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))] - "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split () && (rtx_equal_p (STRIP_UNARY (operands[1]), STRIP_UNARY (operands[4])) @@ -12808,7 +12823,8 @@ (match_operand:V 1 "regmem_or_bitnot_regmem_operand") (match_operand:V 2 "regmem_or_bitnot_regmem_operand")) (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))] - "(<MODE_SIZE> == 64 || TARGET_AVX512VL) + "(<MODE_SIZE> == 64 || TARGET_AVX512VL + || (TARGET_AVX512F && !TARGET_PREFER_AVX256)) && ix86_pre_reload_split ()" "#" "&& 1" diff --git a/gcc/testsuite/gcc.target/i386/avx512f-copysign.c 
b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c new file mode 100644 index 00000000000..51ca0284e6c --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c @@ -0,0 +1,32 @@ +/* { dg-do compile } */ +/* { dg-options "-mavx512f -mno-avx512vl -mprefer-vector-width=512 -O2" } */ +/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$(?:216|228|0xd8|0xe4)," 5 } } */ + +double cs_df (double x, double y) +{ + return __builtin_copysign (x, y); +} + +float cs_sf (float x, float y) +{ + return __builtin_copysignf (x, y); +} + +typedef double __attribute__ ((vector_size (16))) v2df; +typedef double __attribute__ ((vector_size (32))) v4df; +typedef double __attribute__ ((vector_size (64))) v8df; + +v2df cs_v2df (v2df x, v2df y) +{ + return __builtin_ia32_copysignpd (x, y); +} + +v4df cs_v4df (v4df x, v4df y) +{ + return __builtin_ia32_copysignpd256 (x, y); +} + +v8df cs_v8df (v8df x, v8df y) +{ + return __builtin_ia32_copysignpd512 (x, y); +}
reply other threads:[~2023-06-21 6:03 UTC|newest] Thread overview: [no followups] expand[flat|nested] mbox.gz Atom feed
Reply instructions: You may reply publicly to this message via plain-text email using any one of the following methods: * Save the following mbox file, import it into your mail client, and reply-to-all from there: mbox Avoid top-posting and favor interleaved quoting: https://en.wikipedia.org/wiki/Posting_style#Interleaved_style * Reply using the --to, --cc, and --in-reply-to switches of git-send-email(1): git send-email \ --in-reply-to=20230621060324.D9C303858425@sourceware.org \ --to=jbeulich@gcc.gnu.org \ --cc=gcc-cvs@gcc.gnu.org \ /path/to/YOUR_REPLY https://kernel.org/pub/software/scm/git/docs/git-send-email.html * If your mail client supports setting the In-Reply-To header via mailto: links, try the mailto: link. Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).