From: Jan Beulich <jbeulich@suse.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Kirill Yukhin <kirill.yukhin@gmail.com>,
Hongtao Liu <hongtao.liu@intel.com>
Subject: [PATCH v3] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F
Date: Tue, 20 Jun 2023 09:06:57 +0200 [thread overview]
Message-ID: <169ca252-3828-b466-4d47-a8fe720ec4ef@suse.com> (raw)
There's no reason to constrain this to AVX512VL, unless instructed so by
-mprefer-vector-width=, as the wider operation is unusable for more
narrow operands only when the possible memory source is a non-broadcast
one. This way even the scalar copysign<mode>3 can benefit from the
operation being a single-insn one (leaving aside moves which the
compiler decides to insert for unclear reasons, and leaving aside the
fact that bcst_mem_operand() is too restrictive for broadcast to be
embedded right into VPTERNLOG*).
While there also bring *<avx512>_vternlog<mode>_all's in sync with that
of the three splitters.
Along with this also request value duplication in
ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating
excess space allocation in .rodata.*, filled with zeros which are never
read.
gcc/
* config/i386/i386-expand.cc (ix86_expand_copysign): Request
value duplication by ix86_build_signbit_mask() when AVX512F and
not HFmode.
* config/i386/sse.md (*<avx512>_vternlog<mode>_all): Convert to
2-alternative form. Adjust "mode" attribute. Add "enabled"
attribute.
(*<avx512>_vpternlog<mode>_1): Also permit when TARGET_AVX512F
&& !TARGET_PREFER_AVX256.
(*<avx512>_vpternlog<mode>_2): Likewise.
(*<avx512>_vpternlog<mode>_3): Likewise.
gcc/testsuite/
* gcc.target/i386/avx512f-copysign.c: New test.
---
I haven't been able to find documentation on the dejagnu(?) regex syntax
(?:...). With ordinary (...) failing (producing twice as many matches),
I could only derive this from other scan-assembler patterns.
I guess the underlying pattern, going along the lines of what
<mask_codefor>one_cmpl<mode>2<mask_name> uses, can be applied elsewhere
as well.
HFmode could use embedded broadcast too for copysign and alike, but that
would need to be V2HF -> V8HF (for which I don't think there are any
existing patterns).
---
v3: Adjust insn conditional as well. Add testcase.
v2: Respect -mprefer-vector-width=.
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2266,7 +2266,7 @@ ix86_expand_copysign (rtx operands[])
else
dest = NULL_RTX;
op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
- mask = ix86_build_signbit_mask (vmode, 0, 0);
+ mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0);
if (CONST_DOUBLE_P (operands[1]))
{
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12399,22 +12399,35 @@
(set_attr "mode" "<sseinsnmode>")])
(define_insn "*<avx512>_vternlog<mode>_all"
- [(set (match_operand:V 0 "register_operand" "=v")
+ [(set (match_operand:V 0 "register_operand" "=v,v")
(unspec:V
- [(match_operand:V 1 "register_operand" "0")
- (match_operand:V 2 "register_operand" "v")
- (match_operand:V 3 "bcst_vector_operand" "vmBr")
+ [(match_operand:V 1 "register_operand" "0,0")
+ (match_operand:V 2 "register_operand" "v,v")
+ (match_operand:V 3 "bcst_vector_operand" "vBr,m")
(match_operand:SI 4 "const_0_to_255_operand")]
UNSPEC_VTERNLOG))]
- "TARGET_AVX512F
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
/* Disallow embeded broadcast for vector HFmode since
it's not real AVX512FP16 instruction. */
&& (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
|| GET_CODE (operands[3]) != VEC_DUPLICATE)"
- "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+{
+ if (TARGET_AVX512VL)
+ return "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+ else
+ return "vpternlog<ternlogsuffix>\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}";
+}
[(set_attr "type" "sselog")
(set_attr "prefix" "evex")
- (set_attr "mode" "<sseinsnmode>")])
+ (set (attr "mode")
+ (if_then_else (match_test "TARGET_AVX512VL")
+ (const_string "<sseinsnmode>")
+ (const_string "XI")))
+ (set (attr "enabled")
+ (if_then_else (eq_attr "alternative" "1")
+ (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+ (const_string "*")))])
;; There must be lots of other combinations like
;;
@@ -12443,7 +12456,8 @@
(any_logic2:V
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")
(match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
- "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12527,7 +12541,8 @@
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
- "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()
&& (rtx_equal_p (STRIP_UNARY (operands[1]),
STRIP_UNARY (operands[4]))
@@ -12610,7 +12625,8 @@
(match_operand:V 1 "regmem_or_bitnot_regmem_operand")
(match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
(match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
- "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+ "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+ || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
&& ix86_pre_reload_split ()"
"#"
"&& 1"
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/avx512f-copysign.c
@@ -0,0 +1,32 @@
+/* { dg-do compile } */
+/* { dg-options "-mavx512f -mno-avx512vl -O2" } */
+/* { dg-final { scan-assembler-times "vpternlog\[dq\]\[ \\t\]+\\\$(?:216|228|0xd8|0xe4)," 5 } } */
+
+double cs_df (double x, double y)
+{
+ return __builtin_copysign (x, y);
+}
+
+float cs_sf (float x, float y)
+{
+ return __builtin_copysignf (x, y);
+}
+
+typedef double __attribute__ ((vector_size (16))) v2df;
+typedef double __attribute__ ((vector_size (32))) v4df;
+typedef double __attribute__ ((vector_size (64))) v8df;
+
+v2df cs_v2df (v2df x, v2df y)
+{
+ return __builtin_ia32_copysignpd (x, y);
+}
+
+v4df cs_v4df (v4df x, v4df y)
+{
+ return __builtin_ia32_copysignpd256 (x, y);
+}
+
+v8df cs_v8df (v8df x, v8df y)
+{
+ return __builtin_ia32_copysignpd512 (x, y);
+}
next reply other threads:[~2023-06-20 7:07 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-20 7:06 Jan Beulich [this message]
2023-06-20 8:33 ` Hongtao Liu
2023-06-20 9:03 ` Jan Beulich
2023-06-20 9:34 ` Hongtao Liu
2023-06-27 5:11 ` Hongtao Liu
2023-07-04 15:29 ` Jan Beulich
2023-07-05 1:15 ` Liu, Hongtao
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=169ca252-3828-b466-4d47-a8fe720ec4ef@suse.com \
--to=jbeulich@suse.com \
--cc=gcc-patches@gcc.gnu.org \
--cc=hongtao.liu@intel.com \
--cc=kirill.yukhin@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).