public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
From: Jan Beulich <jbeulich@suse.com>
To: "gcc-patches@gcc.gnu.org" <gcc-patches@gcc.gnu.org>
Cc: Kirill Yukhin <kirill.yukhin@gmail.com>,
	Hongtao Liu <hongtao.liu@intel.com>
Subject: [PATCH v2] x86: make VPTERNLOG* usable on less than 512-bit operands with just AVX512F
Date: Fri, 16 Jun 2023 08:22:16 +0200	[thread overview]
Message-ID: <a342d677-867e-e5a2-dd56-b6ba784c1d50@suse.com> (raw)

There's no reason to constrain this to AVX512VL, unless instructed so by
-mprefer-vector-width=, as the wider operation is unusable for more
narrow operands only when the possible memory source is a non-broadcast
one. This way even the scalar copysign<mode>3 can benefit from the
operation being a single-insn one (leaving aside moves which the
compiler decides to insert for unclear reasons, and leaving aside the
fact that bcst_mem_operand() is too restrictive for broadcast to be
embedded right into VPTERNLOG*).

Along with this also request value duplication in
ix86_expand_copysign()'s call to ix86_build_signbit_mask(), eliminating
excess space allocation in .rodata.*, filled with zeros which are never
read.

gcc/

	* config/i386/i386-expand.cc (ix86_expand_copysign): Request
	value duplication by ix86_build_signbit_mask() when AVX512F and
	not HFmode.
	* config/i386/sse.md (*<avx512>_vternlog<mode>_all): Convert to
	2-alternative form. Adjust "mode" attribute. Add "enabled"
	attribute.
	(*<avx512>_vpternlog<mode>_1): Also permit when TARGET_AVX512F
	&& !TARGET_PREFER_AVX256.
	(*<avx512>_vpternlog<mode>_2): Likewise.
	(*<avx512>_vpternlog<mode>_3): Likewise.
---
I guess the underlying pattern, going along the lines of what
<mask_codefor>one_cmpl<mode>2<mask_name> uses, can be applied elsewhere
as well.

HFmode could use embedded broadcast too for copysign and alike, but that
would need to be V2HF -> V8HF (for which I don't think there are any
existing patterns).
---
v2: Respect -mprefer-vector-width=.

--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -2266,7 +2266,7 @@ ix86_expand_copysign (rtx operands[])
   else
     dest = NULL_RTX;
   op1 = lowpart_subreg (vmode, force_reg (mode, operands[2]), mode);
-  mask = ix86_build_signbit_mask (vmode, 0, 0);
+  mask = ix86_build_signbit_mask (vmode, TARGET_AVX512F && mode != HFmode, 0);
 
   if (CONST_DOUBLE_P (operands[1]))
     {
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -12597,11 +12597,11 @@
    (set_attr "mode" "<sseinsnmode>")])
 
 (define_insn "*<avx512>_vternlog<mode>_all"
-  [(set (match_operand:V 0 "register_operand" "=v")
+  [(set (match_operand:V 0 "register_operand" "=v,v")
 	(unspec:V
-	  [(match_operand:V 1 "register_operand" "0")
-	   (match_operand:V 2 "register_operand" "v")
-	   (match_operand:V 3 "bcst_vector_operand" "vmBr")
+	  [(match_operand:V 1 "register_operand" "0,0")
+	   (match_operand:V 2 "register_operand" "v,v")
+	   (match_operand:V 3 "bcst_vector_operand" "vBr,m")
 	   (match_operand:SI 4 "const_0_to_255_operand")]
 	  UNSPEC_VTERNLOG))]
   "TARGET_AVX512F
@@ -12609,10 +12609,22 @@
    it's not real AVX512FP16 instruction.  */
   && (GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)) >= 4
      || GET_CODE (operands[3]) != VEC_DUPLICATE)"
-  "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}"
+{
+  if (TARGET_AVX512VL)
+    return "vpternlog<ternlogsuffix>\t{%4, %3, %2, %0|%0, %2, %3, %4}";
+  else
+    return "vpternlog<ternlogsuffix>\t{%4, %g3, %g2, %g0|%g0, %g2, %g3, %4}";
+}
   [(set_attr "type" "sselog")
    (set_attr "prefix" "evex")
-   (set_attr "mode" "<sseinsnmode>")])
+   (set (attr "mode")
+        (if_then_else (match_test "TARGET_AVX512VL")
+		      (const_string "<sseinsnmode>")
+		      (const_string "XI")))
+   (set (attr "enabled")
+	(if_then_else (eq_attr "alternative" "1")
+		      (symbol_ref "<MODE_SIZE> == 64 || TARGET_AVX512VL")
+		      (const_string "*")))])
 
 ;; There must be lots of other combinations like
 ;;
@@ -12641,7 +12653,8 @@
 	  (any_logic2:V
 	    (match_operand:V 3 "regmem_or_bitnot_regmem_operand")
 	    (match_operand:V 4 "regmem_or_bitnot_regmem_operand"))))]
-  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
    && ix86_pre_reload_split ()
    && (rtx_equal_p (STRIP_UNARY (operands[1]),
 		    STRIP_UNARY (operands[4]))
@@ -12725,7 +12738,8 @@
 	      (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
 	    (match_operand:V 3 "regmem_or_bitnot_regmem_operand"))
 	  (match_operand:V 4 "regmem_or_bitnot_regmem_operand")))]
-  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
    && ix86_pre_reload_split ()
    && (rtx_equal_p (STRIP_UNARY (operands[1]),
 		    STRIP_UNARY (operands[4]))
@@ -12808,7 +12822,8 @@
 	    (match_operand:V 1 "regmem_or_bitnot_regmem_operand")
 	    (match_operand:V 2 "regmem_or_bitnot_regmem_operand"))
 	  (match_operand:V 3 "regmem_or_bitnot_regmem_operand")))]
-  "(<MODE_SIZE> == 64 || TARGET_AVX512VL)
+  "(<MODE_SIZE> == 64 || TARGET_AVX512VL
+    || (TARGET_AVX512F && !TARGET_PREFER_AVX256))
    && ix86_pre_reload_split ()"
   "#"
   "&& 1"

             reply	other threads:[~2023-06-16  6:22 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-06-16  6:22 Jan Beulich [this message]
2023-06-19  2:07 ` Liu, Hongtao
2023-06-19  7:09   ` Jan Beulich
2023-06-19  8:46     ` Hongtao Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=a342d677-867e-e5a2-dd56-b6ba784c1d50@suse.com \
    --to=jbeulich@suse.com \
    --cc=gcc-patches@gcc.gnu.org \
    --cc=hongtao.liu@intel.com \
    --cc=kirill.yukhin@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).