public inbox for gcc-patches@gcc.gnu.org
 help / color / mirror / Atom feed
* [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
@ 2021-03-05 20:50 Jakub Jelinek
  2021-03-06 10:19 ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-03-05 20:50 UTC (permalink / raw)
  To: Uros Bizjak, Jeff Law, Kirill Yukhin; +Cc: gcc-patches

Hi!

As I wrote in the mail with the previous PR99321 fix, we have various
bugs where we emit instructions that need avx512bw and avx512vl
ISAs when compiling with -mavx512vl -mno-avx512bw.

Without the following patch,
/* PR target/99321 */
/* Would need some effective target for GNU as that supports -march=+noavx512bw etc. */
/* { dg-do assemble } */
/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */

#include <x86intrin.h>

typedef unsigned char V1 __attribute__((vector_size (16)));
typedef unsigned char V2 __attribute__((vector_size (32)));
typedef unsigned short V3 __attribute__((vector_size (16)));
typedef unsigned short V4 __attribute__((vector_size (32)));

void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
test fails with:
/tmp/ccW4PsfG.s: Assembler messages:
/tmp/ccW4PsfG.s:9: Error: unsupported instruction `vpaddb'
/tmp/ccW4PsfG.s:20: Error: unsupported instruction `vpaddb'
/tmp/ccW4PsfG.s:31: Error: unsupported instruction `vpaddw'
/tmp/ccW4PsfG.s:42: Error: unsupported instruction `vpaddw'
/tmp/ccW4PsfG.s:53: Error: unsupported instruction `vpsubb'
/tmp/ccW4PsfG.s:64: Error: unsupported instruction `vpsubb'
/tmp/ccW4PsfG.s:75: Error: unsupported instruction `vpsubw'
/tmp/ccW4PsfG.s:86: Error: unsupported instruction `vpsubw'
/tmp/ccW4PsfG.s:97: Error: unsupported instruction `vpmullw'
/tmp/ccW4PsfG.s:108: Error: unsupported instruction `vpmullw'
/tmp/ccW4PsfG.s:133: Error: unsupported instruction `vpminub'
/tmp/ccW4PsfG.s:144: Error: unsupported instruction `vpminuw'
/tmp/ccW4PsfG.s:155: Error: unsupported instruction `vpminuw'
/tmp/ccW4PsfG.s:166: Error: unsupported instruction `vpminsb'
/tmp/ccW4PsfG.s:177: Error: unsupported instruction `vpminsb'
/tmp/ccW4PsfG.s:202: Error: unsupported instruction `vpminsw'
/tmp/ccW4PsfG.s:227: Error: unsupported instruction `vpmaxub'
/tmp/ccW4PsfG.s:238: Error: unsupported instruction `vpmaxuw'
/tmp/ccW4PsfG.s:249: Error: unsupported instruction `vpmaxuw'
/tmp/ccW4PsfG.s:260: Error: unsupported instruction `vpmaxsb'
/tmp/ccW4PsfG.s:271: Error: unsupported instruction `vpmaxsb'
/tmp/ccW4PsfG.s:296: Error: unsupported instruction `vpmaxsw'

We already have Yw constraint which is equivalent to v for
-mavx512bw -mavx512vl and to nothing otherwise, so for
the instructions that need both we need to use xYw and
v for modes that don't need that.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

Do we want such a testcase in the testsuite?  I guess we'd need to
add an effective target whether -Wa,-march=+noavx512bw can be used
and also add effective target avx512vl.
And I'll need to fix a lot of other instructions that have the same problem.

2021-03-05  Jakub Jelinek  <jakub@redhat.com>

	PR target/99321
	* config/i386/sse.md (v_xYw): New define_mode_attr.
	(*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3,
	*sse4_1_<code><mode>3<mask_name>): Use <v_xYw> instead of v
	in constraints.

--- gcc/config/i386/sse.md.jj	2021-02-10 19:50:04.466086143 +0100
+++ gcc/config/i386/sse.md	2021-03-05 19:25:57.540752452 +0100
@@ -560,6 +560,14 @@ (define_mode_attr avx512
    (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
    (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
 
+(define_mode_attr v_xYw
+  [(V16QI "xYw") (V32QI "xYw") (V64QI "v")
+   (V8HI "xYw") (V16HI "xYw") (V32HI "v")
+   (V4SI "v") (V8SI "v") (V16SI "v")
+   (V2DI "v") (V4DI "v") (V8DI "v")
+   (V4SF "v") (V8SF "v") (V16SF "v")
+   (V2DF "v") (V4DF "v") (V8DF "v")])
+
 (define_mode_attr sse2_avx_avx512f
   [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
    (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
@@ -11677,10 +11685,10 @@ (define_expand "<insn><mode>3_mask"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<insn><mode>3"
-  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
+  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_xYw>")
 	(plusminus:VI_AVX2
-	  (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
-	  (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
+	  (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_xYw>")
+	  (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_xYw>mBr")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11790,9 +11798,9 @@ (define_expand "mul<mode>3<mask_name>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3<mask_name>"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
-	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
-		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_xYw>")
+	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_xYw>")
+		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_xYw>m")))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
    && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
@@ -12618,10 +12626,10 @@ (define_expand "<code><mode>3"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*avx2_<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand" "=v")
+  [(set (match_operand:VI124_256 0 "register_operand" "=<v_xYw>")
 	(maxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
-	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
+	  (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_xYw>")
+	  (match_operand:VI124_256 2 "nonimmediate_operand" "<v_xYw>m")))]
   "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
@@ -12745,10 +12753,10 @@ (define_expand "<code><mode>3"
 })
 
 (define_insn "*sse4_1_<code><mode>3<mask_name>"
-  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_xYw>")
 	(smaxmin:VI14_128
-	  (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
-	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+	  (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_xYw>")
+	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_xYw>m")))]
   "TARGET_SSE4_1
    && <mask_mode512bit_condition>
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -12830,10 +12838,10 @@ (define_expand "<code><mode>3"
 })
 
 (define_insn "*sse4_1_<code><mode>3<mask_name>"
-  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_xYw>")
 	(umaxmin:VI24_128
-	  (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
-	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+	  (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_xYw>")
+	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_xYw>m")))]
   "TARGET_SSE4_1
    && <mask_mode512bit_condition>
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"

	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
  2021-03-05 20:50 [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321] Jakub Jelinek
@ 2021-03-06 10:19 ` Uros Bizjak
  2021-03-06 10:34   ` Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2021-03-06 10:19 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: Jeff Law, Kirill Yukhin, gcc-patches

On Fri, Mar 5, 2021 at 9:51 PM Jakub Jelinek <jakub@redhat.com> wrote:
>
> Hi!
>
> As I wrote in the mail with the previous PR99321 fix, we have various
> bugs where we emit instructions that need avx512bw and avx512vl
> ISAs when compiling with -mavx512vl -mno-avx512bw.
>
> Without the following patch,
> /* PR target/99321 */
> /* Would need some effective target for GNU as that supports -march=+noavx512bw etc. */
> /* { dg-do assemble } */
> /* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
>
> #include <x86intrin.h>
>
> typedef unsigned char V1 __attribute__((vector_size (16)));
> typedef unsigned char V2 __attribute__((vector_size (32)));
> typedef unsigned short V3 __attribute__((vector_size (16)));
> typedef unsigned short V4 __attribute__((vector_size (32)));
>
> void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
> void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
> void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> test fails with:
> /tmp/ccW4PsfG.s: Assembler messages:
> /tmp/ccW4PsfG.s:9: Error: unsupported instruction `vpaddb'
> /tmp/ccW4PsfG.s:20: Error: unsupported instruction `vpaddb'
> /tmp/ccW4PsfG.s:31: Error: unsupported instruction `vpaddw'
> /tmp/ccW4PsfG.s:42: Error: unsupported instruction `vpaddw'
> /tmp/ccW4PsfG.s:53: Error: unsupported instruction `vpsubb'
> /tmp/ccW4PsfG.s:64: Error: unsupported instruction `vpsubb'
> /tmp/ccW4PsfG.s:75: Error: unsupported instruction `vpsubw'
> /tmp/ccW4PsfG.s:86: Error: unsupported instruction `vpsubw'
> /tmp/ccW4PsfG.s:97: Error: unsupported instruction `vpmullw'
> /tmp/ccW4PsfG.s:108: Error: unsupported instruction `vpmullw'
> /tmp/ccW4PsfG.s:133: Error: unsupported instruction `vpminub'
> /tmp/ccW4PsfG.s:144: Error: unsupported instruction `vpminuw'
> /tmp/ccW4PsfG.s:155: Error: unsupported instruction `vpminuw'
> /tmp/ccW4PsfG.s:166: Error: unsupported instruction `vpminsb'
> /tmp/ccW4PsfG.s:177: Error: unsupported instruction `vpminsb'
> /tmp/ccW4PsfG.s:202: Error: unsupported instruction `vpminsw'
> /tmp/ccW4PsfG.s:227: Error: unsupported instruction `vpmaxub'
> /tmp/ccW4PsfG.s:238: Error: unsupported instruction `vpmaxuw'
> /tmp/ccW4PsfG.s:249: Error: unsupported instruction `vpmaxuw'
> /tmp/ccW4PsfG.s:260: Error: unsupported instruction `vpmaxsb'
> /tmp/ccW4PsfG.s:271: Error: unsupported instruction `vpmaxsb'
> /tmp/ccW4PsfG.s:296: Error: unsupported instruction `vpmaxsw'
>
> We already have Yw constraint which is equivalent to v for
> -mavx512bw -mavx512vl and to nothing otherwise, so for
> the instructions that need both we need to use xYw and
> v for modes that don't need that.

Perhaps we should introduce another Y... constraint to return correct
SSE regset based on TARGET_... flags, instead of using compound xYw? I
think that introducing new constraint is the established approach we
should follow. The new mode_attr looks OK to me.

Uros.

> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> Do we want such a testcase in the testsuite?  I guess we'd need to
> add an effective target whether -Wa,-march=+noavx512bw can be used
> and also add effective target avx512vl.
> And I'll need to fix a lot of other instructions that have the same problem.
>
> 2021-03-05  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/99321
>         * config/i386/sse.md (v_xYw): New define_mode_attr.
>         (*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3,
>         *sse4_1_<code><mode>3<mask_name>): Use <v_xYw> instead of v
>         in constraints.
>
> --- gcc/config/i386/sse.md.jj   2021-02-10 19:50:04.466086143 +0100
> +++ gcc/config/i386/sse.md      2021-03-05 19:25:57.540752452 +0100
> @@ -560,6 +560,14 @@ (define_mode_attr avx512
>     (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
>     (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
>
> +(define_mode_attr v_xYw
> +  [(V16QI "xYw") (V32QI "xYw") (V64QI "v")
> +   (V8HI "xYw") (V16HI "xYw") (V32HI "v")
> +   (V4SI "v") (V8SI "v") (V16SI "v")
> +   (V2DI "v") (V4DI "v") (V8DI "v")
> +   (V4SF "v") (V8SF "v") (V16SF "v")
> +   (V2DF "v") (V4DF "v") (V8DF "v")])
> +
>  (define_mode_attr sse2_avx_avx512f
>    [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
>     (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
> @@ -11677,10 +11685,10 @@ (define_expand "<insn><mode>3_mask"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
>  (define_insn "*<insn><mode>3"
> -  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
> +  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_xYw>")
>         (plusminus:VI_AVX2
> -         (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
> -         (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
> +         (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_xYw>")
> +         (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_xYw>mBr")))]
>    "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
>    "@
>     p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
> @@ -11790,9 +11798,9 @@ (define_expand "mul<mode>3<mask_name>"
>    "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
>
>  (define_insn "*mul<mode>3<mask_name>"
> -  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
> -       (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
> -                      (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
> +  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_xYw>")
> +       (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_xYw>")
> +                      (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_xYw>m")))]
>    "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
>     && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
>    "@
> @@ -12618,10 +12626,10 @@ (define_expand "<code><mode>3"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
>  (define_insn "*avx2_<code><mode>3"
> -  [(set (match_operand:VI124_256 0 "register_operand" "=v")
> +  [(set (match_operand:VI124_256 0 "register_operand" "=<v_xYw>")
>         (maxmin:VI124_256
> -         (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
> -         (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
> +         (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_xYw>")
> +         (match_operand:VI124_256 2 "nonimmediate_operand" "<v_xYw>m")))]
>    "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "sseiadd")
> @@ -12745,10 +12753,10 @@ (define_expand "<code><mode>3"
>  })
>
>  (define_insn "*sse4_1_<code><mode>3<mask_name>"
> -  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
> +  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_xYw>")
>         (smaxmin:VI14_128
> -         (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
> -         (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
> +         (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_xYw>")
> +         (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_xYw>m")))]
>    "TARGET_SSE4_1
>     && <mask_mode512bit_condition>
>     && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> @@ -12830,10 +12838,10 @@ (define_expand "<code><mode>3"
>  })
>
>  (define_insn "*sse4_1_<code><mode>3<mask_name>"
> -  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
> +  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_xYw>")
>         (umaxmin:VI24_128
> -         (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
> -         (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
> +         (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_xYw>")
> +         (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_xYw>m")))]
>    "TARGET_SSE4_1
>     && <mask_mode512bit_condition>
>     && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
  2021-03-06 10:19 ` Uros Bizjak
@ 2021-03-06 10:34   ` Jakub Jelinek
  2021-03-06 12:39     ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-03-06 10:34 UTC (permalink / raw)
  To: Uros Bizjak, hjl.tools; +Cc: Jeff Law, Kirill Yukhin, gcc-patches

On Sat, Mar 06, 2021 at 11:19:15AM +0100, Uros Bizjak wrote:
> > We already have Yw constraint which is equivalent to v for
> > -mavx512bw -mavx512vl and to nothing otherwise, so for
> > the instructions that need both we need to use xYw and
> > v for modes that don't need that.
> 
> Perhaps we should introduce another Y... constraint to return correct
> SSE regset based on TARGET_... flags, instead of using compound xYw? I
> think that introducing new constraint is the established approach we
> should follow. The new mode_attr looks OK to me.

One possibility would be to change the meaning of Yw, because it
is an internal undocumented constraint and all uses in GCC currently use it
as xYw:
constraints.md:(define_register_constraint "Yw"
mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
mmx.md:          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
mmx.md:	    (match_operand:SI 1 "register_operand" "0,xYw"))))]
Would that be ok?

If not, I'll add
(define_register_constraint "Yl"
 "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
 "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")

	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
  2021-03-06 10:34   ` Jakub Jelinek
@ 2021-03-06 12:39     ` Uros Bizjak
  2021-03-07  8:40       ` [PATCH] i386, v2: " Jakub Jelinek
  0 siblings, 1 reply; 6+ messages in thread
From: Uros Bizjak @ 2021-03-06 12:39 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: H. J. Lu, Jeff Law, Kirill Yukhin, gcc-patches

On Sat, Mar 6, 2021 at 11:34 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Sat, Mar 06, 2021 at 11:19:15AM +0100, Uros Bizjak wrote:
> > > We already have Yw constraint which is equivalent to v for
> > > -mavx512bw -mavx512vl and to nothing otherwise, so for
> > > the instructions that need both we need to use xYw and
> > > v for modes that don't need that.
> >
> > Perhaps we should introduce another Y... constraint to return correct
> > SSE regset based on TARGET_... flags, instead of using compound xYw? I
> > think that introducing new constraint is the established approach we
> > should follow. The new mode_attr looks OK to me.
>
> One possibility would be to change the meaning of Yw, because it
> is an internal undocumented constraint and all uses in GCC currently use it
> as xYw:
> constraints.md:(define_register_constraint "Yw"
> mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> mmx.md:          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
> mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> mmx.md:     (match_operand:SI 1 "register_operand" "0,xYw"))))]
> Would that be ok?

Yes, this is an excellent idea.

Uros.

> If not, I'll add
> (define_register_constraint "Yl"
>  "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
>  "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH] i386, v2: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
  2021-03-06 12:39     ` Uros Bizjak
@ 2021-03-07  8:40       ` Jakub Jelinek
  2021-03-07  9:07         ` Uros Bizjak
  0 siblings, 1 reply; 6+ messages in thread
From: Jakub Jelinek @ 2021-03-07  8:40 UTC (permalink / raw)
  To: Uros Bizjak; +Cc: gcc-patches, Jeff Law

On Sat, Mar 06, 2021 at 01:39:23PM +0100, Uros Bizjak via Gcc-patches wrote:
> > One possibility would be to change the meaning of Yw, because it
> > is an internal undocumented constraint and all uses in GCC currently use it
> > as xYw:
> > constraints.md:(define_register_constraint "Yw"
> > mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> > mmx.md:          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
> > mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> > mmx.md:     (match_operand:SI 1 "register_operand" "0,xYw"))))]
> > Would that be ok?
> 
> Yes, this is an excellent idea.

Ok, here is the full patch, now including a testcase.
Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2021-03-07  Jakub Jelinek  <jakub@redhat.com>

	PR target/99321
	* config/i386/constraints.md (Yw): Use SSE_REGS if TARGET_SSE
	but TARGET_AVX512BW or TARGET_AVX512VL is not set.  Adjust description
	and comment.
	* config/i386/sse.md (v_Yw): New define_mode_attr.
	(*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3,
	*sse4_1_<code><mode>3<mask_name>): Use <v_Yw> instead of v
	in constraints.
	* config/i386/mmx.md (mmx_pshufw_1, *vec_dupv4hi): Use Yw instead of
	xYw in constraints.

	* lib/target-supports.exp
	(check_effective_target_assembler_march_noavx512bw): New effective
	target.
	* gcc.target/i386/avx512vl-pr99321-1.c: New test.

--- gcc/config/i386/constraints.md.jj	2021-01-04 10:25:45.116162680 +0100
+++ gcc/config/i386/constraints.md	2021-03-06 13:47:38.950644696 +0100
@@ -110,7 +110,7 @@ (define_register_constraint "v" "TARGET_
 ;;  v	any EVEX encodable SSE register for AVX512VL target,
 ;;	otherwise any SSE register
 ;;  w	any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
-;;	target.
+;;	target, otherwise any SSE register.
 
 (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
  "First SSE register (@code{%xmm0}).")
@@ -148,8 +148,8 @@ (define_register_constraint "Yv"
  "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
 
 (define_register_constraint "Yw"
- "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
- "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
+ "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
+ "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
 
 ;; We use the B prefix to denote any number of internal operands:
 ;;  f  FLAGS_REG
--- gcc/config/i386/sse.md.jj	2021-03-05 21:51:33.728349881 +0100
+++ gcc/config/i386/sse.md	2021-03-06 13:48:13.283261323 +0100
@@ -560,6 +560,14 @@ (define_mode_attr avx512
    (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
    (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
 
+(define_mode_attr v_Yw
+  [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
+   (V8HI "Yw") (V16HI "Yw") (V32HI "v")
+   (V4SI "v") (V8SI "v") (V16SI "v")
+   (V2DI "v") (V4DI "v") (V8DI "v")
+   (V4SF "v") (V8SF "v") (V16SF "v")
+   (V2DF "v") (V4DF "v") (V8DF "v")])
+
 (define_mode_attr sse2_avx_avx512f
   [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
    (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
@@ -11677,10 +11685,10 @@ (define_expand "<insn><mode>3_mask"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*<insn><mode>3"
-  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
+  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
 	(plusminus:VI_AVX2
-	  (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
-	  (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
+	  (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
+	  (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
   "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
   "@
    p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -11790,9 +11798,9 @@ (define_expand "mul<mode>3<mask_name>"
   "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
 
 (define_insn "*mul<mode>3<mask_name>"
-  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
-	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
-		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
+  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
+	(mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
+		       (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
   "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
    && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
   "@
@@ -12618,10 +12626,10 @@ (define_expand "<code><mode>3"
   "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
 
 (define_insn "*avx2_<code><mode>3"
-  [(set (match_operand:VI124_256 0 "register_operand" "=v")
+  [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
 	(maxmin:VI124_256
-	  (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
-	  (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
+	  (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
+	  (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
   "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
   "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
   [(set_attr "type" "sseiadd")
@@ -12745,10 +12753,10 @@ (define_expand "<code><mode>3"
 })
 
 (define_insn "*sse4_1_<code><mode>3<mask_name>"
-  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
 	(smaxmin:VI14_128
-	  (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
-	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+	  (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
+	  (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
   "TARGET_SSE4_1
    && <mask_mode512bit_condition>
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
@@ -12830,10 +12838,10 @@ (define_expand "<code><mode>3"
 })
 
 (define_insn "*sse4_1_<code><mode>3<mask_name>"
-  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
+  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
 	(umaxmin:VI24_128
-	  (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
-	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
+	  (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
+	  (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
   "TARGET_SSE4_1
    && <mask_mode512bit_condition>
    && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
--- gcc/config/i386/mmx.md.jj	2021-02-16 08:57:21.151962030 +0100
+++ gcc/config/i386/mmx.md	2021-03-06 13:48:47.953874141 +0100
@@ -2021,9 +2021,9 @@ (define_expand "mmx_pshufw"
 })
 
 (define_insn "mmx_pshufw_1"
-  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
         (vec_select:V4HI
-          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
+	  (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
           (parallel [(match_operand 2 "const_0_to_3_operand")
                      (match_operand 3 "const_0_to_3_operand")
                      (match_operand 4 "const_0_to_3_operand")
@@ -2105,10 +2105,10 @@ (define_insn "mmx_pswapdv2si2"
    (set_attr "mode" "DI,TI")])
 
 (define_insn "*vec_dupv4hi"
-  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
+  [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
 	(vec_duplicate:V4HI
 	  (truncate:HI
-	    (match_operand:SI 1 "register_operand" "0,xYw"))))]
+	    (match_operand:SI 1 "register_operand" "0,Yw"))))]
   "(TARGET_MMX || TARGET_MMX_WITH_SSE)
    && (TARGET_SSE || TARGET_3DNOW_A)"
   "@
--- gcc/testsuite/lib/target-supports.exp.jj	2021-03-02 18:20:07.988674666 +0100
+++ gcc/testsuite/lib/target-supports.exp	2021-03-06 15:58:01.587771549 +0100
@@ -8945,6 +8945,16 @@ proc check_effective_target_avx512bw { }
     } "-mavx512bw" ]
 }
 
+# Return 1 if -Wa,-march=+noavx512bw is supported.
+proc check_effective_target_assembler_march_noavx512bw {} {
+    if { [istarget i?86*-*-*] || [istarget x86_64*-*-*] } {
+	return [check_no_compiler_messages assembler_march_noavx512bw object {
+	    void foo (void) {}
+	} "-mno-avx512bw -Wa,-march=+noavx512bw"]
+    }
+    return 0
+}
+
 # Return 1 if avx512vp2intersect instructions can be compiled.
 proc check_effective_target_avx512vp2intersect { } {
     return [check_no_compiler_messages avx512vp2intersect object {
--- gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c.jj	2021-03-06 16:00:20.862232850 +0100
+++ gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c	2021-03-06 16:00:03.756421839 +0100
@@ -0,0 +1,39 @@
+/* PR target/99321 */
+/* { dg-do assemble { target lp64 } } */
+/* { dg-require-effective-target avx512vl } */
+/* { dg-require-effective-target assembler_march_noavx512bw } */
+/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
+
+#include <x86intrin.h>
+
+typedef unsigned char V1 __attribute__((vector_size (16)));
+typedef unsigned char V2 __attribute__((vector_size (32)));
+typedef unsigned short V3 __attribute__((vector_size (16)));
+typedef unsigned short V4 __attribute__((vector_size (32)));
+
+void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
+void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
+void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
+void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
+void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
+void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
+void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }


	Jakub


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] i386, v2: Fix some -mavx512vl -mno-avx512bw bugs [PR99321]
  2021-03-07  8:40       ` [PATCH] i386, v2: " Jakub Jelinek
@ 2021-03-07  9:07         ` Uros Bizjak
  0 siblings, 0 replies; 6+ messages in thread
From: Uros Bizjak @ 2021-03-07  9:07 UTC (permalink / raw)
  To: Jakub Jelinek; +Cc: gcc-patches, Jeff Law

On Sun, Mar 7, 2021 at 9:41 AM Jakub Jelinek <jakub@redhat.com> wrote:
>
> On Sat, Mar 06, 2021 at 01:39:23PM +0100, Uros Bizjak via Gcc-patches wrote:
> > > One possibility would be to change the meaning of Yw, because it
> > > is an internal undocumented constraint and all uses in GCC currently use it
> > > as xYw:
> > > constraints.md:(define_register_constraint "Yw"
> > > mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> > > mmx.md:          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
> > > mmx.md:  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> > > mmx.md:     (match_operand:SI 1 "register_operand" "0,xYw"))))]
> > > Would that be ok?
> >
> > Yes, this is an excellent idea.
>
> Ok, here is the full patch, now including a testcase.
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?
>
> 2021-03-07  Jakub Jelinek  <jakub@redhat.com>
>
>         PR target/99321
>         * config/i386/constraints.md (Yw): Use SSE_REGS if TARGET_SSE
>         but TARGET_AVX512BW or TARGET_AVX512VL is not set.  Adjust description
>         and comment.
>         * config/i386/sse.md (v_Yw): New define_mode_attr.
>         (*<insn><mode>3, *mul<mode>3<mask_name>, *avx2_<code><mode>3,
>         *sse4_1_<code><mode>3<mask_name>): Use <v_Yw> instead of v
>         in constraints.
>         * config/i386/mmx.md (mmx_pshufw_1, *vec_dupv4hi): Use Yw instead of
>         xYw in constraints.
>
>         * lib/target-supports.exp
>         (check_effective_target_assembler_march_noavx512bw): New effective
>         target.
>         * gcc.target/i386/avx512vl-pr99321-1.c: New test.

OK.

Thanks,
Uros.

>
> --- gcc/config/i386/constraints.md.jj   2021-01-04 10:25:45.116162680 +0100
> +++ gcc/config/i386/constraints.md      2021-03-06 13:47:38.950644696 +0100
> @@ -110,7 +110,7 @@ (define_register_constraint "v" "TARGET_
>  ;;  v  any EVEX encodable SSE register for AVX512VL target,
>  ;;     otherwise any SSE register
>  ;;  w  any EVEX encodable SSE register for AVX512BW with TARGET_AVX512VL
> -;;     target.
> +;;     target, otherwise any SSE register.
>
>  (define_register_constraint "Yz" "TARGET_SSE ? SSE_FIRST_REG : NO_REGS"
>   "First SSE register (@code{%xmm0}).")
> @@ -148,8 +148,8 @@ (define_register_constraint "Yv"
>   "@internal For AVX512VL, any EVEX encodable SSE register (@code{%xmm0-%xmm31}), otherwise any SSE register.")
>
>  (define_register_constraint "Yw"
> - "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : NO_REGS"
> - "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target.")
> + "TARGET_AVX512BW && TARGET_AVX512VL ? ALL_SSE_REGS : TARGET_SSE ? SSE_REGS : NO_REGS"
> + "@internal Any EVEX encodable SSE register (@code{%xmm0-%xmm31}) for AVX512BW with TARGET_AVX512VL target, otherwise any SSE register.")
>
>  ;; We use the B prefix to denote any number of internal operands:
>  ;;  f  FLAGS_REG
> --- gcc/config/i386/sse.md.jj   2021-03-05 21:51:33.728349881 +0100
> +++ gcc/config/i386/sse.md      2021-03-06 13:48:13.283261323 +0100
> @@ -560,6 +560,14 @@ (define_mode_attr avx512
>     (V4SF "avx512vl") (V8SF "avx512vl") (V16SF "avx512f")
>     (V2DF "avx512vl") (V4DF "avx512vl") (V8DF "avx512f")])
>
> +(define_mode_attr v_Yw
> +  [(V16QI "Yw") (V32QI "Yw") (V64QI "v")
> +   (V8HI "Yw") (V16HI "Yw") (V32HI "v")
> +   (V4SI "v") (V8SI "v") (V16SI "v")
> +   (V2DI "v") (V4DI "v") (V8DI "v")
> +   (V4SF "v") (V8SF "v") (V16SF "v")
> +   (V2DF "v") (V4DF "v") (V8DF "v")])
> +
>  (define_mode_attr sse2_avx_avx512f
>    [(V16QI "sse2") (V32QI "avx") (V64QI "avx512f")
>     (V8HI  "avx512vl") (V16HI  "avx512vl") (V32HI "avx512bw")
> @@ -11677,10 +11685,10 @@ (define_expand "<insn><mode>3_mask"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
>  (define_insn "*<insn><mode>3"
> -  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
> +  [(set (match_operand:VI_AVX2 0 "register_operand" "=x,<v_Yw>")
>         (plusminus:VI_AVX2
> -         (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,v")
> -         (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,vmBr")))]
> +         (match_operand:VI_AVX2 1 "bcst_vector_operand" "<comm>0,<v_Yw>")
> +         (match_operand:VI_AVX2 2 "bcst_vector_operand" "xBm,<v_Yw>mBr")))]
>    "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
>    "@
>     p<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
> @@ -11790,9 +11798,9 @@ (define_expand "mul<mode>3<mask_name>"
>    "ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
>
>  (define_insn "*mul<mode>3<mask_name>"
> -  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,v")
> -       (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,v")
> -                      (match_operand:VI2_AVX2 2 "vector_operand" "xBm,vm")))]
> +  [(set (match_operand:VI2_AVX2 0 "register_operand" "=x,<v_Yw>")
> +       (mult:VI2_AVX2 (match_operand:VI2_AVX2 1 "vector_operand" "%0,<v_Yw>")
> +                      (match_operand:VI2_AVX2 2 "vector_operand" "xBm,<v_Yw>m")))]
>    "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))
>     && <mask_mode512bit_condition> && <mask_avx512bw_condition>"
>    "@
> @@ -12618,10 +12626,10 @@ (define_expand "<code><mode>3"
>    "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
>
>  (define_insn "*avx2_<code><mode>3"
> -  [(set (match_operand:VI124_256 0 "register_operand" "=v")
> +  [(set (match_operand:VI124_256 0 "register_operand" "=<v_Yw>")
>         (maxmin:VI124_256
> -         (match_operand:VI124_256 1 "nonimmediate_operand" "%v")
> -         (match_operand:VI124_256 2 "nonimmediate_operand" "vm")))]
> +         (match_operand:VI124_256 1 "nonimmediate_operand" "%<v_Yw>")
> +         (match_operand:VI124_256 2 "nonimmediate_operand" "<v_Yw>m")))]
>    "TARGET_AVX2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
>    "vp<maxmin_int><ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
>    [(set_attr "type" "sseiadd")
> @@ -12745,10 +12753,10 @@ (define_expand "<code><mode>3"
>  })
>
>  (define_insn "*sse4_1_<code><mode>3<mask_name>"
> -  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,v")
> +  [(set (match_operand:VI14_128 0 "register_operand" "=Yr,*x,<v_Yw>")
>         (smaxmin:VI14_128
> -         (match_operand:VI14_128 1 "vector_operand" "%0,0,v")
> -         (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,vm")))]
> +         (match_operand:VI14_128 1 "vector_operand" "%0,0,<v_Yw>")
> +         (match_operand:VI14_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
>    "TARGET_SSE4_1
>     && <mask_mode512bit_condition>
>     && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> @@ -12830,10 +12838,10 @@ (define_expand "<code><mode>3"
>  })
>
>  (define_insn "*sse4_1_<code><mode>3<mask_name>"
> -  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,v")
> +  [(set (match_operand:VI24_128 0 "register_operand" "=Yr,*x,<v_Yw>")
>         (umaxmin:VI24_128
> -         (match_operand:VI24_128 1 "vector_operand" "%0,0,v")
> -         (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,vm")))]
> +         (match_operand:VI24_128 1 "vector_operand" "%0,0,<v_Yw>")
> +         (match_operand:VI24_128 2 "vector_operand" "YrBm,*xBm,<v_Yw>m")))]
>    "TARGET_SSE4_1
>     && <mask_mode512bit_condition>
>     && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
> --- gcc/config/i386/mmx.md.jj   2021-02-16 08:57:21.151962030 +0100
> +++ gcc/config/i386/mmx.md      2021-03-06 13:48:47.953874141 +0100
> @@ -2021,9 +2021,9 @@ (define_expand "mmx_pshufw"
>  })
>
>  (define_insn "mmx_pshufw_1"
> -  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
>          (vec_select:V4HI
> -          (match_operand:V4HI 1 "register_mmxmem_operand" "ym,xYw")
> +         (match_operand:V4HI 1 "register_mmxmem_operand" "ym,Yw")
>            (parallel [(match_operand 2 "const_0_to_3_operand")
>                       (match_operand 3 "const_0_to_3_operand")
>                       (match_operand 4 "const_0_to_3_operand")
> @@ -2105,10 +2105,10 @@ (define_insn "mmx_pswapdv2si2"
>     (set_attr "mode" "DI,TI")])
>
>  (define_insn "*vec_dupv4hi"
> -  [(set (match_operand:V4HI 0 "register_operand" "=y,xYw")
> +  [(set (match_operand:V4HI 0 "register_operand" "=y,Yw")
>         (vec_duplicate:V4HI
>           (truncate:HI
> -           (match_operand:SI 1 "register_operand" "0,xYw"))))]
> +           (match_operand:SI 1 "register_operand" "0,Yw"))))]
>    "(TARGET_MMX || TARGET_MMX_WITH_SSE)
>     && (TARGET_SSE || TARGET_3DNOW_A)"
>    "@
> --- gcc/testsuite/lib/target-supports.exp.jj    2021-03-02 18:20:07.988674666 +0100
> +++ gcc/testsuite/lib/target-supports.exp       2021-03-06 15:58:01.587771549 +0100
> @@ -8945,6 +8945,16 @@ proc check_effective_target_avx512bw { }
>      } "-mavx512bw" ]
>  }
>
> +# Return 1 if -Wa,-march=+noavx512bw is supported.
> +proc check_effective_target_assembler_march_noavx512bw {} {
> +    if { [istarget i?86*-*-*] || [istarget x86_64*-*-*] } {
> +       return [check_no_compiler_messages assembler_march_noavx512bw object {
> +           void foo (void) {}
> +       } "-mno-avx512bw -Wa,-march=+noavx512bw"]
> +    }
> +    return 0
> +}
> +
>  # Return 1 if avx512vp2intersect instructions can be compiled.
>  proc check_effective_target_avx512vp2intersect { } {
>      return [check_no_compiler_messages avx512vp2intersect object {
> --- gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c.jj       2021-03-06 16:00:20.862232850 +0100
> +++ gcc/testsuite/gcc.target/i386/avx512vl-pr99321-1.c  2021-03-06 16:00:03.756421839 +0100
> @@ -0,0 +1,39 @@
> +/* PR target/99321 */
> +/* { dg-do assemble { target lp64 } } */
> +/* { dg-require-effective-target avx512vl } */
> +/* { dg-require-effective-target assembler_march_noavx512bw } */
> +/* { dg-options "-O2 -mavx512vl -mno-avx512bw -Wa,-march=+noavx512bw" } */
> +
> +#include <x86intrin.h>
> +
> +typedef unsigned char V1 __attribute__((vector_size (16)));
> +typedef unsigned char V2 __attribute__((vector_size (32)));
> +typedef unsigned short V3 __attribute__((vector_size (16)));
> +typedef unsigned short V4 __attribute__((vector_size (32)));
> +
> +void f1 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> +void f2 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> +void f3 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> +void f4 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a += b; __asm ("" : : "v" (a)); }
> +void f5 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> +void f6 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> +void f7 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> +void f8 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a -= b; __asm ("" : : "v" (a)); }
> +void f9 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
> +void f10 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a *= b; __asm ("" : : "v" (a)); }
> +void f11 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f12 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f13 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f14 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f15 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_min_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f16 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_min_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f17 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_min_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f18 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_min_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f19 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epu8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f20 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epu8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f21 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epu16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f22 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epu16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f23 (void) { register V1 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V1) _mm_max_epi8 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f24 (void) { register V2 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V2) _mm256_max_epi8 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
> +void f25 (void) { register V3 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V3) _mm_max_epi16 ((__m128i) a, (__m128i) b); __asm ("" : : "v" (a)); }
> +void f26 (void) { register V4 a __asm ("%xmm16"), b __asm ("%xmm17"); __asm ("" : "=v" (a), "=v" (b)); a = (V4) _mm256_max_epi16 ((__m256i) a, (__m256i) b); __asm ("" : : "v" (a)); }
>
>
>         Jakub
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2021-03-07  9:07 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-03-05 20:50 [PATCH] i386: Fix some -mavx512vl -mno-avx512bw bugs [PR99321] Jakub Jelinek
2021-03-06 10:19 ` Uros Bizjak
2021-03-06 10:34   ` Jakub Jelinek
2021-03-06 12:39     ` Uros Bizjak
2021-03-07  8:40       ` [PATCH] i386, v2: " Jakub Jelinek
2021-03-07  9:07         ` Uros Bizjak

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).